xref: /petsc/src/ksp/pc/impls/bjacobi/bjacobi.c (revision f2ed2dc71a2ab9ffda85eae8afa0cbea9ed570de)
1 
2 /*
3    Defines a block Jacobi preconditioner.
4 */
5 
6 #include <../src/ksp/pc/impls/bjacobi/bjacobi.h> /*I "petscpc.h" I*/
7 
8 static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC,Mat,Mat);
9 static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC,Mat,Mat);
10 static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC);
11 
/*
   PCSetUp_BJacobi - Top-level setup: decides how the matrix rows owned by this
   rank are partitioned into Jacobi blocks, extracts the local (diagonal) blocks
   of mat/pmat, and dispatches to the single-block, multi-block, or multi-process
   specialization.  Collective on pc.
*/
static PetscErrorCode PCSetUp_BJacobi(PC pc)
{
  PC_BJacobi     *jac = (PC_BJacobi*)pc->data;
  Mat            mat  = pc->mat,pmat = pc->pmat;
  PetscErrorCode ierr;
  PetscBool      hasop;
  PetscInt       N,M,start,i,sum,end;
  PetscInt       bs,i_start=-1,i_end=-1;
  PetscMPIInt    rank,size;
  const char     *pprefix,*mprefix;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr);
  ierr = MatGetLocalSize(pc->pmat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);

  /* Fewer (total) blocks than ranks: each block is shared by several processes */
  if (jac->n > 0 && jac->n < size) {
    ierr = PCSetUp_BJacobi_Multiproc(pc);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* --------------------------------------------------------------------------
      Determines the number of blocks assigned to each processor
  -----------------------------------------------------------------------------*/

  /* local block count given (jac->n_local set, global count jac->n unset) */
  if (jac->n_local > 0 && jac->n < 0) {
    /* derive the global count as the sum of the per-rank counts */
    ierr = MPIU_Allreduce(&jac->n_local,&jac->n,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
    if (jac->l_lens) { /* check that user set these correctly */
      sum = 0;
      for (i=0; i<jac->n_local; i++) {
        /* every block length must be a multiple of the matrix block size */
        if (jac->l_lens[i]/bs*bs !=jac->l_lens[i]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Mat blocksize doesn't match block Jacobi layout");
        sum += jac->l_lens[i];
      }
      if (sum != M) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local lens set incorrectly");
    } else {
      /* no lengths given: split the local rows as evenly as possible, in bs-multiples */
      ierr = PetscMalloc1(jac->n_local,&jac->l_lens);CHKERRQ(ierr);
      for (i=0; i<jac->n_local; i++) jac->l_lens[i] = bs*((M/bs)/jac->n_local + (((M/bs) % jac->n_local) > i));
    }
  } else if (jac->n > 0 && jac->n_local < 0) { /* global block count given */
    /* global blocks given: determine which ones are local */
    if (jac->g_lens) {
      /* check that g_lens has valid entries */
      for (i=0; i<jac->n; i++) {
        if (!jac->g_lens[i]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Zero block not allowed");
        if (jac->g_lens[i]/bs*bs != jac->g_lens[i]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Mat blocksize doesn't match block Jacobi layout");
      }
      if (size == 1) {
        jac->n_local = jac->n;
        ierr         = PetscMalloc1(jac->n_local,&jac->l_lens);CHKERRQ(ierr);
        ierr         = PetscArraycpy(jac->l_lens,jac->g_lens,jac->n_local);CHKERRQ(ierr);
        /* check that user set these correctly */
        sum = 0;
        for (i=0; i<jac->n_local; i++) sum += jac->l_lens[i];
        if (sum != M) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Global lens set incorrectly");
      } else {
        ierr = MatGetOwnershipRange(pc->pmat,&start,&end);CHKERRQ(ierr);
        /* loop over blocks determining first one owned by me; the block
           boundaries (running sum of g_lens) must line up exactly with the
           matrix ownership range, otherwise the layout is incompatible */
        sum = 0;
        for (i=0; i<jac->n+1; i++) {
          if (sum == start) { i_start = i; goto start_1;}
          if (i < jac->n) sum += jac->g_lens[i];
        }
        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
start_1:
        /* continue from the first local block until the boundary hits 'end' */
        for (i=i_start; i<jac->n+1; i++) {
          if (sum == end) { i_end = i; goto end_1; }
          if (i < jac->n) sum += jac->g_lens[i];
        }
        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
end_1:
        /* local blocks are [i_start, i_end); copy their lengths out of g_lens */
        jac->n_local = i_end - i_start;
        ierr         = PetscMalloc1(jac->n_local,&jac->l_lens);CHKERRQ(ierr);
        ierr         = PetscArraycpy(jac->l_lens,jac->g_lens+i_start,jac->n_local);CHKERRQ(ierr);
      }
    } else { /* no global blocks given, determine them using default layout */
      /* spread jac->n blocks over the ranks, low ranks get the extras */
      jac->n_local = jac->n/size + ((jac->n % size) > rank);
      ierr         = PetscMalloc1(jac->n_local,&jac->l_lens);CHKERRQ(ierr);
      for (i=0; i<jac->n_local; i++) {
        jac->l_lens[i] = ((M/bs)/jac->n_local + (((M/bs) % jac->n_local) > i))*bs;
        if (!jac->l_lens[i]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Too many blocks given");
      }
    }
  } else if (jac->n < 0 && jac->n_local < 0) { /* no blocks given */
    /* default: exactly one block per rank, covering all local rows */
    jac->n         = size;
    jac->n_local   = 1;
    ierr           = PetscMalloc1(1,&jac->l_lens);CHKERRQ(ierr);
    jac->l_lens[0] = M;
  } else { /* jac->n > 0 && jac->n_local > 0 */
    if (!jac->l_lens) {
      ierr = PetscMalloc1(jac->n_local,&jac->l_lens);CHKERRQ(ierr);
      for (i=0; i<jac->n_local; i++) jac->l_lens[i] = bs*((M/bs)/jac->n_local + (((M/bs) % jac->n_local) > i));
    }
  }
  if (jac->n_local < 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of blocks is less than number of processors");

  /* -------------------------
      Determines mat and pmat
  ---------------------------*/
  ierr = MatHasOperation(pc->mat,MATOP_GET_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
  if (!hasop && size == 1) {
    /* sequential matrix without a diagonal-block extraction: use it directly */
    mat  = pc->mat;
    pmat = pc->pmat;
  } else {
    if (pc->useAmat) {
      /* use block from Amat matrix, not Pmat for local MatMult() */
      ierr = MatGetDiagonalBlock(pc->mat,&mat);CHKERRQ(ierr);
      /* make submatrix have same prefix as entire matrix */
      ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->mat,&mprefix);CHKERRQ(ierr);
      ierr = PetscObjectSetOptionsPrefix((PetscObject)mat,mprefix);CHKERRQ(ierr);
    }
    if (pc->pmat != pc->mat || !pc->useAmat) {
      ierr = MatGetDiagonalBlock(pc->pmat,&pmat);CHKERRQ(ierr);
      /* make submatrix have same prefix as entire matrix */
      ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr);
      ierr = PetscObjectSetOptionsPrefix((PetscObject)pmat,pprefix);CHKERRQ(ierr);
    } else pmat = mat;
  }

  /* ------
     Setup code depends on the number of blocks
  */
  if (jac->n_local == 1) {
    ierr = PCSetUp_BJacobi_Singleblock(pc,mat,pmat);CHKERRQ(ierr);
  } else {
    ierr = PCSetUp_BJacobi_Multiblock(pc,mat,pmat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
142 
143 /* Default destroy, if it has never been setup */
144 static PetscErrorCode PCDestroy_BJacobi(PC pc)
145 {
146   PC_BJacobi     *jac = (PC_BJacobi*)pc->data;
147   PetscErrorCode ierr;
148 
149   PetscFunctionBegin;
150   ierr = PetscFree(jac->g_lens);CHKERRQ(ierr);
151   ierr = PetscFree(jac->l_lens);CHKERRQ(ierr);
152   ierr = PetscFree(pc->data);CHKERRQ(ierr);
153   PetscFunctionReturn(0);
154 }
155 
156 
157 static PetscErrorCode PCSetFromOptions_BJacobi(PetscOptionItems *PetscOptionsObject,PC pc)
158 {
159   PC_BJacobi     *jac = (PC_BJacobi*)pc->data;
160   PetscErrorCode ierr;
161   PetscInt       blocks,i;
162   PetscBool      flg;
163 
164   PetscFunctionBegin;
165   ierr = PetscOptionsHead(PetscOptionsObject,"Block Jacobi options");CHKERRQ(ierr);
166   ierr = PetscOptionsInt("-pc_bjacobi_blocks","Total number of blocks","PCBJacobiSetTotalBlocks",jac->n,&blocks,&flg);CHKERRQ(ierr);
167   if (flg) {ierr = PCBJacobiSetTotalBlocks(pc,blocks,NULL);CHKERRQ(ierr);}
168   ierr = PetscOptionsInt("-pc_bjacobi_local_blocks","Local number of blocks","PCBJacobiSetLocalBlocks",jac->n_local,&blocks,&flg);CHKERRQ(ierr);
169   if (flg) {ierr = PCBJacobiSetLocalBlocks(pc,blocks,NULL);CHKERRQ(ierr);}
170   if (jac->ksp) {
171     /* The sub-KSP has already been set up (e.g., PCSetUp_BJacobi_Singleblock), but KSPSetFromOptions was not called
172      * unless we had already been called. */
173     for (i=0; i<jac->n_local; i++) {
174       ierr = KSPSetFromOptions(jac->ksp[i]);CHKERRQ(ierr);
175     }
176   }
177   ierr = PetscOptionsTail();CHKERRQ(ierr);
178   PetscFunctionReturn(0);
179 }
180 
181 #include <petscdraw.h>
/*
   PCView_BJacobi - Prints the block layout and the sub-solver(s) to an ASCII,
   string, or draw viewer.  Collective on pc (uses sub-viewers and synchronized
   printing, so all ranks must participate in the same order).
*/
static PetscErrorCode PCView_BJacobi(PC pc,PetscViewer viewer)
{
  PC_BJacobi           *jac   = (PC_BJacobi*)pc->data;
  PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc*)jac->data;
  PetscErrorCode       ierr;
  PetscMPIInt          rank;
  PetscInt             i;
  PetscBool            iascii,isstring,isdraw;
  PetscViewer          sviewer;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    if (pc->useAmat) {
      ierr = PetscViewerASCIIPrintf(viewer,"  using Amat local matrix, number of blocks = %D\n",jac->n);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPrintf(viewer,"  number of blocks = %D\n",jac->n);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
    if (jac->same_local_solves) {
      /* all blocks use the same solver: show only rank 0's KSP as representative */
      ierr = PetscViewerASCIIPrintf(viewer,"  Local solver is the same for all blocks, as in the following KSP and PC objects on rank 0:\n");CHKERRQ(ierr);
      if (jac->ksp && !jac->psubcomm) {
        /* one-block-per-rank case: view jac->ksp[0] through a self sub-viewer */
        ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
        if (!rank) {
          ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
          ierr = KSPView(jac->ksp[0],sviewer);CHKERRQ(ierr);
          ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        }
        ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
      } else if (mpjac && jac->ksp && mpjac->psubcomm) {
        /* multi-process-per-block case: view on the first subcommunicator */
        ierr = PetscViewerGetSubViewer(viewer,mpjac->psubcomm->child,&sviewer);CHKERRQ(ierr);
        if (!mpjac->psubcomm->color) {
          ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
          ierr = KSPView(*(jac->ksp),sviewer);CHKERRQ(ierr);
          ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        }
        ierr = PetscViewerRestoreSubViewer(viewer,mpjac->psubcomm->child,&sviewer);CHKERRQ(ierr);
      }
    } else {
      /* solvers may differ per block: every rank prints each of its blocks
         via synchronized printing so the output stays ordered */
      PetscInt n_global;
      ierr = MPIU_Allreduce(&jac->n_local,&n_global,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Local solve info for each block is in the following KSP and PC objects:\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] number of local blocks = %D, first local block number = %D\n",
                                                rank,jac->n_local,jac->first_local);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
      ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
      for (i=0; i<jac->n_local; i++) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] local block number %D\n",rank,i);CHKERRQ(ierr);
        ierr = KSPView(jac->ksp[i],sviewer);CHKERRQ(ierr);
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
      }
      ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    }
  } else if (isstring) {
    ierr = PetscViewerStringSPrintf(viewer," blks=%D",jac->n);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (jac->ksp) {ierr = KSPView(jac->ksp[0],sviewer);CHKERRQ(ierr);}
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
  } else if (isdraw) {
    PetscDraw draw;
    char      str[25];
    PetscReal x,y,bottom,h;

    ierr   = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr   = PetscDrawGetCurrentPoint(draw,&x,&y);CHKERRQ(ierr);
    ierr   = PetscSNPrintf(str,25,"Number blocks %D",jac->n);CHKERRQ(ierr);
    ierr   = PetscDrawStringBoxed(draw,x,y,PETSC_DRAW_RED,PETSC_DRAW_BLACK,str,NULL,&h);CHKERRQ(ierr);
    bottom = y - h;
    ierr   = PetscDrawPushCurrentPoint(draw,x,bottom);CHKERRQ(ierr);
    /* warning: the communicator on the viewer is different than on the ksp in parallel */
    if (jac->ksp) {ierr = KSPView(jac->ksp[0],viewer);CHKERRQ(ierr);}
    ierr = PetscDrawPopCurrentPoint(draw);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
262 
263 /* -------------------------------------------------------------------------------------*/
264 
265 static PetscErrorCode  PCBJacobiGetSubKSP_BJacobi(PC pc,PetscInt *n_local,PetscInt *first_local,KSP **ksp)
266 {
267   PC_BJacobi *jac = (PC_BJacobi*)pc->data;
268 
269   PetscFunctionBegin;
270   if (!pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"Must call KSPSetUp() or PCSetUp() first");
271 
272   if (n_local) *n_local = jac->n_local;
273   if (first_local) *first_local = jac->first_local;
274   *ksp                   = jac->ksp;
275   jac->same_local_solves = PETSC_FALSE;        /* Assume that local solves are now different;
276                                                   not necessarily true though!  This flag is
277                                                   used only for PCView_BJacobi() */
278   PetscFunctionReturn(0);
279 }
280 
281 static PetscErrorCode  PCBJacobiSetTotalBlocks_BJacobi(PC pc,PetscInt blocks,PetscInt *lens)
282 {
283   PC_BJacobi     *jac = (PC_BJacobi*)pc->data;
284   PetscErrorCode ierr;
285 
286   PetscFunctionBegin;
287   if (pc->setupcalled > 0 && jac->n!=blocks) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ORDER,"Cannot alter number of blocks after PCSetUp()/KSPSetUp() has been called");
288   jac->n = blocks;
289   if (!lens) jac->g_lens = NULL;
290   else {
291     ierr = PetscMalloc1(blocks,&jac->g_lens);CHKERRQ(ierr);
292     ierr = PetscLogObjectMemory((PetscObject)pc,blocks*sizeof(PetscInt));CHKERRQ(ierr);
293     ierr = PetscArraycpy(jac->g_lens,lens,blocks);CHKERRQ(ierr);
294   }
295   PetscFunctionReturn(0);
296 }
297 
298 static PetscErrorCode  PCBJacobiGetTotalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[])
299 {
300   PC_BJacobi *jac = (PC_BJacobi*) pc->data;
301 
302   PetscFunctionBegin;
303   *blocks = jac->n;
304   if (lens) *lens = jac->g_lens;
305   PetscFunctionReturn(0);
306 }
307 
308 static PetscErrorCode  PCBJacobiSetLocalBlocks_BJacobi(PC pc,PetscInt blocks,const PetscInt lens[])
309 {
310   PC_BJacobi     *jac;
311   PetscErrorCode ierr;
312 
313   PetscFunctionBegin;
314   jac = (PC_BJacobi*)pc->data;
315 
316   jac->n_local = blocks;
317   if (!lens) jac->l_lens = NULL;
318   else {
319     ierr = PetscMalloc1(blocks,&jac->l_lens);CHKERRQ(ierr);
320     ierr = PetscLogObjectMemory((PetscObject)pc,blocks*sizeof(PetscInt));CHKERRQ(ierr);
321     ierr = PetscArraycpy(jac->l_lens,lens,blocks);CHKERRQ(ierr);
322   }
323   PetscFunctionReturn(0);
324 }
325 
326 static PetscErrorCode  PCBJacobiGetLocalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[])
327 {
328   PC_BJacobi *jac = (PC_BJacobi*) pc->data;
329 
330   PetscFunctionBegin;
331   *blocks = jac->n_local;
332   if (lens) *lens = jac->l_lens;
333   PetscFunctionReturn(0);
334 }
335 
336 /* -------------------------------------------------------------------------------------*/
337 
/*@C
   PCBJacobiGetSubKSP - Gets the local KSP contexts for all blocks on
   this processor.

   Not Collective

   Input Parameter:
.  pc - the preconditioner context

   Output Parameters:
+  n_local - the number of blocks on this processor, or NULL
.  first_local - the global number of the first block on this processor, or NULL
-  ksp - the array of KSP contexts

   Notes:
   After PCBJacobiGetSubKSP() the array of KSP contexts is not to be freed.

   Currently for some matrix implementations only 1 block per processor
   is supported.

   You must call KSPSetUp() or PCSetUp() before calling PCBJacobiGetSubKSP().

   Fortran Usage: You must pass in a KSP array that is large enough to contain all the local KSPs.
      You can call PCBJacobiGetSubKSP(pc,nlocal,firstlocal,PETSC_NULL_KSP,ierr) to determine how large the
      KSP array must be.

   Level: advanced

.seealso: PCASMGetSubKSP()
@*/
PetscErrorCode  PCBJacobiGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscUseMethod(pc,"PCBJacobiGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
377 
/*@
   PCBJacobiSetTotalBlocks - Sets the global number of blocks for the block
   Jacobi preconditioner.

   Collective on PC

   Input Parameters:
+  pc - the preconditioner context
.  blocks - the number of blocks
-  lens - [optional] integer array containing the size of each block

   Options Database Key:
.  -pc_bjacobi_blocks <blocks> - Sets the number of global blocks

   Notes:
   Currently only a limited number of blocking configurations are supported.
   All processors sharing the PC must call this routine with the same data.

   Level: intermediate

.seealso: PCSetUseAmat(), PCBJacobiSetLocalBlocks()
@*/
PetscErrorCode  PCBJacobiSetTotalBlocks(PC pc,PetscInt blocks,const PetscInt lens[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* a zero or negative global block count is never meaningful */
  if (blocks <= 0) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_OUTOFRANGE,"Must have positive blocks");
  /* PetscTryMethod: silently ignored if pc is not of type PCBJACOBI */
  ierr = PetscTryMethod(pc,"PCBJacobiSetTotalBlocks_C",(PC,PetscInt,const PetscInt[]),(pc,blocks,lens));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
410 
/*@C
   PCBJacobiGetTotalBlocks - Gets the global number of blocks for the block
   Jacobi preconditioner.

   Not Collective

   Input Parameter:
.  pc - the preconditioner context

   Output Parameters:
+  blocks - the number of blocks
-  lens - [optional] integer array containing the size of each block, or NULL if not needed

   Level: intermediate

.seealso: PCSetUseAmat(), PCBJacobiGetLocalBlocks()
@*/
PetscErrorCode  PCBJacobiGetTotalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID,1);
  PetscValidIntPointer(blocks,2);
  /* PetscUseMethod: errors if pc is not of type PCBJACOBI */
  ierr = PetscUseMethod(pc,"PCBJacobiGetTotalBlocks_C",(PC,PetscInt*, const PetscInt *[]),(pc,blocks,lens));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
438 
439 /*@
440    PCBJacobiSetLocalBlocks - Sets the local number of blocks for the block
441    Jacobi preconditioner.
442 
443    Not Collective
444 
445    Input Parameters:
446 +  pc - the preconditioner context
447 .  blocks - the number of blocks
448 -  lens - [optional] integer array containing size of each block
449 
450    Options Database Key:
451 .  -pc_bjacobi_local_blocks <blocks> - Sets the number of local blocks
452 
453    Note:
454    Currently only a limited number of blocking configurations are supported.
455 
456    Level: intermediate
457 
458 .seealso: PCSetUseAmat(), PCBJacobiSetTotalBlocks()
459 @*/
460 PetscErrorCode  PCBJacobiSetLocalBlocks(PC pc,PetscInt blocks,const PetscInt lens[])
461 {
462   PetscErrorCode ierr;
463 
464   PetscFunctionBegin;
465   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
466   if (blocks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Must have nonegative blocks");
467   ierr = PetscTryMethod(pc,"PCBJacobiSetLocalBlocks_C",(PC,PetscInt,const PetscInt []),(pc,blocks,lens));CHKERRQ(ierr);
468   PetscFunctionReturn(0);
469 }
470 
/*@C
   PCBJacobiGetLocalBlocks - Gets the local number of blocks for the block
   Jacobi preconditioner.

   Not Collective

   Input Parameter:
.  pc - the preconditioner context

   Output Parameters:
+  blocks - the number of blocks
-  lens - [optional] integer array containing size of each block, or NULL if not needed

   Note:
   Currently only a limited number of blocking configurations are supported.

   Level: intermediate

.seealso: PCSetUseAmat(), PCBJacobiGetTotalBlocks()
@*/
PetscErrorCode  PCBJacobiGetLocalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID,1);
  PetscValidIntPointer(blocks,2);
  /* PetscUseMethod: errors if pc is not of type PCBJACOBI */
  ierr = PetscUseMethod(pc,"PCBJacobiGetLocalBlocks_C",(PC,PetscInt*, const PetscInt *[]),(pc,blocks,lens));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
499 
500 /* -----------------------------------------------------------------------------------*/
501 
502 /*MC
503    PCBJACOBI - Use block Jacobi preconditioning, each block is (approximately) solved with
504            its own KSP object.
505 
506    Options Database Keys:
507 +  -pc_use_amat - use Amat to apply block of operator in inner Krylov method
508 -  -pc_bjacobi_blocks <n> - use n total blocks
509 
510    Notes:
511     Each processor can have one or more blocks, or a single block can be shared by several processes. Defaults to one block per processor.
512 
513      To set options on the solvers for each block append -sub_ to all the KSP and PC
514         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
515 
516      To set the options on the solvers separate for each block call PCBJacobiGetSubKSP()
517          and set the options directly on the resulting KSP object (you can access its PC
518          KSPGetPC())
519 
520      For GPU-based vectors (CUDA, ViennaCL) it is recommended to use exactly one block per MPI process for best
521          performance.  Different block partitioning may lead to additional data transfers
522          between host and GPU that lead to degraded performance.
523 
524      The options prefix for each block is sub_, for example -sub_pc_type lu.
525 
526      When multiple processes share a single block, each block encompasses exactly all the unknowns owned by its set of processes.
527 
528    Level: beginner
529 
530 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
531            PCASM, PCSetUseAmat(), PCGetUseAmat(), PCBJacobiGetSubKSP(), PCBJacobiSetTotalBlocks(),
532            PCBJacobiSetLocalBlocks(), PCSetModifySubMatrices()
533 M*/
534 
/*
   PCCreate_BJacobi - Constructor for the PCBJACOBI type: allocates the
   PC_BJacobi context, installs the type's function table, and registers the
   PCBJacobi{Set,Get}*Blocks and GetSubKSP methods on the object.
*/
PETSC_EXTERN PetscErrorCode PCCreate_BJacobi(PC pc)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PC_BJacobi     *jac;

  PetscFunctionBegin;
  ierr = PetscNewLog(pc,&jac);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);

  /* apply/matapply/applytranspose are filled in later by the
     single-block/multi-block/multi-proc setup routines */
  pc->ops->apply           = NULL;
  pc->ops->matapply        = NULL;
  pc->ops->applytranspose  = NULL;
  pc->ops->setup           = PCSetUp_BJacobi;
  pc->ops->destroy         = PCDestroy_BJacobi;
  pc->ops->setfromoptions  = PCSetFromOptions_BJacobi;
  pc->ops->view            = PCView_BJacobi;
  pc->ops->applyrichardson = NULL;

  pc->data               = (void*)jac;
  jac->n                 = -1;   /* -1 == "not yet chosen" for both counts */
  jac->n_local           = -1;
  jac->first_local       = rank; /* correct for the default one-block-per-rank layout */
  jac->ksp               = NULL;
  jac->same_local_solves = PETSC_TRUE;
  jac->g_lens            = NULL;
  jac->l_lens            = NULL;
  jac->psubcomm          = NULL;

  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBJacobiGetSubKSP_C",PCBJacobiGetSubKSP_BJacobi);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBJacobiSetTotalBlocks_C",PCBJacobiSetTotalBlocks_BJacobi);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBJacobiGetTotalBlocks_C",PCBJacobiGetTotalBlocks_BJacobi);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBJacobiSetLocalBlocks_C",PCBJacobiSetLocalBlocks_BJacobi);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCBJacobiGetLocalBlocks_C",PCBJacobiGetLocalBlocks_BJacobi);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
571 
572 /* --------------------------------------------------------------------------------------------*/
573 /*
574         These are for a single block per processor; works for AIJ, BAIJ; Seq and MPI
575 */
576 static PetscErrorCode PCReset_BJacobi_Singleblock(PC pc)
577 {
578   PC_BJacobi             *jac  = (PC_BJacobi*)pc->data;
579   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;
580   PetscErrorCode         ierr;
581 
582   PetscFunctionBegin;
583   ierr = KSPReset(jac->ksp[0]);CHKERRQ(ierr);
584   ierr = VecDestroy(&bjac->x);CHKERRQ(ierr);
585   ierr = VecDestroy(&bjac->y);CHKERRQ(ierr);
586   PetscFunctionReturn(0);
587 }
588 
589 static PetscErrorCode PCDestroy_BJacobi_Singleblock(PC pc)
590 {
591   PC_BJacobi             *jac  = (PC_BJacobi*)pc->data;
592   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;
593   PetscErrorCode         ierr;
594 
595   PetscFunctionBegin;
596   ierr = PCReset_BJacobi_Singleblock(pc);CHKERRQ(ierr);
597   ierr = KSPDestroy(&jac->ksp[0]);CHKERRQ(ierr);
598   ierr = PetscFree(jac->ksp);CHKERRQ(ierr);
599   ierr = PetscFree(jac->l_lens);CHKERRQ(ierr);
600   ierr = PetscFree(jac->g_lens);CHKERRQ(ierr);
601   ierr = PetscFree(bjac);CHKERRQ(ierr);
602   ierr = PetscFree(pc->data);CHKERRQ(ierr);
603   PetscFunctionReturn(0);
604 }
605 
606 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Singleblock(PC pc)
607 {
608   PetscErrorCode     ierr;
609   PC_BJacobi         *jac = (PC_BJacobi*)pc->data;
610   KSP                subksp = jac->ksp[0];
611   KSPConvergedReason reason;
612 
613   PetscFunctionBegin;
614   ierr = KSPSetUp(subksp);CHKERRQ(ierr);
615   ierr = KSPGetConvergedReason(subksp,&reason);CHKERRQ(ierr);
616   if (reason == KSP_DIVERGED_PC_FAILED) {
617     pc->failedreason = PC_SUBPC_ERROR;
618   }
619   PetscFunctionReturn(0);
620 }
621 
622 static PetscErrorCode PCApply_BJacobi_Singleblock(PC pc,Vec x,Vec y)
623 {
624   PetscErrorCode         ierr;
625   PC_BJacobi             *jac  = (PC_BJacobi*)pc->data;
626   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;
627 
628   PetscFunctionBegin;
629   ierr = VecGetLocalVectorRead(x, bjac->x);CHKERRQ(ierr);
630   ierr = VecGetLocalVector(y, bjac->y);CHKERRQ(ierr);
631  /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix the inner
632      matrix may change even if the outer KSP/PC has not updated the preconditioner, this will trigger a rebuild
633      of the inner preconditioner automatically unless we pass down the outer preconditioners reuse flag.*/
634   ierr = KSPSetReusePreconditioner(jac->ksp[0],pc->reusepreconditioner);CHKERRQ(ierr);
635   ierr = KSPSolve(jac->ksp[0],bjac->x,bjac->y);CHKERRQ(ierr);
636   ierr = KSPCheckSolve(jac->ksp[0],pc,bjac->y);CHKERRQ(ierr);
637   ierr = VecRestoreLocalVectorRead(x, bjac->x);CHKERRQ(ierr);
638   ierr = VecRestoreLocalVector(y, bjac->y);CHKERRQ(ierr);
639   PetscFunctionReturn(0);
640 }
641 
642 static PetscErrorCode PCMatApply_BJacobi_Singleblock(PC pc,Mat X,Mat Y)
643 {
644   PC_BJacobi     *jac  = (PC_BJacobi*)pc->data;
645   Mat            sX,sY;
646   PetscErrorCode ierr;
647 
648   PetscFunctionBegin;
649  /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix the inner
650      matrix may change even if the outer KSP/PC has not updated the preconditioner, this will trigger a rebuild
651      of the inner preconditioner automatically unless we pass down the outer preconditioners reuse flag.*/
652   ierr = KSPSetReusePreconditioner(jac->ksp[0],pc->reusepreconditioner);CHKERRQ(ierr);
653   ierr = MatDenseGetLocalMatrix(X,&sX);CHKERRQ(ierr);
654   ierr = MatDenseGetLocalMatrix(Y,&sY);CHKERRQ(ierr);
655   ierr = KSPMatSolve(jac->ksp[0],sX,sY);CHKERRQ(ierr);
656   PetscFunctionReturn(0);
657 }
658 
659 static PetscErrorCode PCApplySymmetricLeft_BJacobi_Singleblock(PC pc,Vec x,Vec y)
660 {
661   PetscErrorCode         ierr;
662   PC_BJacobi             *jac  = (PC_BJacobi*)pc->data;
663   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;
664   PetscScalar            *y_array;
665   const PetscScalar      *x_array;
666   PC                     subpc;
667 
668   PetscFunctionBegin;
669   /*
670       The VecPlaceArray() is to avoid having to copy the
671     y vector into the bjac->x vector. The reason for
672     the bjac->x vector is that we need a sequential vector
673     for the sequential solve.
674   */
675   ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr);
676   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
677   ierr = VecPlaceArray(bjac->x,x_array);CHKERRQ(ierr);
678   ierr = VecPlaceArray(bjac->y,y_array);CHKERRQ(ierr);
679   /* apply the symmetric left portion of the inner PC operator */
680   /* note this by-passes the inner KSP and its options completely */
681   ierr = KSPGetPC(jac->ksp[0],&subpc);CHKERRQ(ierr);
682   ierr = PCApplySymmetricLeft(subpc,bjac->x,bjac->y);CHKERRQ(ierr);
683   ierr = VecResetArray(bjac->x);CHKERRQ(ierr);
684   ierr = VecResetArray(bjac->y);CHKERRQ(ierr);
685   ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr);
686   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
687   PetscFunctionReturn(0);
688 }
689 
690 static PetscErrorCode PCApplySymmetricRight_BJacobi_Singleblock(PC pc,Vec x,Vec y)
691 {
692   PetscErrorCode         ierr;
693   PC_BJacobi             *jac  = (PC_BJacobi*)pc->data;
694   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;
695   PetscScalar            *y_array;
696   const PetscScalar      *x_array;
697   PC                     subpc;
698 
699   PetscFunctionBegin;
700   /*
701       The VecPlaceArray() is to avoid having to copy the
702     y vector into the bjac->x vector. The reason for
703     the bjac->x vector is that we need a sequential vector
704     for the sequential solve.
705   */
706   ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr);
707   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
708   ierr = VecPlaceArray(bjac->x,x_array);CHKERRQ(ierr);
709   ierr = VecPlaceArray(bjac->y,y_array);CHKERRQ(ierr);
710 
711   /* apply the symmetric right portion of the inner PC operator */
712   /* note this by-passes the inner KSP and its options completely */
713 
714   ierr = KSPGetPC(jac->ksp[0],&subpc);CHKERRQ(ierr);
715   ierr = PCApplySymmetricRight(subpc,bjac->x,bjac->y);CHKERRQ(ierr);
716 
717   ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr);
718   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
719   PetscFunctionReturn(0);
720 }
721 
722 static PetscErrorCode PCApplyTranspose_BJacobi_Singleblock(PC pc,Vec x,Vec y)
723 {
724   PetscErrorCode         ierr;
725   PC_BJacobi             *jac  = (PC_BJacobi*)pc->data;
726   PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;
727   PetscScalar            *y_array;
728   const PetscScalar      *x_array;
729 
730   PetscFunctionBegin;
731   /*
732       The VecPlaceArray() is to avoid having to copy the
733     y vector into the bjac->x vector. The reason for
734     the bjac->x vector is that we need a sequential vector
735     for the sequential solve.
736   */
737   ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr);
738   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
739   ierr = VecPlaceArray(bjac->x,x_array);CHKERRQ(ierr);
740   ierr = VecPlaceArray(bjac->y,y_array);CHKERRQ(ierr);
741   ierr = KSPSolveTranspose(jac->ksp[0],bjac->x,bjac->y);CHKERRQ(ierr);
742   ierr = KSPCheckSolve(jac->ksp[0],pc,bjac->y);CHKERRQ(ierr);
743   ierr = VecResetArray(bjac->x);CHKERRQ(ierr);
744   ierr = VecResetArray(bjac->y);CHKERRQ(ierr);
745   ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr);
746   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
747   PetscFunctionReturn(0);
748 }
749 
/*
   Sets up block Jacobi for the case of exactly one block per process.

   On first setup this creates the sequential sub-KSP (prefix "sub_"),
   installs the Singleblock variants of the PC operations, and creates the
   array-less work vectors bjac->x/bjac->y whose storage is supplied later
   via VecPlaceArray() in the apply routines. On subsequent setups it only
   refreshes the operators of the existing sub-KSP.

   Collective on PC.
*/
static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC pc,Mat mat,Mat pmat)
{
  PC_BJacobi             *jac = (PC_BJacobi*)pc->data;
  PetscErrorCode         ierr;
  PetscInt               m;
  KSP                    ksp;
  PC_BJacobi_Singleblock *bjac;
  PetscBool              wasSetup = PETSC_TRUE; /* skip KSPSetFromOptions() below when the sub-KSP already existed */
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  PetscBool              is_gpumatrix = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  if (!pc->setupcalled) {
    const char *prefix;

    if (!jac->ksp) {
      wasSetup = PETSC_FALSE;

      /* create the single sequential sub-solver; default type PREONLY */
      ierr = KSPCreate(PETSC_COMM_SELF,&ksp);CHKERRQ(ierr);
      ierr = KSPSetErrorIfNotConverged(ksp,pc->erroriffailure);CHKERRQ(ierr);
      ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr);
      ierr = PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp);CHKERRQ(ierr);
      ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
      /* sub-solver options use the outer prefix plus "sub_" */
      ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
      ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr);
      ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr);

      /* switch the PC to the single-block implementations */
      pc->ops->reset               = PCReset_BJacobi_Singleblock;
      pc->ops->destroy             = PCDestroy_BJacobi_Singleblock;
      pc->ops->apply               = PCApply_BJacobi_Singleblock;
      pc->ops->matapply            = PCMatApply_BJacobi_Singleblock;
      pc->ops->applysymmetricleft  = PCApplySymmetricLeft_BJacobi_Singleblock;
      pc->ops->applysymmetricright = PCApplySymmetricRight_BJacobi_Singleblock;
      pc->ops->applytranspose      = PCApplyTranspose_BJacobi_Singleblock;
      pc->ops->setuponblocks       = PCSetUpOnBlocks_BJacobi_Singleblock;

      ierr        = PetscMalloc1(1,&jac->ksp);CHKERRQ(ierr);
      jac->ksp[0] = ksp;

      ierr      = PetscNewLog(pc,&bjac);CHKERRQ(ierr);
      jac->data = (void*)bjac;
    } else {
      ksp  = jac->ksp[0];
      bjac = (PC_BJacobi_Singleblock*)jac->data;
    }

    /*
      The reason we need to generate these vectors is to serve
      as the right-hand side and solution vector for the solve on the
      block. We do not need to allocate space for the vectors since
      that is provided via VecPlaceArray() just before the call to
      KSPSolve() on the block.
    */
    ierr = MatGetSize(pmat,&m,&m);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,NULL,&bjac->x);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,NULL,&bjac->y);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
    /* match the work-vector type to a GPU matrix so solves stay on device */
    ierr = PetscObjectTypeCompareAny((PetscObject)pmat,&is_gpumatrix,MATAIJCUSPARSE,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");CHKERRQ(ierr);
    if (is_gpumatrix) {
      ierr = VecSetType(bjac->x,VECCUDA);CHKERRQ(ierr);
      ierr = VecSetType(bjac->y,VECCUDA);CHKERRQ(ierr);
    }
#endif
#if defined(PETSC_HAVE_VIENNACL)
    ierr = PetscObjectTypeCompareAny((PetscObject)pmat,&is_gpumatrix,MATAIJVIENNACL,MATSEQAIJVIENNACL,MATMPIAIJVIENNACL,"");CHKERRQ(ierr);
    if (is_gpumatrix) {
      ierr = VecSetType(bjac->x,VECVIENNACL);CHKERRQ(ierr);
      ierr = VecSetType(bjac->y,VECVIENNACL);CHKERRQ(ierr);
    }
#endif
    ierr = PetscLogObjectParent((PetscObject)pc,(PetscObject)bjac->x);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)pc,(PetscObject)bjac->y);CHKERRQ(ierr);
  } else {
    ksp  = jac->ksp[0];
    bjac = (PC_BJacobi_Singleblock*)jac->data;
  }
  /* with -pc_use_amat the sub-KSP applies mat but preconditions with pmat */
  if (pc->useAmat) {
    ierr = KSPSetOperators(ksp,mat,pmat);CHKERRQ(ierr);
  } else {
    ierr = KSPSetOperators(ksp,pmat,pmat);CHKERRQ(ierr);
  }
  if (!wasSetup && pc->setfromoptionscalled) {
    /* If PCSetFromOptions_BJacobi is called later, KSPSetFromOptions will be called at that time. */
    ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
838 
839 /* ---------------------------------------------------------------------------------------------*/
840 static PetscErrorCode PCReset_BJacobi_Multiblock(PC pc)
841 {
842   PC_BJacobi            *jac  = (PC_BJacobi*)pc->data;
843   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock*)jac->data;
844   PetscErrorCode        ierr;
845   PetscInt              i;
846 
847   PetscFunctionBegin;
848   if (bjac && bjac->pmat) {
849     ierr = MatDestroyMatrices(jac->n_local,&bjac->pmat);CHKERRQ(ierr);
850     if (pc->useAmat) {
851       ierr = MatDestroyMatrices(jac->n_local,&bjac->mat);CHKERRQ(ierr);
852     }
853   }
854 
855   for (i=0; i<jac->n_local; i++) {
856     ierr = KSPReset(jac->ksp[i]);CHKERRQ(ierr);
857     if (bjac && bjac->x) {
858       ierr = VecDestroy(&bjac->x[i]);CHKERRQ(ierr);
859       ierr = VecDestroy(&bjac->y[i]);CHKERRQ(ierr);
860       ierr = ISDestroy(&bjac->is[i]);CHKERRQ(ierr);
861     }
862   }
863   ierr = PetscFree(jac->l_lens);CHKERRQ(ierr);
864   ierr = PetscFree(jac->g_lens);CHKERRQ(ierr);
865   PetscFunctionReturn(0);
866 }
867 
868 static PetscErrorCode PCDestroy_BJacobi_Multiblock(PC pc)
869 {
870   PC_BJacobi            *jac  = (PC_BJacobi*)pc->data;
871   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock*)jac->data;
872   PetscErrorCode        ierr;
873   PetscInt              i;
874 
875   PetscFunctionBegin;
876   ierr = PCReset_BJacobi_Multiblock(pc);CHKERRQ(ierr);
877   if (bjac) {
878     ierr = PetscFree2(bjac->x,bjac->y);CHKERRQ(ierr);
879     ierr = PetscFree(bjac->starts);CHKERRQ(ierr);
880     ierr = PetscFree(bjac->is);CHKERRQ(ierr);
881   }
882   ierr = PetscFree(jac->data);CHKERRQ(ierr);
883   for (i=0; i<jac->n_local; i++) {
884     ierr = KSPDestroy(&jac->ksp[i]);CHKERRQ(ierr);
885   }
886   ierr = PetscFree(jac->ksp);CHKERRQ(ierr);
887   ierr = PetscFree(pc->data);CHKERRQ(ierr);
888   PetscFunctionReturn(0);
889 }
890 
891 static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiblock(PC pc)
892 {
893   PC_BJacobi         *jac = (PC_BJacobi*)pc->data;
894   PetscErrorCode     ierr;
895   PetscInt           i,n_local = jac->n_local;
896   KSPConvergedReason reason;
897 
898   PetscFunctionBegin;
899   for (i=0; i<n_local; i++) {
900     ierr = KSPSetUp(jac->ksp[i]);CHKERRQ(ierr);
901     ierr = KSPGetConvergedReason(jac->ksp[i],&reason);CHKERRQ(ierr);
902     if (reason == KSP_DIVERGED_PC_FAILED) {
903       pc->failedreason = PC_SUBPC_ERROR;
904     }
905   }
906   PetscFunctionReturn(0);
907 }
908 
909 /*
910       Preconditioner for block Jacobi
911 */
912 static PetscErrorCode PCApply_BJacobi_Multiblock(PC pc,Vec x,Vec y)
913 {
914   PC_BJacobi            *jac = (PC_BJacobi*)pc->data;
915   PetscErrorCode        ierr;
916   PetscInt              i,n_local = jac->n_local;
917   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock*)jac->data;
918   PetscScalar           *yin;
919   const PetscScalar     *xin;
920 
921   PetscFunctionBegin;
922   ierr = VecGetArrayRead(x,&xin);CHKERRQ(ierr);
923   ierr = VecGetArray(y,&yin);CHKERRQ(ierr);
924   for (i=0; i<n_local; i++) {
925     /*
926        To avoid copying the subvector from x into a workspace we instead
927        make the workspace vector array point to the subpart of the array of
928        the global vector.
929     */
930     ierr = VecPlaceArray(bjac->x[i],xin+bjac->starts[i]);CHKERRQ(ierr);
931     ierr = VecPlaceArray(bjac->y[i],yin+bjac->starts[i]);CHKERRQ(ierr);
932 
933     ierr = PetscLogEventBegin(PC_ApplyOnBlocks,jac->ksp[i],bjac->x[i],bjac->y[i],0);CHKERRQ(ierr);
934     ierr = KSPSolve(jac->ksp[i],bjac->x[i],bjac->y[i]);CHKERRQ(ierr);
935     ierr = KSPCheckSolve(jac->ksp[i],pc,bjac->y[i]);CHKERRQ(ierr);
936     ierr = PetscLogEventEnd(PC_ApplyOnBlocks,jac->ksp[i],bjac->x[i],bjac->y[i],0);CHKERRQ(ierr);
937 
938     ierr = VecResetArray(bjac->x[i]);CHKERRQ(ierr);
939     ierr = VecResetArray(bjac->y[i]);CHKERRQ(ierr);
940   }
941   ierr = VecRestoreArrayRead(x,&xin);CHKERRQ(ierr);
942   ierr = VecRestoreArray(y,&yin);CHKERRQ(ierr);
943   PetscFunctionReturn(0);
944 }
945 
946 /*
947       Preconditioner for block Jacobi
948 */
949 static PetscErrorCode PCApplyTranspose_BJacobi_Multiblock(PC pc,Vec x,Vec y)
950 {
951   PC_BJacobi            *jac = (PC_BJacobi*)pc->data;
952   PetscErrorCode        ierr;
953   PetscInt              i,n_local = jac->n_local;
954   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock*)jac->data;
955   PetscScalar           *yin;
956   const PetscScalar     *xin;
957 
958   PetscFunctionBegin;
959   ierr = VecGetArrayRead(x,&xin);CHKERRQ(ierr);
960   ierr = VecGetArray(y,&yin);CHKERRQ(ierr);
961   for (i=0; i<n_local; i++) {
962     /*
963        To avoid copying the subvector from x into a workspace we instead
964        make the workspace vector array point to the subpart of the array of
965        the global vector.
966     */
967     ierr = VecPlaceArray(bjac->x[i],xin+bjac->starts[i]);CHKERRQ(ierr);
968     ierr = VecPlaceArray(bjac->y[i],yin+bjac->starts[i]);CHKERRQ(ierr);
969 
970     ierr = PetscLogEventBegin(PC_ApplyTransposeOnBlocks,jac->ksp[i],bjac->x[i],bjac->y[i],0);CHKERRQ(ierr);
971     ierr = KSPSolveTranspose(jac->ksp[i],bjac->x[i],bjac->y[i]);CHKERRQ(ierr);
972     ierr = KSPCheckSolve(jac->ksp[i],pc,bjac->y[i]);CHKERRQ(ierr);
973     ierr = PetscLogEventEnd(PC_ApplyTransposeOnBlocks,jac->ksp[i],bjac->x[i],bjac->y[i],0);CHKERRQ(ierr);
974 
975     ierr = VecResetArray(bjac->x[i]);CHKERRQ(ierr);
976     ierr = VecResetArray(bjac->y[i]);CHKERRQ(ierr);
977   }
978   ierr = VecRestoreArrayRead(x,&xin);CHKERRQ(ierr);
979   ierr = VecRestoreArray(y,&yin);CHKERRQ(ierr);
980   PetscFunctionReturn(0);
981 }
982 
983 static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC pc,Mat mat,Mat pmat)
984 {
985   PC_BJacobi            *jac = (PC_BJacobi*)pc->data;
986   PetscErrorCode        ierr;
987   PetscInt              m,n_local,N,M,start,i;
988   const char            *prefix,*pprefix,*mprefix;
989   KSP                   ksp;
990   Vec                   x,y;
991   PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock*)jac->data;
992   PC                    subpc;
993   IS                    is;
994   MatReuse              scall;
995 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
996   PetscBool              is_gpumatrix = PETSC_FALSE;
997 #endif
998 
999   PetscFunctionBegin;
1000   ierr = MatGetLocalSize(pc->pmat,&M,&N);CHKERRQ(ierr);
1001 
1002   n_local = jac->n_local;
1003 
1004   if (pc->useAmat) {
1005     PetscBool same;
1006     ierr = PetscObjectTypeCompare((PetscObject)mat,((PetscObject)pmat)->type_name,&same);CHKERRQ(ierr);
1007     if (!same) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_INCOMP,"Matrices not of same type");
1008   }
1009 
1010   if (!pc->setupcalled) {
1011     scall = MAT_INITIAL_MATRIX;
1012 
1013     if (!jac->ksp) {
1014       pc->ops->reset         = PCReset_BJacobi_Multiblock;
1015       pc->ops->destroy       = PCDestroy_BJacobi_Multiblock;
1016       pc->ops->apply         = PCApply_BJacobi_Multiblock;
1017       pc->ops->matapply      = NULL;
1018       pc->ops->applytranspose= PCApplyTranspose_BJacobi_Multiblock;
1019       pc->ops->setuponblocks = PCSetUpOnBlocks_BJacobi_Multiblock;
1020 
1021       ierr = PetscNewLog(pc,&bjac);CHKERRQ(ierr);
1022       ierr = PetscMalloc1(n_local,&jac->ksp);CHKERRQ(ierr);
1023       ierr = PetscLogObjectMemory((PetscObject)pc,sizeof(n_local*sizeof(KSP)));CHKERRQ(ierr);
1024       ierr = PetscMalloc2(n_local,&bjac->x,n_local,&bjac->y);CHKERRQ(ierr);
1025       ierr = PetscMalloc1(n_local,&bjac->starts);CHKERRQ(ierr);
1026       ierr = PetscLogObjectMemory((PetscObject)pc,sizeof(n_local*sizeof(PetscScalar)));CHKERRQ(ierr);
1027 
1028       jac->data = (void*)bjac;
1029       ierr      = PetscMalloc1(n_local,&bjac->is);CHKERRQ(ierr);
1030       ierr      = PetscLogObjectMemory((PetscObject)pc,sizeof(n_local*sizeof(IS)));CHKERRQ(ierr);
1031 
1032       for (i=0; i<n_local; i++) {
1033         ierr = KSPCreate(PETSC_COMM_SELF,&ksp);CHKERRQ(ierr);
1034         ierr = KSPSetErrorIfNotConverged(ksp,pc->erroriffailure);CHKERRQ(ierr);
1035         ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr);
1036         ierr = PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp);CHKERRQ(ierr);
1037         ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
1038         ierr = KSPGetPC(ksp,&subpc);CHKERRQ(ierr);
1039         ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
1040         ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr);
1041         ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr);
1042 
1043         jac->ksp[i] = ksp;
1044       }
1045     } else {
1046       bjac = (PC_BJacobi_Multiblock*)jac->data;
1047     }
1048 
1049     start = 0;
1050     for (i=0; i<n_local; i++) {
1051       m = jac->l_lens[i];
1052       /*
1053       The reason we need to generate these vectors is to serve
1054       as the right-hand side and solution vector for the solve on the
1055       block. We do not need to allocate space for the vectors since
1056       that is provided via VecPlaceArray() just before the call to
1057       KSPSolve() on the block.
1058 
1059       */
1060       ierr = VecCreateSeq(PETSC_COMM_SELF,m,&x);CHKERRQ(ierr);
1061       ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,NULL,&y);CHKERRQ(ierr);
1062 #if defined(PETSC_HAVE_CUDA)
1063       ierr = PetscObjectTypeCompareAny((PetscObject)pmat,&is_gpumatrix,MATAIJCUSPARSE,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");CHKERRQ(ierr);
1064       if (is_gpumatrix) {
1065         ierr = VecSetType(x,VECCUDA);CHKERRQ(ierr);
1066         ierr = VecSetType(y,VECCUDA);CHKERRQ(ierr);
1067       }
1068 #endif
1069 #if defined(PETSC_HAVE_VIENNACL)
1070       ierr = PetscObjectTypeCompareAny((PetscObject)pmat,&is_gpumatrix,MATAIJVIENNACL,MATSEQAIJVIENNACL,MATMPIAIJVIENNACL,"");CHKERRQ(ierr);
1071       if (is_gpumatrix) {
1072         ierr = VecSetType(x,VECVIENNACL);CHKERRQ(ierr);
1073         ierr = VecSetType(y,VECVIENNACL);CHKERRQ(ierr);
1074       }
1075 #endif
1076       ierr = PetscLogObjectParent((PetscObject)pc,(PetscObject)x);CHKERRQ(ierr);
1077       ierr = PetscLogObjectParent((PetscObject)pc,(PetscObject)y);CHKERRQ(ierr);
1078 
1079       bjac->x[i]      = x;
1080       bjac->y[i]      = y;
1081       bjac->starts[i] = start;
1082 
1083       ierr        = ISCreateStride(PETSC_COMM_SELF,m,start,1,&is);CHKERRQ(ierr);
1084       bjac->is[i] = is;
1085       ierr        = PetscLogObjectParent((PetscObject)pc,(PetscObject)is);CHKERRQ(ierr);
1086 
1087       start += m;
1088     }
1089   } else {
1090     bjac = (PC_BJacobi_Multiblock*)jac->data;
1091     /*
1092        Destroy the blocks from the previous iteration
1093     */
1094     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1095       ierr = MatDestroyMatrices(n_local,&bjac->pmat);CHKERRQ(ierr);
1096       if (pc->useAmat) {
1097         ierr = MatDestroyMatrices(n_local,&bjac->mat);CHKERRQ(ierr);
1098       }
1099       scall = MAT_INITIAL_MATRIX;
1100     } else scall = MAT_REUSE_MATRIX;
1101   }
1102 
1103   ierr = MatCreateSubMatrices(pmat,n_local,bjac->is,bjac->is,scall,&bjac->pmat);CHKERRQ(ierr);
1104   if (pc->useAmat) {
1105     ierr = MatCreateSubMatrices(mat,n_local,bjac->is,bjac->is,scall,&bjac->mat);CHKERRQ(ierr);
1106   }
1107   /* Return control to the user so that the submatrices can be modified (e.g., to apply
1108      different boundary conditions for the submatrices than for the global problem) */
1109   ierr = PCModifySubMatrices(pc,n_local,bjac->is,bjac->is,bjac->pmat,pc->modifysubmatricesP);CHKERRQ(ierr);
1110 
1111   ierr = PetscObjectGetOptionsPrefix((PetscObject)pmat,&pprefix);CHKERRQ(ierr);
1112   for (i=0; i<n_local; i++) {
1113     ierr = PetscLogObjectParent((PetscObject)pc,(PetscObject)bjac->pmat[i]);CHKERRQ(ierr);
1114     ierr = PetscObjectSetOptionsPrefix((PetscObject)bjac->pmat[i],pprefix);CHKERRQ(ierr);
1115     if (pc->useAmat) {
1116       ierr = PetscLogObjectParent((PetscObject)pc,(PetscObject)bjac->mat[i]);CHKERRQ(ierr);
1117       ierr = PetscObjectGetOptionsPrefix((PetscObject)mat,&mprefix);CHKERRQ(ierr);
1118       ierr = PetscObjectSetOptionsPrefix((PetscObject)bjac->mat[i],mprefix);CHKERRQ(ierr);
1119       ierr = KSPSetOperators(jac->ksp[i],bjac->mat[i],bjac->pmat[i]);CHKERRQ(ierr);
1120     } else {
1121       ierr = KSPSetOperators(jac->ksp[i],bjac->pmat[i],bjac->pmat[i]);CHKERRQ(ierr);
1122     }
1123     if (pc->setfromoptionscalled) {
1124       ierr = KSPSetFromOptions(jac->ksp[i]);CHKERRQ(ierr);
1125     }
1126   }
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 /* ---------------------------------------------------------------------------------------------*/
1131 /*
1132       These are for a single block with multiple processes;
1133 */
1134 static PetscErrorCode PCReset_BJacobi_Multiproc(PC pc)
1135 {
1136   PC_BJacobi           *jac   = (PC_BJacobi*)pc->data;
1137   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc*)jac->data;
1138   PetscErrorCode       ierr;
1139 
1140   PetscFunctionBegin;
1141   ierr = VecDestroy(&mpjac->ysub);CHKERRQ(ierr);
1142   ierr = VecDestroy(&mpjac->xsub);CHKERRQ(ierr);
1143   ierr = MatDestroy(&mpjac->submats);CHKERRQ(ierr);
1144   if (jac->ksp) {ierr = KSPReset(jac->ksp[0]);CHKERRQ(ierr);}
1145   PetscFunctionReturn(0);
1146 }
1147 
1148 static PetscErrorCode PCDestroy_BJacobi_Multiproc(PC pc)
1149 {
1150   PC_BJacobi           *jac   = (PC_BJacobi*)pc->data;
1151   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc*)jac->data;
1152   PetscErrorCode       ierr;
1153 
1154   PetscFunctionBegin;
1155   ierr = PCReset_BJacobi_Multiproc(pc);CHKERRQ(ierr);
1156   ierr = KSPDestroy(&jac->ksp[0]);CHKERRQ(ierr);
1157   ierr = PetscFree(jac->ksp);CHKERRQ(ierr);
1158   ierr = PetscSubcommDestroy(&mpjac->psubcomm);CHKERRQ(ierr);
1159 
1160   ierr = PetscFree(mpjac);CHKERRQ(ierr);
1161   ierr = PetscFree(pc->data);CHKERRQ(ierr);
1162   PetscFunctionReturn(0);
1163 }
1164 
1165 static PetscErrorCode PCApply_BJacobi_Multiproc(PC pc,Vec x,Vec y)
1166 {
1167   PC_BJacobi           *jac   = (PC_BJacobi*)pc->data;
1168   PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc*)jac->data;
1169   PetscErrorCode       ierr;
1170   PetscScalar          *yarray;
1171   const PetscScalar    *xarray;
1172   KSPConvergedReason   reason;
1173 
1174   PetscFunctionBegin;
1175   /* place x's and y's local arrays into xsub and ysub */
1176   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
1177   ierr = VecGetArray(y,&yarray);CHKERRQ(ierr);
1178   ierr = VecPlaceArray(mpjac->xsub,xarray);CHKERRQ(ierr);
1179   ierr = VecPlaceArray(mpjac->ysub,yarray);CHKERRQ(ierr);
1180 
1181   /* apply preconditioner on each matrix block */
1182   ierr = PetscLogEventBegin(PC_ApplyOnBlocks,jac->ksp[0],mpjac->xsub,mpjac->ysub,0);CHKERRQ(ierr);
1183   ierr = KSPSolve(jac->ksp[0],mpjac->xsub,mpjac->ysub);CHKERRQ(ierr);
1184   ierr = KSPCheckSolve(jac->ksp[0],pc,mpjac->ysub);CHKERRQ(ierr);
1185   ierr = PetscLogEventEnd(PC_ApplyOnBlocks,jac->ksp[0],mpjac->xsub,mpjac->ysub,0);CHKERRQ(ierr);
1186   ierr = KSPGetConvergedReason(jac->ksp[0],&reason);CHKERRQ(ierr);
1187   if (reason == KSP_DIVERGED_PC_FAILED) {
1188     pc->failedreason = PC_SUBPC_ERROR;
1189   }
1190 
1191   ierr = VecResetArray(mpjac->xsub);CHKERRQ(ierr);
1192   ierr = VecResetArray(mpjac->ysub);CHKERRQ(ierr);
1193   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
1194   ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr);
1195   PetscFunctionReturn(0);
1196 }
1197 
/*
   Applies the multiproc preconditioner to all columns of a dense matrix at
   once: wraps the storage of X and Y in dense matrices sX/sY living on the
   sub-KSP's communicator and calls KSPMatSolve().
*/
static PetscErrorCode PCMatApply_BJacobi_Multiproc(PC pc,Mat X,Mat Y)
{
  PC_BJacobi           *jac   = (PC_BJacobi*)pc->data;
  KSPConvergedReason   reason;
  Mat                  sX,sY;
  const PetscScalar    *x;
  PetscScalar          *y;
  PetscInt             m,N,lda,ldb;
  PetscErrorCode       ierr;

  PetscFunctionBegin;
  /* apply preconditioner on each matrix block */
  ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr);
  ierr = MatGetSize(X,NULL,&N);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(X,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(Y,&ldb);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(X,&x);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(Y,&y);CHKERRQ(ierr);
  /* zero-copy wrappers sharing X's and Y's storage; the const cast is safe
     because sX is only used as the (read-only) right-hand side */
  ierr = MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]),m,PETSC_DECIDE,PETSC_DECIDE,N,(PetscScalar*)x,&sX);CHKERRQ(ierr);
  ierr = MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]),m,PETSC_DECIDE,PETSC_DECIDE,N,y,&sY);CHKERRQ(ierr);
  /* preserve the original leading dimensions so column strides match */
  ierr = MatDenseSetLDA(sX,lda);CHKERRQ(ierr);
  ierr = MatDenseSetLDA(sY,ldb);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PC_ApplyOnBlocks,jac->ksp[0],X,Y,0);CHKERRQ(ierr);
  ierr = KSPMatSolve(jac->ksp[0],sX,sY);CHKERRQ(ierr);
  /* no single solution vector available here, hence the NULL argument */
  ierr = KSPCheckSolve(jac->ksp[0],pc,NULL);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PC_ApplyOnBlocks,jac->ksp[0],X,Y,0);CHKERRQ(ierr);
  /* destroy the wrappers before restoring the arrays they alias */
  ierr = MatDestroy(&sY);CHKERRQ(ierr);
  ierr = MatDestroy(&sX);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayWrite(Y,&y);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(X,&x);CHKERRQ(ierr);
  /* propagate a sub-PC failure to the outer PC */
  ierr = KSPGetConvergedReason(jac->ksp[0],&reason);CHKERRQ(ierr);
  if (reason == KSP_DIVERGED_PC_FAILED) {
    pc->failedreason = PC_SUBPC_ERROR;
  }
  PetscFunctionReturn(0);
}
1234 
/*
   Sets up block Jacobi when each block spans several MPI processes: splits
   the communicator into jac->n contiguous subcommunicators (unless the user
   supplied one), extracts the per-subcommunicator matrix block, and creates
   one shared KSP (prefix "sub_") plus array-less work vectors xsub/ysub
   that are filled via VecPlaceArray() at apply time.

   Collective on PC.
*/
static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC pc)
{
  PC_BJacobi           *jac   = (PC_BJacobi*)pc->data;
  PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc*)jac->data;
  PetscErrorCode       ierr;
  PetscInt             m,n;
  MPI_Comm             comm,subcomm=0;
  const char           *prefix;
  PetscBool            wasSetup = PETSC_TRUE; /* skip KSPSetFromOptions() below if the sub-KSP already existed */
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  PetscBool              is_gpumatrix = PETSC_FALSE;
#endif


  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
  if (jac->n_local > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only a single block in a subcommunicator is supported");
  jac->n_local = 1; /* currently only a single block is supported for a subcommunicator */
  if (!pc->setupcalled) {
    wasSetup  = PETSC_FALSE;
    ierr      = PetscNewLog(pc,&mpjac);CHKERRQ(ierr);
    jac->data = (void*)mpjac;

    /* initialize datastructure mpjac */
    if (!jac->psubcomm) {
      /* Create default contiguous subcommunicators if user does not provide them */
      ierr = PetscSubcommCreate(comm,&jac->psubcomm);CHKERRQ(ierr);
      ierr = PetscSubcommSetNumber(jac->psubcomm,jac->n);CHKERRQ(ierr);
      ierr = PetscSubcommSetType(jac->psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
      ierr = PetscLogObjectMemory((PetscObject)pc,sizeof(PetscSubcomm));CHKERRQ(ierr);
    }
    mpjac->psubcomm = jac->psubcomm;
    subcomm         = PetscSubcommChild(mpjac->psubcomm);

    /* Get matrix blocks of pmat */
    ierr = MatGetMultiProcBlock(pc->pmat,subcomm,MAT_INITIAL_MATRIX,&mpjac->submats);CHKERRQ(ierr);

    /* create a new PC that processors in each subcomm have copy of */
    ierr = PetscMalloc1(1,&jac->ksp);CHKERRQ(ierr);
    ierr = KSPCreate(subcomm,&jac->ksp[0]);CHKERRQ(ierr);
    ierr = KSPSetErrorIfNotConverged(jac->ksp[0],pc->erroriffailure);CHKERRQ(ierr);
    ierr = PetscObjectIncrementTabLevel((PetscObject)jac->ksp[0],(PetscObject)pc,1);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)pc,(PetscObject)jac->ksp[0]);CHKERRQ(ierr);
    ierr = KSPSetOperators(jac->ksp[0],mpjac->submats,mpjac->submats);CHKERRQ(ierr);
    ierr = KSPGetPC(jac->ksp[0],&mpjac->pc);CHKERRQ(ierr);

    /* sub-solver options use the outer prefix plus "sub_" */
    ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
    ierr = KSPSetOptionsPrefix(jac->ksp[0],prefix);CHKERRQ(ierr);
    ierr = KSPAppendOptionsPrefix(jac->ksp[0],"sub_");CHKERRQ(ierr);

    /* create dummy vectors xsub and ysub */
    ierr = MatGetLocalSize(mpjac->submats,&m,&n);CHKERRQ(ierr);
    ierr = VecCreateMPIWithArray(subcomm,1,n,PETSC_DECIDE,NULL,&mpjac->xsub);CHKERRQ(ierr);
    ierr = VecCreateMPIWithArray(subcomm,1,m,PETSC_DECIDE,NULL,&mpjac->ysub);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
    /* match the work-vector type to a GPU matrix so solves stay on device */
    ierr = PetscObjectTypeCompareAny((PetscObject)mpjac->submats,&is_gpumatrix,MATAIJCUSPARSE,MATMPIAIJCUSPARSE,"");CHKERRQ(ierr);
    if (is_gpumatrix) {
      ierr = VecSetType(mpjac->xsub,VECMPICUDA);CHKERRQ(ierr);
      ierr = VecSetType(mpjac->ysub,VECMPICUDA);CHKERRQ(ierr);
    }
#endif
#if defined(PETSC_HAVE_VIENNACL)
    ierr = PetscObjectTypeCompareAny((PetscObject)mpjac->submats,&is_gpumatrix,MATAIJVIENNACL,MATMPIAIJVIENNACL,"");CHKERRQ(ierr);
    if (is_gpumatrix) {
      ierr = VecSetType(mpjac->xsub,VECMPIVIENNACL);CHKERRQ(ierr);
      ierr = VecSetType(mpjac->ysub,VECMPIVIENNACL);CHKERRQ(ierr);
    }
#endif
    ierr = PetscLogObjectParent((PetscObject)pc,(PetscObject)mpjac->xsub);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)pc,(PetscObject)mpjac->ysub);CHKERRQ(ierr);

    /* switch the PC to the multiproc implementations */
    pc->ops->reset   = PCReset_BJacobi_Multiproc;
    pc->ops->destroy = PCDestroy_BJacobi_Multiproc;
    pc->ops->apply   = PCApply_BJacobi_Multiproc;
    pc->ops->matapply= PCMatApply_BJacobi_Multiproc;
  } else { /* pc->setupcalled */
    subcomm = PetscSubcommChild(mpjac->psubcomm);
    if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
      /* destroy old matrix blocks, then get new matrix blocks */
      if (mpjac->submats) {ierr = MatDestroy(&mpjac->submats);CHKERRQ(ierr);}
      ierr = MatGetMultiProcBlock(pc->pmat,subcomm,MAT_INITIAL_MATRIX,&mpjac->submats);CHKERRQ(ierr);
    } else {
      ierr = MatGetMultiProcBlock(pc->pmat,subcomm,MAT_REUSE_MATRIX,&mpjac->submats);CHKERRQ(ierr);
    }
    ierr = KSPSetOperators(jac->ksp[0],mpjac->submats,mpjac->submats);CHKERRQ(ierr);
  }

  if (!wasSetup && pc->setfromoptionscalled) {
    ierr = KSPSetFromOptions(jac->ksp[0]);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1327