xref: /petsc/src/ksp/pc/impls/asm/asm.c (revision e3caeda681d93b7b1d053090fe6dee7657faa56d)
1 #define PETSCKSP_DLL
2 
3 /*
4   This file defines an additive Schwarz preconditioner for any Mat implementation.
5 
6   Note that each processor may have any number of subdomains. But in order to
7   deal easily with the VecScatter(), we treat each processor as if it has the
8   same number of subdomains.
9 
10        n - total number of true subdomains on all processors
11        n_local_true - actual number of subdomains on this processor
12        n_local = maximum over all processors of n_local_true
13 */
14 #include "private/pcimpl.h"     /*I "petscpc.h" I*/
15 
/* Private context for the additive Schwarz (ASM) preconditioner. */
typedef struct {
  PetscInt   n,n_local,n_local_true; /* n: total subdomains across all processes;
                                        n_local: max over all processes of n_local_true
                                          (so collective loops have uniform trip counts);
                                        n_local_true: subdomains actually on this process */
  PetscInt   overlap;             /* overlap requested by user */
  KSP        *ksp;                /* linear solvers for each block (length n_local_true) */
  VecScatter *scat;               /* mapping to subregion (length n_local) */
  Vec        *x,*y;               /* work vectors, one pair per subdomain (length n_local) */
  IS         *is;                 /* index set that defines each subdomain (length n_local_true) */
  Mat        *mat,*pmat;          /* mat is not currently used; pmat holds the extracted submatrices */
  PCASMType  type;                /* use reduced interpolation, restriction or both */
  PetscTruth type_set;            /* if user set this value (so won't change it for symmetric problems) */
  PetscTruth same_local_solves;   /* flag indicating whether all local solvers are same */
  PetscTruth inplace;             /* indicates that the sub-matrices are deleted after
                                     PCSetUpOnBlocks() is done. Similar to inplace
                                     factorization in the case of LU and ILU */
} PC_ASM;
31 
#undef __FUNCT__
#define __FUNCT__ "PCView_ASM"
/*
   PCView_ASM - Prints the ASM preconditioner state: block counts, overlap,
   restriction/interpolation type and the sub-KSP solver(s).  If all local
   solvers are the same only one representative KSP is shown (from rank 0);
   otherwise every process prints each of its local block solvers in turn.

   Supports ASCII and string viewers only.
*/
static PetscErrorCode PCView_ASM(PC pc,PetscViewer viewer)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       i,bsz;
  PetscTruth     iascii,isstring;
  PetscViewer    sviewer;


  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_STRING,&isstring);CHKERRQ(ierr);
  if (iascii) {
    if (osm->n > 0) {
      ierr = PetscViewerASCIIPrintf(viewer,"  Additive Schwarz: total subdomain blocks = %D, amount of overlap = %D\n",osm->n,osm->overlap);CHKERRQ(ierr);
    } else {
      ierr = PetscViewerASCIIPrintf(viewer,"  Additive Schwarz: total subdomain blocks not yet set, amount of overlap = %D\n",osm->overlap);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPrintf(viewer,"  Additive Schwarz: restriction/interpolation type - %s\n",PCASMTypes[osm->type]);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
    if (osm->same_local_solves) {
      if (osm->ksp) {
	ierr = PetscViewerASCIIPrintf(viewer,"  Local solve is same for all blocks, in the following KSP and PC objects:\n");CHKERRQ(ierr);
	ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
	/* all solvers are the same, so only rank 0 prints the representative one */
	if (!rank) {
	  ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
	  ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);
	  ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
	}
	ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
      }
    } else {
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] number of local blocks = %D\n",(int)rank,osm->n_local_true);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Local solve info for each block is in the following KSP and PC objects:\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
      /* loop to n_local (max blocks over all processes), not n_local_true, so
         every process makes the same number of Get/RestoreSingleton calls;
         processes with fewer blocks skip the printing inside */
      for (i=0; i<osm->n_local; i++) {
        ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
        if (i < osm->n_local_true) {
	  ierr = ISGetLocalSize(osm->is[i],&bsz);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"[%d] local block number %D, size = %D\n",(int)rank,i,bsz);CHKERRQ(ierr);
          ierr = KSPView(osm->ksp[i],sviewer);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
        }
        ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
      }
      ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    }
  } else if (isstring) {
    ierr = PetscViewerStringSPrintf(viewer," blocks=%D, overlap=%D, type=%s",osm->n,osm->overlap,PCASMTypes[osm->type]);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (osm->ksp) {ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);}
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for PCASM",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}
95 
96 #undef __FUNCT__
97 #define __FUNCT__ "PCASMPrintSubdomains"
98 static PetscErrorCode PCASMPrintSubdomains(PC pc)
99 {
100   PC_ASM         *osm  = (PC_ASM*)pc->data;
101   const char     *prefix;
102   char           fname[PETSC_MAX_PATH_LEN+1];
103   PetscViewer    viewer;
104   PetscInt       i,j,nidx;
105   const PetscInt *idx;
106   PetscErrorCode ierr;
107   PetscFunctionBegin;
108   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
109   ierr = PetscOptionsGetString(prefix,"-pc_asm_print_subdomains",
110 			       fname,PETSC_MAX_PATH_LEN,PETSC_NULL);CHKERRQ(ierr);
111   if (fname[0] == 0) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); };
112   ierr = PetscViewerASCIIOpen(((PetscObject)pc)->comm,fname,&viewer);CHKERRQ(ierr);
113   for (i=0;i<osm->n_local_true;i++) {
114     ierr = ISGetLocalSize(osm->is[i],&nidx);CHKERRQ(ierr);
115     ierr = ISGetIndices(osm->is[i],&idx);CHKERRQ(ierr);
116     for (j=0; j<nidx; j++) {
117       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%D ",idx[j]);CHKERRQ(ierr);
118     }
119     ierr = ISRestoreIndices(osm->is[i],&idx);CHKERRQ(ierr);
120     ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n");CHKERRQ(ierr);
121   }
122   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
123   ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
124   PetscFunctionReturn(0);
125 }
126 
#undef __FUNCT__
#define __FUNCT__ "PCSetUp_ASM"
/*
   PCSetUp_ASM - Builds everything needed to apply ASM: decides the number of
   subdomains, creates (or accepts user-supplied) index sets, extends them by
   the requested overlap, creates the scatters and per-block work vectors,
   creates the per-block KSP solvers, and extracts the submatrices.

   On a repeat setup (pc->setupcalled) only the submatrices are refreshed;
   they are rebuilt from scratch when the nonzero pattern changed or when
   in-place mode already destroyed them after PCSetUpOnBlocks_ASM().
*/
static PetscErrorCode PCSetUp_ASM(PC pc)
{
  PC_ASM         *osm  = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscTruth     symset,flg;
  PetscInt       i,m;
  PetscMPIInt    size;
  MatReuse       scall = MAT_REUSE_MATRIX;
  IS             isl;
  KSP            ksp;
  PC             subpc;
  const char     *prefix,*pprefix;
  Vec            vec;

  PetscFunctionBegin;
  if (!pc->setupcalled) {

    /* default to the basic (symmetric) variant for a known-symmetric matrix
       unless the user explicitly chose a type */
    if (!osm->type_set) {
      ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
      if (symset && flg) { osm->type = PC_ASM_BASIC; }
    }

    if (osm->n == PETSC_DECIDE && osm->n_local_true < 1) {
      /* no subdomains given, use one per processor */
      osm->n_local = osm->n_local_true = 1;
      ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
      osm->n = size;
    } else if (osm->n == PETSC_DECIDE) {
      /* determine global number of subdomains */
      PetscInt inwork[2],outwork[2];
      inwork[0] = inwork[1] = osm->n_local_true;
      /* single reduction yields both the max (n_local) and the sum (n) of the
         per-process block counts */
      ierr = MPI_Allreduce(inwork,outwork,1,MPIU_2INT,PetscMaxSum_Op,((PetscObject)pc)->comm);CHKERRQ(ierr);
      osm->n_local = outwork[0];
      osm->n       = outwork[1];
    }

    if (!osm->is){ /* create the index sets */
      ierr = PCASMCreateSubdomains(pc->pmat,osm->n_local_true,&osm->is);CHKERRQ(ierr);
    }
    ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
    flg  = PETSC_FALSE;
    ierr = PetscOptionsGetTruth(prefix,"-pc_asm_print_subdomains",&flg,PETSC_NULL);CHKERRQ(ierr);
    if (flg) { ierr = PCASMPrintSubdomains(pc);CHKERRQ(ierr); }

    /*  Extend the "overlapping" regions by a number of steps  */
    ierr = MatIncreaseOverlap(pc->pmat,osm->n_local_true,osm->is,osm->overlap);CHKERRQ(ierr);
    /* sorted index sets are required by the scatter/submatrix extraction below */
    for (i=0; i<osm->n_local_true; i++) {
      ierr = ISSort(osm->is[i]);CHKERRQ(ierr);
    }

    /* Create the local work vectors and scatter contexts */
    ierr = MatGetVecs(pc->pmat,&vec,0);CHKERRQ(ierr);
    ierr = PetscMalloc(osm->n_local*sizeof(VecScatter *),&osm->scat);CHKERRQ(ierr);
    ierr = PetscMalloc(osm->n_local*sizeof(Vec *),&osm->x);CHKERRQ(ierr);
    ierr = PetscMalloc(osm->n_local*sizeof(Vec *),&osm->y);CHKERRQ(ierr);
    for (i=0; i<osm->n_local_true; i++) {
      ierr = ISGetLocalSize(osm->is[i],&m);CHKERRQ(ierr);
      ierr = VecCreateSeq(PETSC_COMM_SELF,m,&osm->x[i]);CHKERRQ(ierr);
      ierr = VecDuplicate(osm->x[i],&osm->y[i]);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,m,0,1,&isl);CHKERRQ(ierr);
      ierr = VecScatterCreate(vec,osm->is[i],osm->x[i],isl,&osm->scat[i]);CHKERRQ(ierr);
      ierr = ISDestroy(isl);CHKERRQ(ierr);
    }
    /* pad with zero-length vectors/scatters up to n_local so that the
       collective scatter loops in PCApply_ASM() are load-matched across
       processes that own fewer subdomains than the maximum */
    for (i=osm->n_local_true; i<osm->n_local; i++) {
      ierr = VecCreateSeq(PETSC_COMM_SELF,0,&osm->x[i]);CHKERRQ(ierr);
      ierr = VecDuplicate(osm->x[i],&osm->y[i]);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&isl);CHKERRQ(ierr);
      ierr = VecScatterCreate(vec,isl,osm->x[i],isl,&osm->scat[i]);CHKERRQ(ierr);
      ierr = ISDestroy(isl);CHKERRQ(ierr);
    }
    ierr = VecDestroy(vec);CHKERRQ(ierr);

    /* Create the local solvers */
    ierr = PetscMalloc(osm->n_local_true*sizeof(KSP *),&osm->ksp);CHKERRQ(ierr);
    for (i=0; i<osm->n_local_true; i++) {
      ierr = KSPCreate(PETSC_COMM_SELF,&ksp);CHKERRQ(ierr);
      ierr = PetscLogObjectParent(pc,ksp);CHKERRQ(ierr);
      ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr);
      ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
      ierr = KSPGetPC(ksp,&subpc);CHKERRQ(ierr);
      /* sub-solvers are configured with the "<prefix>sub_" options prefix */
      ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
      ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr);
      ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr);
      osm->ksp[i] = ksp;
    }
    scall = MAT_INITIAL_MATRIX;

  } else {
    /*
       Destroy the blocks from the previous iteration
    */
    if (pc->flag == DIFFERENT_NONZERO_PATTERN || osm->inplace) {
      /* in in-place mode the submatrices were already destroyed in
         PCSetUpOnBlocks_ASM(), so there is nothing to free here */
      if (!osm->inplace) {
	ierr = MatDestroyMatrices(osm->n_local_true,&osm->pmat);CHKERRQ(ierr);
      }
      scall = MAT_INITIAL_MATRIX;
    }
  }

  /*
     Extract out the submatrices
  */
  ierr = MatGetSubMatrices(pc->pmat,osm->n_local_true,osm->is,osm->is,scall,&osm->pmat);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr);
    for (i=0; i<osm->n_local_true; i++) {
      ierr = PetscLogObjectParent(pc,osm->pmat[i]);CHKERRQ(ierr);
      ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr);
    }
  }

  /* Return control to the user so that the submatrices can be modified (e.g., to apply
     different boundary conditions for the submatrices than for the global problem) */
  ierr = PCModifySubMatrices(pc,osm->n_local_true,osm->is,osm->is,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr);

  /*
     Loop over subdomains putting them into local ksp
  */
  for (i=0; i<osm->n_local_true; i++) {
    ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i],pc->flag);CHKERRQ(ierr);
    if (!pc->setupcalled) {
      ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr);
    }
  }

  PetscFunctionReturn(0);
}
256 
257 #undef __FUNCT__
258 #define __FUNCT__ "PCSetUpOnBlocks_ASM"
259 static PetscErrorCode PCSetUpOnBlocks_ASM(PC pc)
260 {
261   PC_ASM         *osm = (PC_ASM*)pc->data;
262   PetscErrorCode ierr;
263   PetscInt       i;
264 
265   PetscFunctionBegin;
266   for (i=0; i<osm->n_local_true; i++) {
267     ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr);
268   }
269   /*
270      If inplace flag is set, then destroy the matrix after the setup
271      on blocks is done.
272   */
273   if (osm->inplace && osm->n_local_true > 0) {
274     ierr = MatDestroyMatrices(osm->n_local_true,&osm->pmat);CHKERRQ(ierr);
275   }
276   PetscFunctionReturn(0);
277 }
278 
#undef __FUNCT__
#define __FUNCT__ "PCApply_ASM"
/*
   PCApply_ASM - Applies the additive Schwarz preconditioner: restrict x to
   each (overlapping) subdomain, solve the local problem, and add the local
   solutions back into y.

   The scatters are pipelined: all restriction VecScatterBegin() calls are
   posted first, and each prolongation VecScatterBegin() is posted as soon
   as its local solve finishes, overlapping communication with computation.
*/
static PetscErrorCode PCApply_ASM(PC pc,Vec x,Vec y)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i,n_local = osm->n_local,n_local_true = osm->n_local_true;
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

  PetscFunctionBegin;
  /*
     Support for limiting the restriction or interpolation to only local
     subdomain values (leaving the other values 0).
  */
  if (!(osm->type & PC_ASM_RESTRICT)) {
    forward = SCATTER_FORWARD_LOCAL;
    /* have to zero the work RHS since scatter may leave some slots empty */
    for (i=0; i<n_local_true; i++) {
      ierr = VecSet(osm->x[i],0.0);CHKERRQ(ierr);
    }
  }
  if (!(osm->type & PC_ASM_INTERPOLATE)) {
    reverse = SCATTER_REVERSE_LOCAL;
  }

  /* post all restrictions before waiting on any of them */
  for (i=0; i<n_local; i++) {
    ierr = VecScatterBegin(osm->scat[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  /* y accumulates the subdomain contributions via ADD_VALUES below */
  ierr = VecSet(y,0.0);CHKERRQ(ierr);
  /* do the local solves */
  for (i=0; i<n_local_true; i++) {
    ierr = VecScatterEnd(osm->scat[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
    ierr = VecScatterBegin(osm->scat[i],osm->y[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  /* handle the rest of the scatters that do not have local solves
     (i >= n_local_true exist only to match the collective calls made by
     processes that own more subdomains) */
  for (i=n_local_true; i<n_local; i++) {
    ierr = VecScatterEnd(osm->scat[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    ierr = VecScatterBegin(osm->scat[i],osm->y[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  for (i=0; i<n_local; i++) {
    ierr = VecScatterEnd(osm->scat[i],osm->y[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
324 
#undef __FUNCT__
#define __FUNCT__ "PCApplyTranspose_ASM"
/*
   PCApplyTranspose_ASM - Applies the transpose of the ASM preconditioner.
   Structurally identical to PCApply_ASM(), but each local solve uses
   KSPSolveTranspose() and the roles of the restrict/interpolate flags are
   swapped (transposing R^T A^{-1} R reverses which side is truncated).
*/
static PetscErrorCode PCApplyTranspose_ASM(PC pc,Vec x,Vec y)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i,n_local = osm->n_local,n_local_true = osm->n_local_true;
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

  PetscFunctionBegin;
  /*
     Support for limiting the restriction or interpolation to only local
     subdomain values (leaving the other values 0).

     Note: these are reversed from the PCApply_ASM() because we are applying the
     transpose of the three terms
  */
  if (!(osm->type & PC_ASM_INTERPOLATE)) {
    forward = SCATTER_FORWARD_LOCAL;
    /* have to zero the work RHS since scatter may leave some slots empty */
    for (i=0; i<n_local_true; i++) {
      ierr = VecSet(osm->x[i],0.0);CHKERRQ(ierr);
    }
  }
  if (!(osm->type & PC_ASM_RESTRICT)) {
    reverse = SCATTER_REVERSE_LOCAL;
  }

  /* post all restrictions before waiting on any of them */
  for (i=0; i<n_local; i++) {
    ierr = VecScatterBegin(osm->scat[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  /* y accumulates the subdomain contributions via ADD_VALUES below */
  ierr = VecSet(y,0.0);CHKERRQ(ierr);
  /* do the local solves */
  for (i=0; i<n_local_true; i++) {
    ierr = VecScatterEnd(osm->scat[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
    ierr = VecScatterBegin(osm->scat[i],osm->y[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  /* handle the rest of the scatters that do not have local solves */
  for (i=n_local_true; i<n_local; i++) {
    ierr = VecScatterEnd(osm->scat[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    ierr = VecScatterBegin(osm->scat[i],osm->y[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  for (i=0; i<n_local; i++) {
    ierr = VecScatterEnd(osm->scat[i],osm->y[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
373 
374 #undef __FUNCT__
375 #define __FUNCT__ "PCDestroy_ASM"
376 static PetscErrorCode PCDestroy_ASM(PC pc)
377 {
378   PC_ASM         *osm = (PC_ASM*)pc->data;
379   PetscErrorCode ierr;
380   PetscInt       i;
381 
382   PetscFunctionBegin;
383   if (osm->ksp) {
384     for (i=0; i<osm->n_local_true; i++) {
385       ierr = KSPDestroy(osm->ksp[i]);CHKERRQ(ierr);
386     }
387     ierr = PetscFree(osm->ksp);CHKERRQ(ierr);
388   }
389   if (osm->pmat && !osm->inplace) {
390     if (osm->n_local_true > 0) {
391       ierr = MatDestroyMatrices(osm->n_local_true,&osm->pmat);CHKERRQ(ierr);
392     }
393   }
394   if (osm->scat) {
395     for (i=0; i<osm->n_local; i++) {
396       ierr = VecScatterDestroy(osm->scat[i]);CHKERRQ(ierr);
397       ierr = VecDestroy(osm->x[i]);CHKERRQ(ierr);
398       ierr = VecDestroy(osm->y[i]);CHKERRQ(ierr);
399     }
400     ierr = PetscFree(osm->scat);CHKERRQ(ierr);
401     ierr = PetscFree(osm->x);CHKERRQ(ierr);
402     ierr = PetscFree(osm->y);CHKERRQ(ierr);
403   }
404   if (osm->is) {
405     ierr = PCASMDestroySubdomains(osm->n_local_true,osm->is);CHKERRQ(ierr);
406   }
407   ierr = PetscFree(osm);CHKERRQ(ierr);
408   PetscFunctionReturn(0);
409 }
410 
411 #undef __FUNCT__
412 #define __FUNCT__ "PCSetFromOptions_ASM"
413 static PetscErrorCode PCSetFromOptions_ASM(PC pc)
414 {
415   PC_ASM         *osm = (PC_ASM*)pc->data;
416   PetscErrorCode ierr;
417   PetscInt       blocks,ovl;
418   PetscTruth     symset,flg;
419   PCASMType      asmtype;
420 
421   PetscFunctionBegin;
422   /* set the type to symmetric if matrix is symmetric */
423   if (!osm->type_set && pc->pmat) {
424     ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
425     if (symset && flg) { osm->type = PC_ASM_BASIC; }
426   }
427   ierr = PetscOptionsHead("Additive Schwarz options");CHKERRQ(ierr);
428     ierr = PetscOptionsInt("-pc_asm_blocks","Number of subdomains","PCASMSetTotalSubdomains",osm->n,&blocks,&flg);CHKERRQ(ierr);
429     if (flg) {ierr = PCASMSetTotalSubdomains(pc,blocks,PETSC_NULL);CHKERRQ(ierr); }
430     ierr = PetscOptionsInt("-pc_asm_overlap","Number of grid points overlap","PCASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr);
431     if (flg) {ierr = PCASMSetOverlap(pc,ovl);CHKERRQ(ierr); }
432     flg  = PETSC_FALSE;
433     ierr = PetscOptionsTruth("-pc_asm_in_place","Perform matrix factorization inplace","PCASMSetUseInPlace",flg,&flg,PETSC_NULL);CHKERRQ(ierr);
434     if (flg) {ierr = PCASMSetUseInPlace(pc);CHKERRQ(ierr); }
435     ierr = PetscOptionsEnum("-pc_asm_type","Type of restriction/extension","PCASMSetType",PCASMTypes,(PetscEnum)osm->type,(PetscEnum*)&asmtype,&flg);CHKERRQ(ierr);
436     if (flg) {ierr = PCASMSetType(pc,asmtype);CHKERRQ(ierr); }
437   ierr = PetscOptionsTail();CHKERRQ(ierr);
438   PetscFunctionReturn(0);
439 }
440 
441 /*------------------------------------------------------------------------------------*/
442 
443 EXTERN_C_BEGIN
444 #undef __FUNCT__
445 #define __FUNCT__ "PCASMSetLocalSubdomains_ASM"
446 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetLocalSubdomains_ASM(PC pc,PetscInt n,IS is[])
447 {
448   PC_ASM         *osm = (PC_ASM*)pc->data;
449   PetscErrorCode ierr;
450   PetscInt       i;
451 
452   PetscFunctionBegin;
453   if (n < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more blocks, n = %D",n);
454   if (pc->setupcalled && (n != osm->n_local_true || is)) {
455     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"PCASMSetLocalSubdomains() should be called before calling PCSetup().");
456   }
457   if (!pc->setupcalled) {
458     if (is) {
459       for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is[i]);CHKERRQ(ierr);}
460     }
461     if (osm->is) {
462       ierr = PCASMDestroySubdomains(osm->n_local_true,osm->is);CHKERRQ(ierr);
463     }
464     osm->n_local_true = n;
465     osm->is           = 0;
466     if (is) {
467       ierr = PetscMalloc(n*sizeof(IS *),&osm->is);CHKERRQ(ierr);
468       for (i=0; i<n; i++) { osm->is[i] = is[i]; }
469     }
470   }
471   PetscFunctionReturn(0);
472 }
473 EXTERN_C_END
474 
475 EXTERN_C_BEGIN
476 #undef __FUNCT__
477 #define __FUNCT__ "PCASMSetTotalSubdomains_ASM"
478 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetTotalSubdomains_ASM(PC pc,PetscInt N,IS *is)
479 {
480   PC_ASM         *osm = (PC_ASM*)pc->data;
481   PetscErrorCode ierr;
482   PetscMPIInt    rank,size;
483   PetscInt       n;
484 
485   PetscFunctionBegin;
486   if (N < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Number of total blocks must be > 0, N = %D",N);
487   if (is) SETERRQ(PETSC_ERR_SUP,"Use PCASMSetLocalSubdomains() to set specific index sets\n\they cannot be set globally yet.");
488 
489   /*
490      Split the subdomains equally amoung all processors
491   */
492   ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
493   ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
494   n = N/size + ((N % size) > rank);
495   if (!n) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"Process %d must have at least one block: total processors %d total blocks %D",(int)rank,(int)size,N);
496   if (pc->setupcalled && n != osm->n_local_true) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"PCASMSetTotalSubdomains() should be called before PCSetup().");
497   if (!pc->setupcalled) {
498     if (osm->is) {
499       ierr = PCASMDestroySubdomains(osm->n_local_true,osm->is);CHKERRQ(ierr);
500     }
501     osm->n_local_true = n;
502     osm->is           = 0;
503   }
504   PetscFunctionReturn(0);
505 }
506 EXTERN_C_END
507 
508 EXTERN_C_BEGIN
509 #undef __FUNCT__
510 #define __FUNCT__ "PCASMSetOverlap_ASM"
511 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetOverlap_ASM(PC pc,PetscInt ovl)
512 {
513   PC_ASM *osm = (PC_ASM*)pc->data;
514 
515   PetscFunctionBegin;
516   if (ovl < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
517   if (pc->setupcalled && ovl != osm->overlap) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"PCASMSetOverlap() should be called before PCSetup().");
518   if (!pc->setupcalled) {
519     osm->overlap = ovl;
520   }
521   PetscFunctionReturn(0);
522 }
523 EXTERN_C_END
524 
525 EXTERN_C_BEGIN
526 #undef __FUNCT__
527 #define __FUNCT__ "PCASMSetType_ASM"
528 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetType_ASM(PC pc,PCASMType type)
529 {
530   PC_ASM *osm = (PC_ASM*)pc->data;
531 
532   PetscFunctionBegin;
533   osm->type     = type;
534   osm->type_set = PETSC_TRUE;
535   PetscFunctionReturn(0);
536 }
537 EXTERN_C_END
538 
539 EXTERN_C_BEGIN
540 #undef __FUNCT__
541 #define __FUNCT__ "PCASMGetSubKSP_ASM"
542 PetscErrorCode PETSCKSP_DLLEXPORT PCASMGetSubKSP_ASM(PC pc,PetscInt *n_local,PetscInt *first_local,KSP **ksp)
543 {
544   PC_ASM         *osm = (PC_ASM*)pc->data;
545   PetscErrorCode ierr;
546 
547   PetscFunctionBegin;
548   if (osm->n_local_true < 1) {
549     SETERRQ(PETSC_ERR_ORDER,"Need to call PCSetUP() on PC (or KSPSetUp() on the outer KSP object) before calling here");
550   }
551 
552   if (n_local) {
553     *n_local = osm->n_local_true;
554   }
555   if (first_local) {
556     ierr = MPI_Scan(&osm->n_local_true,first_local,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr);
557     *first_local -= osm->n_local_true;
558   }
559   if (ksp) {
560     /* Assume that local solves are now different; not necessarily
561        true though!  This flag is used only for PCView_ASM() */
562     *ksp                   = osm->ksp;
563     osm->same_local_solves = PETSC_FALSE;
564   }
565   PetscFunctionReturn(0);
566 }
567 EXTERN_C_END
568 
569 EXTERN_C_BEGIN
570 #undef __FUNCT__
571 #define __FUNCT__ "PCASMSetUseInPlace_ASM"
572 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetUseInPlace_ASM(PC pc)
573 {
574   PC_ASM *osm = (PC_ASM*)pc->data;
575 
576   PetscFunctionBegin;
577   osm->inplace = PETSC_TRUE;
578   PetscFunctionReturn(0);
579 }
580 EXTERN_C_END
581 
582 /*----------------------------------------------------------------------------*/
583 #undef __FUNCT__
584 #define __FUNCT__ "PCASMSetUseInPlace"
585 /*@
586    PCASMSetUseInPlace - Tells the system to destroy the matrix after setup is done.
587 
588    Collective on PC
589 
590    Input Parameters:
591 .  pc - the preconditioner context
592 
593    Options Database Key:
594 .  -pc_asm_in_place - Activates in-place factorization
595 
596    Note:
597    PCASMSetUseInplace() can only be used with the KSP method KSPPREONLY, and
598    when the original matrix is not required during the Solve process.
599    This destroys the matrix, early thus, saving on memory usage.
600 
601    Level: intermediate
602 
603 .keywords: PC, set, factorization, direct, inplace, in-place, ASM
604 
605 .seealso: PCFactorSetUseInPlace()
606 @*/
607 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetUseInPlace(PC pc)
608 {
609   PetscErrorCode ierr,(*f)(PC);
610 
611   PetscFunctionBegin;
612   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
613   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetUseInPlace_C",(void (**)(void))&f);CHKERRQ(ierr);
614   if (f) {
615     ierr = (*f)(pc);CHKERRQ(ierr);
616   }
617   PetscFunctionReturn(0);
618 }
619 /*----------------------------------------------------------------------------*/
620 
621 #undef __FUNCT__
622 #define __FUNCT__ "PCASMSetLocalSubdomains"
623 /*@C
624     PCASMSetLocalSubdomains - Sets the local subdomains (for this processor
625     only) for the additive Schwarz preconditioner.
626 
627     Collective on PC
628 
629     Input Parameters:
630 +   pc - the preconditioner context
631 .   n - the number of subdomains for this processor (default value = 1)
632 -   is - the index sets that define the subdomains for this processor
633          (or PETSC_NULL for PETSc to determine subdomains)
634 
635     Notes:
636     The IS numbering is in the parallel, global numbering of the vector.
637 
638     By default the ASM preconditioner uses 1 block per processor.
639 
640     These index sets cannot be destroyed until after completion of the
641     linear solves for which the ASM preconditioner is being used.
642 
643     Use PCASMSetTotalSubdomains() to set the subdomains for all processors.
644 
645     Level: advanced
646 
647 .keywords: PC, ASM, set, local, subdomains, additive Schwarz
648 
649 .seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
650           PCASMCreateSubdomains2D(), PCASMGetLocalSubdomains()
651 @*/
652 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetLocalSubdomains(PC pc,PetscInt n,IS is[])
653 {
654   PetscErrorCode ierr,(*f)(PC,PetscInt,IS[]);
655 
656   PetscFunctionBegin;
657   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
658   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetLocalSubdomains_C",(void (**)(void))&f);CHKERRQ(ierr);
659   if (f) {
660     ierr = (*f)(pc,n,is);CHKERRQ(ierr);
661   }
662   PetscFunctionReturn(0);
663 }
664 
665 #undef __FUNCT__
666 #define __FUNCT__ "PCASMSetTotalSubdomains"
667 /*@C
668     PCASMSetTotalSubdomains - Sets the subdomains for all processor for the
669     additive Schwarz preconditioner.  Either all or no processors in the
670     PC communicator must call this routine, with the same index sets.
671 
672     Collective on PC
673 
674     Input Parameters:
675 +   pc - the preconditioner context
676 .   n - the number of subdomains for all processors
677 -   is - the index sets that define the subdomains for all processor
678          (or PETSC_NULL for PETSc to determine subdomains)
679 
680     Options Database Key:
681     To set the total number of subdomain blocks rather than specify the
682     index sets, use the option
683 .    -pc_asm_blocks <blks> - Sets total blocks
684 
685     Notes:
686     Currently you cannot use this to set the actual subdomains with the argument is.
687 
688     By default the ASM preconditioner uses 1 block per processor.
689 
690     These index sets cannot be destroyed until after completion of the
691     linear solves for which the ASM preconditioner is being used.
692 
693     Use PCASMSetLocalSubdomains() to set local subdomains.
694 
695     Level: advanced
696 
697 .keywords: PC, ASM, set, total, global, subdomains, additive Schwarz
698 
699 .seealso: PCASMSetLocalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
700           PCASMCreateSubdomains2D()
701 @*/
702 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetTotalSubdomains(PC pc,PetscInt N,IS *is)
703 {
704   PetscErrorCode ierr,(*f)(PC,PetscInt,IS *);
705 
706   PetscFunctionBegin;
707   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
708   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetTotalSubdomains_C",(void (**)(void))&f);CHKERRQ(ierr);
709   if (f) {
710     ierr = (*f)(pc,N,is);CHKERRQ(ierr);
711   }
712   PetscFunctionReturn(0);
713 }
714 
715 #undef __FUNCT__
716 #define __FUNCT__ "PCASMSetOverlap"
717 /*@
718     PCASMSetOverlap - Sets the overlap between a pair of subdomains for the
719     additive Schwarz preconditioner.  Either all or no processors in the
720     PC communicator must call this routine.
721 
722     Collective on PC
723 
724     Input Parameters:
725 +   pc  - the preconditioner context
726 -   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 1)
727 
728     Options Database Key:
729 .   -pc_asm_overlap <ovl> - Sets overlap
730 
731     Notes:
732     By default the ASM preconditioner uses 1 block per processor.  To use
733     multiple blocks per perocessor, see PCASMSetTotalSubdomains() and
734     PCASMSetLocalSubdomains() (and the option -pc_asm_blocks <blks>).
735 
736     The overlap defaults to 1, so if one desires that no additional
737     overlap be computed beyond what may have been set with a call to
738     PCASMSetTotalSubdomains() or PCASMSetLocalSubdomains(), then ovl
739     must be set to be 0.  In particular, if one does not explicitly set
740     the subdomains an application code, then all overlap would be computed
741     internally by PETSc, and using an overlap of 0 would result in an ASM
742     variant that is equivalent to the block Jacobi preconditioner.
743 
744     Note that one can define initial index sets with any overlap via
745     PCASMSetTotalSubdomains() or PCASMSetLocalSubdomains(); the routine
746     PCASMSetOverlap() merely allows PETSc to extend that overlap further
747     if desired.
748 
749     Level: intermediate
750 
751 .keywords: PC, ASM, set, overlap
752 
753 .seealso: PCASMSetTotalSubdomains(), PCASMSetLocalSubdomains(), PCASMGetSubKSP(),
754           PCASMCreateSubdomains2D(), PCASMGetLocalSubdomains()
755 @*/
756 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetOverlap(PC pc,PetscInt ovl)
757 {
758   PetscErrorCode ierr,(*f)(PC,PetscInt);
759 
760   PetscFunctionBegin;
761   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
762   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetOverlap_C",(void (**)(void))&f);CHKERRQ(ierr);
763   if (f) {
764     ierr = (*f)(pc,ovl);CHKERRQ(ierr);
765   }
766   PetscFunctionReturn(0);
767 }
768 
769 #undef __FUNCT__
770 #define __FUNCT__ "PCASMSetType"
771 /*@
772     PCASMSetType - Sets the type of restriction and interpolation used
773     for local problems in the additive Schwarz method.
774 
775     Collective on PC
776 
777     Input Parameters:
778 +   pc  - the preconditioner context
779 -   type - variant of ASM, one of
780 .vb
781       PC_ASM_BASIC       - full interpolation and restriction
782       PC_ASM_RESTRICT    - full restriction, local processor interpolation
783       PC_ASM_INTERPOLATE - full interpolation, local processor restriction
784       PC_ASM_NONE        - local processor restriction and interpolation
785 .ve
786 
787     Options Database Key:
788 .   -pc_asm_type [basic,restrict,interpolate,none] - Sets ASM type
789 
790     Level: intermediate
791 
792 .keywords: PC, ASM, set, type
793 
794 .seealso: PCASMSetTotalSubdomains(), PCASMSetTotalSubdomains(), PCASMGetSubKSP(),
795           PCASMCreateSubdomains2D()
796 @*/
797 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetType(PC pc,PCASMType type)
798 {
799   PetscErrorCode ierr,(*f)(PC,PCASMType);
800 
801   PetscFunctionBegin;
802   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
803   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetType_C",(void (**)(void))&f);CHKERRQ(ierr);
804   if (f) {
805     ierr = (*f)(pc,type);CHKERRQ(ierr);
806   }
807   PetscFunctionReturn(0);
808 }
809 
810 #undef __FUNCT__
811 #define __FUNCT__ "PCASMGetSubKSP"
812 /*@C
813    PCASMGetSubKSP - Gets the local KSP contexts for all blocks on
814    this processor.
815 
816    Collective on PC iff first_local is requested
817 
818    Input Parameter:
819 .  pc - the preconditioner context
820 
821    Output Parameters:
822 +  n_local - the number of blocks on this processor or PETSC_NULL
823 .  first_local - the global number of the first block on this processor or PETSC_NULL,
824                  all processors must request or all must pass PETSC_NULL
825 -  ksp - the array of KSP contexts
826 
827    Note:
828    After PCASMGetSubKSP() the array of KSPes is not to be freed
829 
830    Currently for some matrix implementations only 1 block per processor
831    is supported.
832 
833    You must call KSPSetUp() before calling PCASMGetSubKSP().
834 
835    Level: advanced
836 
837 .keywords: PC, ASM, additive Schwarz, get, sub, KSP, context
838 
839 .seealso: PCASMSetTotalSubdomains(), PCASMSetTotalSubdomains(), PCASMSetOverlap(),
840           PCASMCreateSubdomains2D(),
841 @*/
842 PetscErrorCode PETSCKSP_DLLEXPORT PCASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
843 {
844   PetscErrorCode ierr,(*f)(PC,PetscInt*,PetscInt*,KSP **);
845 
846   PetscFunctionBegin;
847   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
848   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMGetSubKSP_C",(void (**)(void))&f);CHKERRQ(ierr);
849   if (f) {
850     ierr = (*f)(pc,n_local,first_local,ksp);CHKERRQ(ierr);
851   } else {
852     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot get subksp for this type of PC");
853   }
854 
855  PetscFunctionReturn(0);
856 }
857 
858 /* -------------------------------------------------------------------------------------*/
859 /*MC
860    PCASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
861            its own KSP object.
862 
863    Options Database Keys:
864 +  -pc_asm_truelocal - Activates PCASMSetUseTrueLocal()
865 .  -pc_asm_in_place - Activates in-place factorization
866 .  -pc_asm_blocks <blks> - Sets total blocks
867 .  -pc_asm_overlap <ovl> - Sets overlap
868 -  -pc_asm_type [basic,restrict,interpolate,none] - Sets ASM type
869 
870      IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
871       will get a different convergence rate due to the default option of -pc_asm_type restrict. Use
872       -pc_asm_type basic to use the standard ASM.
873 
874    Notes: Each processor can have one or more blocks, but a block cannot be shared by more
875      than one processor. Defaults to one block per processor.
876 
877      To set options on the solvers for each block append -sub_ to all the KSP, and PC
878         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
879 
880      To set the options on the solvers separate for each block call PCASMGetSubKSP()
881          and set the options directly on the resulting KSP object (you can access its PC
882          with KSPGetPC())
883 
884 
885    Level: beginner
886 
887    Concepts: additive Schwarz method
888 
889     References:
890     An additive variant of the Schwarz alternating method for the case of many subregions
891     M Dryja, OB Widlund - Courant Institute, New York University Technical report
892 
893     Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
894     Barry Smith, Petter Bjorstad, and William Gropp, Cambridge University Press, ISBN 0-521-49589-X.
895 
896 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
897            PCBJACOBI, PCASMSetUseTrueLocal(), PCASMGetSubKSP(), PCASMSetLocalSubdomains(),
898            PCASMSetTotalSubdomains(), PCSetModifySubmatrices(), PCASMSetOverlap(), PCASMSetType(),
899            PCASMSetUseInPlace()
900 M*/
901 
902 EXTERN_C_BEGIN
903 #undef __FUNCT__
904 #define __FUNCT__ "PCCreate_ASM"
/*
   PCCreate_ASM - Creates the data structure for the additive Schwarz
   preconditioner and registers its type-specific methods.

   This is the PCASM constructor invoked by PCSetType(pc,PCASM); it
   allocates the PC_ASM context, fills in defaults, installs the
   function-table entries, and composes the "setter" routines that the
   public PCASMSetXXX() wrappers dispatch to.
*/
PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_ASM(PC pc)
{
  PetscErrorCode ierr;
  PC_ASM         *osm;

  PetscFunctionBegin;
  /* Allocate the ASM context zeroed and logged against the PC object */
  ierr = PetscNewLog(pc,PC_ASM,&osm);CHKERRQ(ierr);
  osm->n                 = PETSC_DECIDE;     /* total subdomains: decided at setup unless user sets it */
  osm->n_local           = 0;                /* max subdomains over all processors (set in PCSetUp_ASM) */
  osm->n_local_true      = 0;                /* actual subdomains on this processor */
  osm->overlap           = 1;                /* default overlap of one mesh line/row */
  osm->ksp               = 0;
  osm->scat              = 0;
  osm->x                 = 0;
  osm->y                 = 0;
  osm->is                = 0;
  osm->mat               = 0;
  osm->pmat              = 0;
  osm->type              = PC_ASM_RESTRICT;  /* restricted ASM is the default variant */
  osm->same_local_solves = PETSC_TRUE;
  osm->inplace           = PETSC_FALSE;

  pc->data                   = (void*)osm;
  pc->ops->apply             = PCApply_ASM;
  pc->ops->applytranspose    = PCApplyTranspose_ASM;
  pc->ops->setup             = PCSetUp_ASM;
  pc->ops->destroy           = PCDestroy_ASM;
  pc->ops->setfromoptions    = PCSetFromOptions_ASM;
  pc->ops->setuponblocks     = PCSetUpOnBlocks_ASM;
  pc->ops->view              = PCView_ASM;
  pc->ops->applyrichardson   = 0;

  /* Compose the type-specific implementations so the generic PCASMSetXXX()
     wrappers (which use PetscObjectQueryFunction()) can find them. */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetLocalSubdomains_C","PCASMSetLocalSubdomains_ASM",
                    PCASMSetLocalSubdomains_ASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetTotalSubdomains_C","PCASMSetTotalSubdomains_ASM",
                    PCASMSetTotalSubdomains_ASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetOverlap_C","PCASMSetOverlap_ASM",
                    PCASMSetOverlap_ASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetType_C","PCASMSetType_ASM",
                    PCASMSetType_ASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMGetSubKSP_C","PCASMGetSubKSP_ASM",
                    PCASMGetSubKSP_ASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetUseInPlace_C","PCASMSetUseInPlace_ASM",
                    PCASMSetUseInPlace_ASM);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
951 EXTERN_C_END
952 
953 
954 #undef __FUNCT__
955 #define __FUNCT__ "PCASMCreateSubdomains"
956 /*@C
957    PCASMCreateSubdomains - Creates the index sets for the overlapping Schwarz
958    preconditioner for a any problem on a general grid.
959 
960    Collective
961 
962    Input Parameters:
963 +  A - The global matrix operator
964 -  n - the number of local blocks
965 
966    Output Parameters:
967 .  outis - the array of index sets defining the subdomains
968 
969    Level: advanced
970 
971    Note: this generates nonoverlapping subdomains; the PCASM will generate the overlap
972     from these if you use PCASMSetLocalSubdomains()
973 
974     In the Fortran version you must provide the array outis[] already allocated of length n.
975 
976 .keywords: PC, ASM, additive Schwarz, create, subdomains, unstructured grid
977 
978 .seealso: PCASMSetLocalSubdomains(), PCASMDestroySubdomains()
979 @*/
980 PetscErrorCode PETSCKSP_DLLEXPORT PCASMCreateSubdomains(Mat A, PetscInt n, IS* outis[])
981 {
982   MatPartitioning           mpart;
983   const char                *prefix;
984   PetscErrorCode            (*f)(Mat,PetscTruth*,MatReuse,Mat*);
985   PetscMPIInt               size;
986   PetscInt                  i,j,rstart,rend,bs;
987   PetscTruth                iscopy = PETSC_FALSE,isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
988   Mat                       Ad = PETSC_NULL, adj;
989   IS                        ispart,isnumb,*is;
990   PetscErrorCode            ierr;
991 
992   PetscFunctionBegin;
993   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
994   PetscValidPointer(outis,3);
995   if (n < 1) {SETERRQ1(PETSC_ERR_ARG_WRONG,"number of local blocks must be > 0, n = %D",n);}
996 
997   /* Get prefix, row distribution, and block size */
998   ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr);
999   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1000   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
1001   if (rstart/bs*bs != rstart || rend/bs*bs != rend) {
1002     SETERRQ3(PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);
1003   }
1004   /* Get diagonal block from matrix if possible */
1005   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1006   ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",(void (**)(void))&f);CHKERRQ(ierr);
1007   if (f) {
1008     ierr = (*f)(A,&iscopy,MAT_INITIAL_MATRIX,&Ad);CHKERRQ(ierr);
1009   } else if (size == 1) {
1010     iscopy = PETSC_FALSE; Ad = A;
1011   } else {
1012     iscopy = PETSC_FALSE; Ad = PETSC_NULL;
1013   }
1014   if (Ad) {
1015     ierr = PetscTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
1016     if (!isbaij) {ierr = PetscTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);}
1017   }
1018   if (Ad && n > 1) {
1019     PetscTruth match,done;
1020     /* Try to setup a good matrix partitioning if available */
1021     ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr);
1022     ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
1023     ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
1024     ierr = PetscTypeCompare((PetscObject)mpart,MAT_PARTITIONING_CURRENT,&match);CHKERRQ(ierr);
1025     if (!match) {
1026       ierr = PetscTypeCompare((PetscObject)mpart,MAT_PARTITIONING_SQUARE,&match);CHKERRQ(ierr);
1027     }
1028     if (!match) { /* assume a "good" partitioner is available */
1029       PetscInt na,*ia,*ja;
1030       ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1031       if (done) {
1032 	/* Build adjacency matrix by hand. Unfortunately a call to
1033 	   MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1034 	   remove the block-aij structure and we cannot expect
1035 	   MatPartitioning to split vertices as we need */
1036 	PetscInt i,j,*row,len,nnz,cnt,*iia=0,*jja=0;
1037 	nnz = 0;
1038 	for (i=0; i<na; i++) { /* count number of nonzeros */
1039 	  len = ia[i+1] - ia[i];
1040 	  row = ja + ia[i];
1041 	  for (j=0; j<len; j++) {
1042 	    if (row[j] == i) { /* don't count diagonal */
1043 	      len--; break;
1044 	    }
1045 	  }
1046 	  nnz += len;
1047 	}
1048 	ierr = PetscMalloc((na+1)*sizeof(PetscInt),&iia);CHKERRQ(ierr);
1049 	ierr = PetscMalloc((nnz)*sizeof(PetscInt),&jja);CHKERRQ(ierr);
1050 	nnz    = 0;
1051 	iia[0] = 0;
1052 	for (i=0; i<na; i++) { /* fill adjacency */
1053 	  cnt = 0;
1054 	  len = ia[i+1] - ia[i];
1055 	  row = ja + ia[i];
1056 	  for (j=0; j<len; j++) {
1057 	    if (row[j] != i) { /* if not diagonal */
1058 	      jja[nnz+cnt++] = row[j];
1059 	    }
1060 	  }
1061 	  nnz += cnt;
1062 	  iia[i+1] = nnz;
1063 	}
1064 	/* Partitioning of the adjacency matrix */
1065 	ierr = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,PETSC_NULL,&adj);CHKERRQ(ierr);
1066 	ierr = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr);
1067 	ierr = MatPartitioningSetNParts(mpart,n);CHKERRQ(ierr);
1068 	ierr = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr);
1069 	ierr = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr);
1070 	ierr = MatDestroy(adj);CHKERRQ(ierr);
1071 	foundpart = PETSC_TRUE;
1072       }
1073       ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1074     }
1075     ierr = MatPartitioningDestroy(mpart);CHKERRQ(ierr);
1076   }
1077   if (iscopy) {ierr = MatDestroy(Ad);CHKERRQ(ierr);}
1078 
1079   ierr = PetscMalloc(n*sizeof(IS),&is);CHKERRQ(ierr);
1080   *outis = is;
1081 
1082   if (!foundpart) {
1083 
1084     /* Partitioning by contiguous chunks of rows */
1085 
1086     PetscInt mbs   = (rend-rstart)/bs;
1087     PetscInt start = rstart;
1088     for (i=0; i<n; i++) {
1089       PetscInt count = (mbs/n + ((mbs % n) > i)) * bs;
1090       ierr   = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr);
1091       start += count;
1092     }
1093 
1094   } else {
1095 
1096     /* Partitioning by adjacency of diagonal block  */
1097 
1098     const PetscInt *numbering;
1099     PetscInt       *count,nidx,*indices,*newidx,start=0;
1100     /* Get node count in each partition */
1101     ierr = PetscMalloc(n*sizeof(PetscInt),&count);CHKERRQ(ierr);
1102     ierr = ISPartitioningCount(ispart,n,count);CHKERRQ(ierr);
1103     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1104       for (i=0; i<n; i++) count[i] *= bs;
1105     }
1106     /* Build indices from node numbering */
1107     ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr);
1108     ierr = PetscMalloc(nidx*sizeof(PetscInt),&indices);CHKERRQ(ierr);
1109     for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
1110     ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr);
1111     ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr);
1112     ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr);
1113     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1114       ierr = PetscMalloc(nidx*bs*sizeof(PetscInt),&newidx);CHKERRQ(ierr);
1115       for (i=0; i<nidx; i++)
1116 	for (j=0; j<bs; j++)
1117 	  newidx[i*bs+j] = indices[i]*bs + j;
1118       ierr = PetscFree(indices);CHKERRQ(ierr);
1119       nidx   *= bs;
1120       indices = newidx;
1121     }
1122     /* Shift to get global indices */
1123     for (i=0; i<nidx; i++) indices[i] += rstart;
1124 
1125     /* Build the index sets for each block */
1126     for (i=0; i<n; i++) {
1127       ierr   = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],&is[i]);CHKERRQ(ierr);
1128       ierr   = ISSort(is[i]);CHKERRQ(ierr);
1129       start += count[i];
1130     }
1131 
1132     ierr = PetscFree(count);
1133     ierr = PetscFree(indices);
1134     ierr = ISDestroy(isnumb);CHKERRQ(ierr);
1135     ierr = ISDestroy(ispart);CHKERRQ(ierr);
1136 
1137   }
1138 
1139   PetscFunctionReturn(0);
1140 }
1141 
1142 #undef __FUNCT__
1143 #define __FUNCT__ "PCASMDestroySubdomains"
1144 /*@C
1145    PCASMDestroySubdomains - Destroys the index sets created with
1146    PCASMCreateSubdomains(). Should be called after setting subdomains
1147    with PCASMSetLocalSubdomains().
1148 
1149    Collective
1150 
1151    Input Parameters:
1152 +  n - the number of index sets
1153 -  is - the array of index sets
1154 
1155    Level: advanced
1156 
1157 .keywords: PC, ASM, additive Schwarz, create, subdomains, unstructured grid
1158 
1159 .seealso: PCASMCreateSubdomains(), PCASMSetLocalSubdomains()
1160 @*/
1161 PetscErrorCode PETSCKSP_DLLEXPORT PCASMDestroySubdomains(PetscInt n, IS is[])
1162 {
1163   PetscInt       i;
1164   PetscErrorCode ierr;
1165   PetscFunctionBegin;
1166   if (n <= 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"n must be > 0: n = %D",n);
1167   PetscValidPointer(is,2);
1168   for (i=0; i<n; i++) { ierr = ISDestroy(is[i]);CHKERRQ(ierr); }
1169   ierr = PetscFree(is);CHKERRQ(ierr);
1170   PetscFunctionReturn(0);
1171 }
1172 
1173 #undef __FUNCT__
1174 #define __FUNCT__ "PCASMCreateSubdomains2D"
1175 /*@
1176    PCASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1177    preconditioner for a two-dimensional problem on a regular grid.
1178 
1179    Not Collective
1180 
1181    Input Parameters:
1182 +  m, n - the number of mesh points in the x and y directions
1183 .  M, N - the number of subdomains in the x and y directions
1184 .  dof - degrees of freedom per node
1185 -  overlap - overlap in mesh lines
1186 
1187    Output Parameters:
1188 +  Nsub - the number of subdomains created
1189 -  is - the array of index sets defining the subdomains
1190 
1191    Note:
   Presently PCASMCreateSubdomains2D() is valid only for sequential
1193    preconditioners.  More general related routines are
1194    PCASMSetTotalSubdomains() and PCASMSetLocalSubdomains().
1195 
1196    Level: advanced
1197 
1198 .keywords: PC, ASM, additive Schwarz, create, subdomains, 2D, regular grid
1199 
1200 .seealso: PCASMSetTotalSubdomains(), PCASMSetLocalSubdomains(), PCASMGetSubKSP(),
1201           PCASMSetOverlap()
1202 @*/
1203 PetscErrorCode PETSCKSP_DLLEXPORT PCASMCreateSubdomains2D(PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt dof,PetscInt overlap,PetscInt *Nsub,IS **is)
1204 {
1205   PetscInt       i,j,height,width,ystart,xstart,yleft,yright,xleft,xright,loc_outter;
1206   PetscErrorCode ierr;
1207   PetscInt       nidx,*idx,loc,ii,jj,count;
1208 
1209   PetscFunctionBegin;
1210   if (dof != 1) SETERRQ(PETSC_ERR_SUP," ");
1211 
1212   *Nsub = N*M;
1213   ierr = PetscMalloc((*Nsub)*sizeof(IS *),is);CHKERRQ(ierr);
1214   ystart = 0;
1215   loc_outter = 0;
1216   for (i=0; i<N; i++) {
1217     height = n/N + ((n % N) > i); /* height of subdomain */
1218     if (height < 2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Too many N subdomains for mesh dimension n");
1219     yleft  = ystart - overlap; if (yleft < 0) yleft = 0;
1220     yright = ystart + height + overlap; if (yright > n) yright = n;
1221     xstart = 0;
1222     for (j=0; j<M; j++) {
1223       width = m/M + ((m % M) > j); /* width of subdomain */
1224       if (width < 2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Too many M subdomains for mesh dimension m");
1225       xleft  = xstart - overlap; if (xleft < 0) xleft = 0;
1226       xright = xstart + width + overlap; if (xright > m) xright = m;
1227       nidx   = (xright - xleft)*(yright - yleft);
1228       ierr = PetscMalloc(nidx*sizeof(PetscInt),&idx);CHKERRQ(ierr);
1229       loc    = 0;
1230       for (ii=yleft; ii<yright; ii++) {
1231         count = m*ii + xleft;
1232         for (jj=xleft; jj<xright; jj++) {
1233           idx[loc++] = count++;
1234         }
1235       }
1236       ierr = ISCreateGeneral(PETSC_COMM_SELF,nidx,idx,(*is)+loc_outter++);CHKERRQ(ierr);
1237       ierr = PetscFree(idx);CHKERRQ(ierr);
1238       xstart += width;
1239     }
1240     ystart += height;
1241   }
1242   for (i=0; i<*Nsub; i++) { ierr = ISSort((*is)[i]);CHKERRQ(ierr); }
1243   PetscFunctionReturn(0);
1244 }
1245 
1246 #undef __FUNCT__
1247 #define __FUNCT__ "PCASMGetLocalSubdomains"
1248 /*@C
1249     PCASMGetLocalSubdomains - Gets the local subdomains (for this processor
1250     only) for the additive Schwarz preconditioner.
1251 
1252     Collective on PC
1253 
1254     Input Parameter:
1255 .   pc - the preconditioner context
1256 
1257     Output Parameters:
1258 +   n - the number of subdomains for this processor (default value = 1)
1259 -   is - the index sets that define the subdomains for this processor
1260 
1261 
1262     Notes:
1263     The IS numbering is in the parallel, global numbering of the vector.
1264 
1265     Level: advanced
1266 
1267 .keywords: PC, ASM, set, local, subdomains, additive Schwarz
1268 
1269 .seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
1270           PCASMCreateSubdomains2D(), PCASMSetLocalSubdomains(), PCASMGetLocalSubmatrices()
1271 @*/
1272 PetscErrorCode PETSCKSP_DLLEXPORT PCASMGetLocalSubdomains(PC pc,PetscInt *n,IS *is[])
1273 {
1274   PC_ASM         *osm;
1275   PetscErrorCode ierr;
1276   PetscTruth     match;
1277 
1278   PetscFunctionBegin;
1279   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
1280   PetscValidIntPointer(n,2);
1281   if (is) PetscValidPointer(is,3);
1282   ierr = PetscTypeCompare((PetscObject)pc,PCASM,&match);CHKERRQ(ierr);
1283   if (!match) {
1284     if (n)  *n  = 0;
1285     if (is) *is = PETSC_NULL;
1286   } else {
1287     osm = (PC_ASM*)pc->data;
1288     if (n)  *n  = osm->n_local_true;
1289     if (is) *is = osm->is;
1290   }
1291   PetscFunctionReturn(0);
1292 }
1293 
1294 #undef __FUNCT__
1295 #define __FUNCT__ "PCASMGetLocalSubmatrices"
1296 /*@C
1297     PCASMGetLocalSubmatrices - Gets the local submatrices (for this processor
1298     only) for the additive Schwarz preconditioner.
1299 
1300     Collective on PC
1301 
1302     Input Parameter:
1303 .   pc - the preconditioner context
1304 
1305     Output Parameters:
1306 +   n - the number of matrices for this processor (default value = 1)
1307 -   mat - the matrices
1308 
1309 
1310     Level: advanced
1311 
1312 .keywords: PC, ASM, set, local, subdomains, additive Schwarz, block Jacobi
1313 
1314 .seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
1315           PCASMCreateSubdomains2D(), PCASMSetLocalSubdomains(), PCASMGetLocalSubdomains()
1316 @*/
1317 PetscErrorCode PETSCKSP_DLLEXPORT PCASMGetLocalSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1318 {
1319   PC_ASM         *osm;
1320   PetscErrorCode ierr;
1321   PetscTruth     match;
1322 
1323   PetscFunctionBegin;
1324   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
1325   PetscValidIntPointer(n,2);
1326   if (mat) PetscValidPointer(mat,3);
1327   if (!pc->setupcalled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUP() or PCSetUp().");
1328   ierr = PetscTypeCompare((PetscObject)pc,PCASM,&match);CHKERRQ(ierr);
1329   if (!match) {
1330     if (n)   *n   = 0;
1331     if (mat) *mat = PETSC_NULL;
1332   } else {
1333     osm = (PC_ASM*)pc->data;
1334     if (n)   *n   = osm->n_local_true;
1335     if (mat) *mat = osm->pmat;
1336   }
1337   PetscFunctionReturn(0);
1338 }
1339