xref: /petsc/src/ksp/pc/impls/asm/asm.c (revision 7d6bfa3b9d7db0ccd4cc481237114ca8dbb0dbff)
1 #define PETSCKSP_DLL
2 
3 /*
4   This file defines an additive Schwarz preconditioner for any Mat implementation.
5 
6   Note that each processor may have any number of subdomains. But in order to
7   deal easily with the VecScatter(), we treat each processor as if it has the
8   same number of subdomains.
9 
10        n - total number of true subdomains on all processors
11        n_local_true - actual number of subdomains on this processor
12        n_local = maximum over all processors of n_local_true
13 */
14 #include "private/pcimpl.h"     /*I "petscpc.h" I*/
15 
/* Private context for the additive Schwarz (ASM) preconditioner. */
typedef struct {
  PetscInt   n,n_local,n_local_true; /* n: total subdomains over all processes; n_local: max over processes of n_local_true; n_local_true: subdomains actually on this process */
  PetscInt   overlap;             /* overlap requested by user */
  KSP        *ksp;                /* linear solvers for each block */
  VecScatter *scat;               /* mapping to subregion */
  Vec        *x,*y;               /* work vectors (x: sub-RHS, y: sub-solution; y aliases the second half of the x allocation) */
  IS         *is;                 /* index set that defines each subdomain */
  Mat        *mat,*pmat;          /* mat is not currently used; pmat holds the extracted submatrices */
  PCASMType  type;                /* use reduced interpolation, restriction or both */
  PetscTruth type_set;            /* if user set this value (so won't change it for symmetric problems) */
  PetscTruth same_local_solves;   /* flag indicating whether all local solvers are same */
  PetscTruth inplace;             /* indicates that the sub-matrices are deleted after
                                     PCSetUpOnBlocks() is done. Similar to inplace
                                     factorization in the case of LU and ILU */
} PC_ASM;
31 
32 #undef __FUNCT__
33 #define __FUNCT__ "PCView_ASM"
34 static PetscErrorCode PCView_ASM(PC pc,PetscViewer viewer)
35 {
36   PC_ASM         *osm = (PC_ASM*)pc->data;
37   PetscErrorCode ierr;
38   PetscMPIInt    rank;
39   PetscInt       i;
40   PetscTruth     iascii,isstring;
41   PetscViewer    sviewer;
42 
43 
44   PetscFunctionBegin;
45   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
46   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_STRING,&isstring);CHKERRQ(ierr);
47   if (iascii) {
48     if (osm->n > 0) {
49       ierr = PetscViewerASCIIPrintf(viewer,"  Additive Schwarz: total subdomain blocks = %D, amount of overlap = %D\n",osm->n,osm->overlap);CHKERRQ(ierr);
50     } else {
51       ierr = PetscViewerASCIIPrintf(viewer,"  Additive Schwarz: total subdomain blocks not yet set, amount of overlap = %D\n",osm->overlap);CHKERRQ(ierr);
52     }
53     ierr = PetscViewerASCIIPrintf(viewer,"  Additive Schwarz: restriction/interpolation type - %s\n",PCASMTypes[osm->type]);CHKERRQ(ierr);
54     ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
55     if (osm->same_local_solves) {
56       ierr = PetscViewerASCIIPrintf(viewer,"  Local solve is same for all blocks, in the following KSP and PC objects:\n");CHKERRQ(ierr);
57       ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
58       if (!rank && osm->ksp) {
59         ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
60         ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);
61         ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
62       }
63       ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
64     } else {
65       ierr = PetscViewerASCIIPrintf(viewer,"  Local solve info for each block is in the following KSP and PC objects:\n");CHKERRQ(ierr);
66       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] number of local blocks = %D\n",rank,osm->n_local_true);CHKERRQ(ierr);
67       ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
68       for (i=0; i<osm->n_local; i++) {
69         ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
70         if (i < osm->n_local_true) {
71           ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"[%d] local block number %D\n",rank,i);CHKERRQ(ierr);
72           ierr = KSPView(osm->ksp[i],sviewer);CHKERRQ(ierr);
73           ierr = PetscViewerASCIISynchronizedPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
74         }
75         ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
76       }
77       ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
78       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
79     }
80   } else if (isstring) {
81     ierr = PetscViewerStringSPrintf(viewer," blocks=%D, overlap=%D, type=%D",osm->n,osm->overlap,osm->type);CHKERRQ(ierr);
82     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
83     if (osm->ksp) {ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);}
84     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
85   } else {
86     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for PCASM",((PetscObject)viewer)->type_name);
87   }
88   PetscFunctionReturn(0);
89 }
90 
91 #undef __FUNCT__
92 #define __FUNCT__ "PCASMPrintSubdomains"
93 static PetscErrorCode PCASMPrintSubdomains(PC pc)
94 {
95   PC_ASM         *osm  = (PC_ASM*)pc->data;
96   const char     *prefix;
97   char           fname[PETSC_MAX_PATH_LEN+1];
98   PetscViewer    viewer;
99   PetscInt       i,j,nidx;
100   const PetscInt *idx;
101   PetscErrorCode ierr;
102   PetscFunctionBegin;
103   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
104   ierr = PetscOptionsGetString(prefix,"-pc_asm_print_subdomains",
105 			       fname,PETSC_MAX_PATH_LEN,PETSC_NULL);CHKERRQ(ierr);
106   if (fname[0] == 0) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); };
107   ierr = PetscViewerASCIIOpen(((PetscObject)pc)->comm,fname,&viewer);CHKERRQ(ierr);
108   for (i=0;i<osm->n_local_true;i++) {
109     ierr = ISGetLocalSize(osm->is[i],&nidx);CHKERRQ(ierr);
110     ierr = ISGetIndices(osm->is[i],&idx);CHKERRQ(ierr);
111     for (j=0; j<nidx; j++) {
112       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%D ",idx[j]);CHKERRQ(ierr);
113     }
114     ierr = ISRestoreIndices(osm->is[i],&idx);CHKERRQ(ierr);
115     ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n");CHKERRQ(ierr);
116   }
117   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
118   ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
#undef __FUNCT__
#define __FUNCT__ "PCSetUp_ASM"
/*
   PCSetUp_ASM - Sets up the ASM preconditioner: decides the subdomain
   decomposition, builds index sets, scatters, and work vectors (first
   setup only), extracts the overlapping submatrices, and configures one
   KSP solver per local block.
*/
static PetscErrorCode PCSetUp_ASM(PC pc)
{
  PC_ASM         *osm  = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscTruth     symset,flg;
  PetscInt       i,m,start,start_val,end_val,mbs,bs;
  PetscMPIInt    size;
  MatReuse       scall = MAT_REUSE_MATRIX;   /* reuse submatrices unless first setup or pattern change */
  IS             isl;
  KSP            ksp;
  PC             subpc;
  const char     *prefix,*pprefix;
  Vec            vec;

  PetscFunctionBegin;
  if (!pc->setupcalled) {

    /* If the user did not choose a type and the matrix is known symmetric,
       use the symmetric (basic) variant */
    if (!osm->type_set) {
      ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
      if (symset && flg) { osm->type = PC_ASM_BASIC; }
    }

    if (osm->n == PETSC_DECIDE && osm->n_local_true < 1) {
      /* no subdomains given, use one per processor */
      osm->n_local = osm->n_local_true = 1;
      ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
      osm->n = size;
    } else if (osm->n == PETSC_DECIDE) {
      /* determine global number of subdomains: PetscMaxSum_Op yields both
         the max per-process count (n_local) and the total (n) in one reduce */
      PetscInt inwork[2],outwork[2];
      inwork[0] = inwork[1] = osm->n_local_true;
      ierr = MPI_Allreduce(inwork,outwork,1,MPIU_2INT,PetscMaxSum_Op,((PetscObject)pc)->comm);CHKERRQ(ierr);
      osm->n_local = outwork[0];
      osm->n       = outwork[1];
    }

    if (!osm->is){ /* build the index sets */
      ierr  = MatGetOwnershipRange(pc->pmat,&start_val,&end_val);CHKERRQ(ierr);
      ierr  = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
      /* The ownership range must fall on matrix-block boundaries so each
         subdomain can be built from whole blocks */
      if (end_val/bs*bs != end_val || start_val/bs*bs != start_val) {
        SETERRQ(PETSC_ERR_ARG_WRONG,"Bad distribution for matrix block size");
      }
      ierr  = PetscMalloc(osm->n_local_true*sizeof(IS *),&osm->is);CHKERRQ(ierr);
      mbs = (end_val - start_val)/bs;
      start = start_val;
      /* Split this process's rows into n_local_true contiguous strides,
         distributing the remainder blocks to the lowest-numbered subdomains */
      for (i=0; i<osm->n_local_true; i++){
        size       =  (mbs/osm->n_local_true + ((mbs % osm->n_local_true) > i))*bs;
        ierr       =  ISCreateStride(PETSC_COMM_SELF,size,start,1,&isl);CHKERRQ(ierr);
        start      += size;
        osm->is[i] =  isl;
      }
    }
    ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
    ierr = PetscOptionsHasName(prefix,"-pc_asm_print_subdomains",&flg);CHKERRQ(ierr);
    if (flg) { ierr = PCASMPrintSubdomains(pc);CHKERRQ(ierr); }

    /* x and y share one allocation of length 2*n_local; y points at the
       second half */
    ierr   = PetscMalloc(osm->n_local_true*sizeof(KSP *),&osm->ksp);CHKERRQ(ierr);
    ierr   = PetscMalloc(osm->n_local*sizeof(VecScatter *),&osm->scat);CHKERRQ(ierr);
    ierr   = PetscMalloc(2*osm->n_local*sizeof(Vec *),&osm->x);CHKERRQ(ierr);
    osm->y = osm->x + osm->n_local;

    /*  Extend the "overlapping" regions by a number of steps  */
    ierr = MatIncreaseOverlap(pc->pmat,osm->n_local_true,osm->is,osm->overlap);CHKERRQ(ierr);
    for (i=0; i<osm->n_local_true; i++) {
      ierr = ISSort(osm->is[i]);CHKERRQ(ierr);
    }

    /* Create the local work vectors and scatter contexts */
    ierr = MatGetVecs(pc->pmat,&vec,0);CHKERRQ(ierr);
    for (i=0; i<osm->n_local_true; i++) {
      ierr = ISGetLocalSize(osm->is[i],&m);CHKERRQ(ierr);
      ierr = VecCreateSeq(PETSC_COMM_SELF,m,&osm->x[i]);CHKERRQ(ierr);
      ierr = VecDuplicate(osm->x[i],&osm->y[i]);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,m,0,1,&isl);CHKERRQ(ierr);
      ierr = VecScatterCreate(vec,osm->is[i],osm->x[i],isl,&osm->scat[i]);CHKERRQ(ierr);
      ierr = ISDestroy(isl);CHKERRQ(ierr);
    }
    /* Processes with fewer than n_local true subdomains get empty padding
       scatters/vectors so collective scatter operations stay matched */
    for (i=osm->n_local_true; i<osm->n_local; i++) {
      ierr = VecCreateSeq(PETSC_COMM_SELF,0,&osm->x[i]);CHKERRQ(ierr);
      ierr = VecDuplicate(osm->x[i],&osm->y[i]);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&isl);CHKERRQ(ierr);
      ierr = VecScatterCreate(vec,isl,osm->x[i],isl,&osm->scat[i]);CHKERRQ(ierr);
      ierr = ISDestroy(isl);CHKERRQ(ierr);
    }
    ierr = VecDestroy(vec);CHKERRQ(ierr);

    /*
       Create the local solvers.
    */
    for (i=0; i<osm->n_local_true; i++) {
      ierr = KSPCreate(PETSC_COMM_SELF,&ksp);CHKERRQ(ierr);
      ierr = PetscLogObjectParent(pc,ksp);CHKERRQ(ierr);
      ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr);
      ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
      ierr = KSPGetPC(ksp,&subpc);CHKERRQ(ierr);
      ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
      ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr);
      ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr);
      osm->ksp[i] = ksp;
    }
    scall = MAT_INITIAL_MATRIX;

  } else {
    /*
       Destroy the blocks from the previous iteration
    */
    if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
      ierr = MatDestroyMatrices(osm->n_local_true,&osm->pmat);CHKERRQ(ierr);
      scall = MAT_INITIAL_MATRIX;
    }
  }

  /*
     Extract out the submatrices
  */
  ierr = MatGetSubMatrices(pc->pmat,osm->n_local_true,osm->is,osm->is,scall,&osm->pmat);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr);
    for (i=0; i<osm->n_local_true; i++) {
      ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr);
      ierr = PetscLogObjectParent(pc,osm->pmat[i]);CHKERRQ(ierr);
    }
  }

  /* Return control to the user so that the submatrices can be modified (e.g., to apply
     different boundary conditions for the submatrices than for the global problem) */
  ierr = PCModifySubMatrices(pc,osm->n_local_true,osm->is,osm->is,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr);

  /*
     Loop over subdomains putting them into local ksp
  */
  for (i=0; i<osm->n_local_true; i++) {
    ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i],pc->flag);CHKERRQ(ierr);
    if (!pc->setupcalled) {
      ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}
263 
264 #undef __FUNCT__
265 #define __FUNCT__ "PCSetUpOnBlocks_ASM"
266 static PetscErrorCode PCSetUpOnBlocks_ASM(PC pc)
267 {
268   PC_ASM         *osm = (PC_ASM*)pc->data;
269   PetscErrorCode ierr;
270   PetscInt       i;
271 
272   PetscFunctionBegin;
273   for (i=0; i<osm->n_local_true; i++) {
274     ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr);
275   }
276   /*
277      If inplace flag is set, then destroy the matrix after the setup
278      on blocks is done.
279   */
280   if (osm->inplace && osm->n_local_true > 0) {
281     ierr = MatDestroyMatrices(osm->n_local_true,&osm->pmat);CHKERRQ(ierr);
282   }
283   PetscFunctionReturn(0);
284 }
285 
#undef __FUNCT__
#define __FUNCT__ "PCApply_ASM"
/*
   PCApply_ASM - Applies the additive Schwarz preconditioner: restricts x
   onto each (overlapping) subdomain, solves the local problems, and sums
   the local solutions back into y.  Scatters are begun for all blocks
   before any solve so communication overlaps with local computation.
*/
static PetscErrorCode PCApply_ASM(PC pc,Vec x,Vec y)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i,n_local = osm->n_local,n_local_true = osm->n_local_true;
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

  PetscFunctionBegin;
  /*
     Support for limiting the restriction or interpolation to only local
     subdomain values (leaving the other values 0).
  */
  if (!(osm->type & PC_ASM_RESTRICT)) {
    forward = SCATTER_FORWARD_LOCAL;
    /* have to zero the work RHS since scatter may leave some slots empty */
    for (i=0; i<n_local_true; i++) {
      ierr = VecSet(osm->x[i],0.0);CHKERRQ(ierr);
    }
  }
  if (!(osm->type & PC_ASM_INTERPOLATE)) {
    reverse = SCATTER_REVERSE_LOCAL;
  }

  /* Start all restrictions (including the empty padding scatters beyond
     n_local_true, which keep collective calls matched across processes) */
  for (i=0; i<n_local; i++) {
    ierr = VecScatterBegin(osm->scat[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  /* y accumulates ADD_VALUES contributions below, so it must start at zero */
  ierr = VecSet(y,0.0);CHKERRQ(ierr);
  /* do the local solves */
  for (i=0; i<n_local_true; i++) {
    ierr = VecScatterEnd(osm->scat[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
    ierr = VecScatterBegin(osm->scat[i],osm->y[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  /* handle the rest of the scatters that do not have local solves */
  for (i=n_local_true; i<n_local; i++) {
    ierr = VecScatterEnd(osm->scat[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    ierr = VecScatterBegin(osm->scat[i],osm->y[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  /* Complete all prolongations back into the global vector */
  for (i=0; i<n_local; i++) {
    ierr = VecScatterEnd(osm->scat[i],osm->y[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
331 
#undef __FUNCT__
#define __FUNCT__ "PCApplyTranspose_ASM"
/*
   PCApplyTranspose_ASM - Applies the transpose of the ASM preconditioner:
   same pipelined scatter/solve/scatter structure as PCApply_ASM() but with
   KSPSolveTranspose() and with the restrict/interpolate roles swapped.
*/
static PetscErrorCode PCApplyTranspose_ASM(PC pc,Vec x,Vec y)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i,n_local = osm->n_local,n_local_true = osm->n_local_true;
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

  PetscFunctionBegin;
  /*
     Support for limiting the restriction or interpolation to only local
     subdomain values (leaving the other values 0).

     Note: these are reversed from the PCApply_ASM() because we are applying the
     transpose of the three terms
  */
  if (!(osm->type & PC_ASM_INTERPOLATE)) {
    forward = SCATTER_FORWARD_LOCAL;
    /* have to zero the work RHS since scatter may leave some slots empty */
    for (i=0; i<n_local_true; i++) {
      ierr = VecSet(osm->x[i],0.0);CHKERRQ(ierr);
    }
  }
  if (!(osm->type & PC_ASM_RESTRICT)) {
    reverse = SCATTER_REVERSE_LOCAL;
  }

  /* Start all restrictions (padding scatters beyond n_local_true keep
     collective calls matched across processes) */
  for (i=0; i<n_local; i++) {
    ierr = VecScatterBegin(osm->scat[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  /* y accumulates ADD_VALUES contributions below, so it must start at zero */
  ierr = VecSet(y,0.0);CHKERRQ(ierr);
  /* do the local solves */
  for (i=0; i<n_local_true; i++) {
    ierr = VecScatterEnd(osm->scat[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
    ierr = VecScatterBegin(osm->scat[i],osm->y[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  /* handle the rest of the scatters that do not have local solves */
  for (i=n_local_true; i<n_local; i++) {
    ierr = VecScatterEnd(osm->scat[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    ierr = VecScatterBegin(osm->scat[i],osm->y[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  /* Complete all prolongations back into the global vector */
  for (i=0; i<n_local; i++) {
    ierr = VecScatterEnd(osm->scat[i],osm->y[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
380 
#undef __FUNCT__
#define __FUNCT__ "PCDestroy_ASM"
/*
   PCDestroy_ASM - Frees everything owned by the ASM context: block solvers,
   submatrices, scatters, work vectors, index sets, and the context itself.
*/
static PetscErrorCode PCDestroy_ASM(PC pc)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  if (osm->ksp) {
    for (i=0; i<osm->n_local_true; i++) {
      ierr = KSPDestroy(osm->ksp[i]);CHKERRQ(ierr);
    }
    ierr = PetscFree(osm->ksp);CHKERRQ(ierr);
  }
  /* In the inplace case the submatrices were already destroyed in
     PCSetUpOnBlocks_ASM(), so only free them here otherwise */
  if (osm->pmat && !osm->inplace) {
    if (osm->n_local_true > 0) {
      ierr = MatDestroyMatrices(osm->n_local_true,&osm->pmat);CHKERRQ(ierr);
    }
  }
  if (osm->scat) {
    /* Scatters and work vectors exist for all n_local slots, including the
       empty padding ones; osm->y shares osm->x's allocation so only x is freed */
    for (i=0; i<osm->n_local; i++) {
      ierr = VecScatterDestroy(osm->scat[i]);CHKERRQ(ierr);
      ierr = VecDestroy(osm->x[i]);CHKERRQ(ierr);
      ierr = VecDestroy(osm->y[i]);CHKERRQ(ierr);
    }
    ierr = PetscFree(osm->scat);CHKERRQ(ierr);
    ierr = PetscFree(osm->x);CHKERRQ(ierr);
  }
  if (osm->is) {
    for (i=0; i<osm->n_local_true; i++) {
      ierr = ISDestroy(osm->is[i]);CHKERRQ(ierr);
    }
    ierr = PetscFree(osm->is);CHKERRQ(ierr);
  }
  ierr = PetscFree(osm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
419 
420 #undef __FUNCT__
421 #define __FUNCT__ "PCSetFromOptions_ASM"
422 static PetscErrorCode PCSetFromOptions_ASM(PC pc)
423 {
424   PC_ASM         *osm = (PC_ASM*)pc->data;
425   PetscErrorCode ierr;
426   PetscInt       blocks,ovl;
427   PetscTruth     symset,flg;
428   PCASMType      asmtype;
429 
430   PetscFunctionBegin;
431   /* set the type to symmetric if matrix is symmetric */
432   if (!osm->type_set && pc->pmat) {
433     ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
434     if (symset && flg) { osm->type = PC_ASM_BASIC; }
435   }
436   ierr = PetscOptionsHead("Additive Schwarz options");CHKERRQ(ierr);
437     ierr = PetscOptionsInt("-pc_asm_blocks","Number of subdomains","PCASMSetTotalSubdomains",
438 			   osm->n,&blocks,&flg);CHKERRQ(ierr);
439     if (flg) {ierr = PCASMSetTotalSubdomains(pc,blocks,PETSC_NULL);CHKERRQ(ierr); }
440     ierr = PetscOptionsInt("-pc_asm_overlap","Number of grid points overlap","PCASMSetOverlap",
441 			   osm->overlap,&ovl,&flg);CHKERRQ(ierr);
442     if (flg) {ierr = PCASMSetOverlap(pc,ovl);CHKERRQ(ierr); }
443     ierr = PetscOptionsName("-pc_asm_in_place","Perform matrix factorization inplace","PCASMSetUseInPlace",&flg);CHKERRQ(ierr);
444     if (flg) {ierr = PCASMSetUseInPlace(pc);CHKERRQ(ierr); }
445     ierr = PetscOptionsEnum("-pc_asm_type","Type of restriction/extension","PCASMSetType",
446 			    PCASMTypes,(PetscEnum)osm->type,(PetscEnum*)&asmtype,&flg);CHKERRQ(ierr);
447     if (flg) {ierr = PCASMSetType(pc,asmtype);CHKERRQ(ierr); }
448   ierr = PetscOptionsTail();CHKERRQ(ierr);
449   PetscFunctionReturn(0);
450 }
451 
452 /*------------------------------------------------------------------------------------*/
453 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCASMSetLocalSubdomains_ASM"
/*
   PCASMSetLocalSubdomains_ASM - Implementation behind PCASMSetLocalSubdomains():
   records the number of local subdomains and (optionally) the index sets
   defining them, taking a reference on each supplied IS.
*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetLocalSubdomains_ASM(PC pc,PetscInt n,IS is[])
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  if (n < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more blocks, n = %D",n);
  if (pc->setupcalled && (n != osm->n_local_true || is)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"PCASMSetLocalSubdomains() should be called before calling PCSetup().");
  }
  if (!pc->setupcalled) {
    /* Reference the new index sets BEFORE destroying the old ones, so the
       call is safe even if the caller passes (some of) the same sets again */
    if (is) {
      for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is[i]);CHKERRQ(ierr);}
    }
    if (osm->is) {
      for (i=0; i<osm->n_local_true; i++) {ierr = ISDestroy(osm->is[i]);CHKERRQ(ierr);}
      ierr = PetscFree(osm->is);CHKERRQ(ierr);
    }
    osm->n_local_true = n;
    osm->is           = 0;   /* left 0 when is == PETSC_NULL: PCSetUp_ASM() builds default sets */
    if (is) {
      ierr = PetscMalloc(n*sizeof(IS *),&osm->is);CHKERRQ(ierr);
      for (i=0; i<n; i++) { osm->is[i] = is[i]; }
    }
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END
486 
487 EXTERN_C_BEGIN
488 #undef __FUNCT__
489 #define __FUNCT__ "PCASMSetTotalSubdomains_ASM"
490 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetTotalSubdomains_ASM(PC pc,PetscInt N,IS *is)
491 {
492   PC_ASM         *osm = (PC_ASM*)pc->data;
493   PetscErrorCode ierr;
494   PetscMPIInt    rank,size;
495   PetscInt       i,n;
496 
497   PetscFunctionBegin;
498   if (N < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Number of total blocks must be > 0, N = %D",N);
499   if (is) SETERRQ(PETSC_ERR_SUP,"Use PCASMSetLocalSubdomains() to set specific index sets\n\they cannot be set globally yet.");
500 
501   /*
502      Split the subdomains equally amoung all processors
503   */
504   ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
505   ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
506   n = N/size + ((N % size) > rank);
507   if (!n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Each process must have at least one block: total processors %d total blocks %D",(int)size,n);
508   if (pc->setupcalled && n != osm->n_local_true) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"PCASMSetTotalSubdomains() should be called before PCSetup().");
509   if (!pc->setupcalled) {
510     if (osm->is) {
511       for (i=0; i<osm->n_local_true; i++) {
512 	ierr = ISDestroy(osm->is[i]);CHKERRQ(ierr);
513       }
514       ierr = PetscFree(osm->is);CHKERRQ(ierr);
515     }
516     osm->n_local_true = n;
517     osm->is           = 0;
518   }
519   PetscFunctionReturn(0);
520 }
521 EXTERN_C_END
522 
523 EXTERN_C_BEGIN
524 #undef __FUNCT__
525 #define __FUNCT__ "PCASMSetOverlap_ASM"
526 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetOverlap_ASM(PC pc,PetscInt ovl)
527 {
528   PC_ASM *osm = (PC_ASM*)pc->data;
529 
530   PetscFunctionBegin;
531   if (ovl < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
532   osm->overlap = ovl;
533   PetscFunctionReturn(0);
534 }
535 EXTERN_C_END
536 
537 EXTERN_C_BEGIN
538 #undef __FUNCT__
539 #define __FUNCT__ "PCASMSetType_ASM"
540 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetType_ASM(PC pc,PCASMType type)
541 {
542   PC_ASM *osm = (PC_ASM*)pc->data;
543 
544   PetscFunctionBegin;
545   osm->type     = type;
546   osm->type_set = PETSC_TRUE;
547   PetscFunctionReturn(0);
548 }
549 EXTERN_C_END
550 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCASMGetSubKSP_ASM"
/*
   PCASMGetSubKSP_ASM - Implementation behind PCASMGetSubKSP(): returns the
   local block count, the global index of this process's first block, and
   the array of block KSP solvers.  Collective when first_local is requested
   (it requires an MPI_Scan).
*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMGetSubKSP_ASM(PC pc,PetscInt *n_local,PetscInt *first_local,KSP **ksp)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (osm->n_local_true < 1) {
    SETERRQ(PETSC_ERR_ORDER,"Need to call PCSetUP() on PC (or KSPSetUp() on the outer KSP object) before calling here");
  }

  if (n_local) {
    *n_local = osm->n_local_true;
  }
  if (first_local) {
    /* MPI_Scan gives the inclusive prefix sum of block counts over ranks;
       subtracting this rank's own count yields the exclusive prefix sum,
       i.e. the global number of the first local block */
    ierr = MPI_Scan(&osm->n_local_true,first_local,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr);
    *first_local -= osm->n_local_true;
  }
  if (ksp) {
    /* Assume that local solves are now different; not necessarily
       true though!  This flag is used only for PCView_ASM() */
    *ksp                   = osm->ksp;
    osm->same_local_solves = PETSC_FALSE;
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END
580 
581 EXTERN_C_BEGIN
582 #undef __FUNCT__
583 #define __FUNCT__ "PCASMSetUseInPlace_ASM"
584 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetUseInPlace_ASM(PC pc)
585 {
586   PC_ASM *osm = (PC_ASM*)pc->data;
587 
588   PetscFunctionBegin;
589   osm->inplace = PETSC_TRUE;
590   PetscFunctionReturn(0);
591 }
592 EXTERN_C_END
593 
/*----------------------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "PCASMSetUseInPlace"
/*@
   PCASMSetUseInPlace - Tells the system to destroy the matrix after setup is done.

   Collective on PC

   Input Parameters:
.  pc - the preconditioner context

   Options Database Key:
.  -pc_asm_in_place - Activates in-place factorization

   Note:
   PCASMSetUseInPlace() can only be used with the KSP method KSPPREONLY, and
   when the original matrix is not required during the Solve process.
   This destroys the matrix early, thus saving on memory usage.

   Level: intermediate

.keywords: PC, set, factorization, direct, inplace, in-place, ASM

.seealso: PCFactorSetUseInPlace()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetUseInPlace(PC pc)
{
  PetscErrorCode ierr,(*f)(PC);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_COOKIE,1);
  /* Dispatch to the ASM implementation only if pc is actually of type ASM;
     silently do nothing otherwise (standard PETSc query-function pattern) */
  ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetUseInPlace_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(pc);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
631 /*----------------------------------------------------------------------------*/
632 
633 #undef __FUNCT__
634 #define __FUNCT__ "PCASMSetLocalSubdomains"
635 /*@C
636     PCASMSetLocalSubdomains - Sets the local subdomains (for this processor
637     only) for the additive Schwarz preconditioner.
638 
639     Collective on PC
640 
641     Input Parameters:
642 +   pc - the preconditioner context
643 .   n - the number of subdomains for this processor (default value = 1)
644 -   is - the index sets that define the subdomains for this processor
645          (or PETSC_NULL for PETSc to determine subdomains)
646 
647     Notes:
648     The IS numbering is in the parallel, global numbering of the vector.
649 
650     By default the ASM preconditioner uses 1 block per processor.
651 
652     These index sets cannot be destroyed until after completion of the
653     linear solves for which the ASM preconditioner is being used.
654 
655     Use PCASMSetTotalSubdomains() to set the subdomains for all processors.
656 
657     Level: advanced
658 
659 .keywords: PC, ASM, set, local, subdomains, additive Schwarz
660 
661 .seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
662           PCASMCreateSubdomains2D(), PCASMGetLocalSubdomains()
663 @*/
664 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetLocalSubdomains(PC pc,PetscInt n,IS is[])
665 {
666   PetscErrorCode ierr,(*f)(PC,PetscInt,IS[]);
667 
668   PetscFunctionBegin;
669   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
670   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetLocalSubdomains_C",(void (**)(void))&f);CHKERRQ(ierr);
671   if (f) {
672     ierr = (*f)(pc,n,is);CHKERRQ(ierr);
673   }
674   PetscFunctionReturn(0);
675 }
676 
677 #undef __FUNCT__
678 #define __FUNCT__ "PCASMSetTotalSubdomains"
679 /*@C
680     PCASMSetTotalSubdomains - Sets the subdomains for all processor for the
681     additive Schwarz preconditioner.  Either all or no processors in the
682     PC communicator must call this routine, with the same index sets.
683 
684     Collective on PC
685 
686     Input Parameters:
687 +   pc - the preconditioner context
688 .   n - the number of subdomains for all processors
689 -   is - the index sets that define the subdomains for all processor
690          (or PETSC_NULL for PETSc to determine subdomains)
691 
692     Options Database Key:
693     To set the total number of subdomain blocks rather than specify the
694     index sets, use the option
695 .    -pc_asm_blocks <blks> - Sets total blocks
696 
697     Notes:
698     Currently you cannot use this to set the actual subdomains with the argument is.
699 
700     By default the ASM preconditioner uses 1 block per processor.
701 
702     These index sets cannot be destroyed until after completion of the
703     linear solves for which the ASM preconditioner is being used.
704 
705     Use PCASMSetLocalSubdomains() to set local subdomains.
706 
707     Level: advanced
708 
709 .keywords: PC, ASM, set, total, global, subdomains, additive Schwarz
710 
711 .seealso: PCASMSetLocalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
712           PCASMCreateSubdomains2D()
713 @*/
714 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetTotalSubdomains(PC pc,PetscInt N,IS *is)
715 {
716   PetscErrorCode ierr,(*f)(PC,PetscInt,IS *);
717 
718   PetscFunctionBegin;
719   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
720   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetTotalSubdomains_C",(void (**)(void))&f);CHKERRQ(ierr);
721   if (f) {
722     ierr = (*f)(pc,N,is);CHKERRQ(ierr);
723   }
724   PetscFunctionReturn(0);
725 }
726 
727 #undef __FUNCT__
728 #define __FUNCT__ "PCASMSetOverlap"
729 /*@
730     PCASMSetOverlap - Sets the overlap between a pair of subdomains for the
731     additive Schwarz preconditioner.  Either all or no processors in the
732     PC communicator must call this routine.
733 
734     Collective on PC
735 
736     Input Parameters:
737 +   pc  - the preconditioner context
738 -   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 1)
739 
740     Options Database Key:
741 .   -pc_asm_overlap <ovl> - Sets overlap
742 
743     Notes:
744     By default the ASM preconditioner uses 1 block per processor.  To use
745     multiple blocks per perocessor, see PCASMSetTotalSubdomains() and
746     PCASMSetLocalSubdomains() (and the option -pc_asm_blocks <blks>).
747 
748     The overlap defaults to 1, so if one desires that no additional
749     overlap be computed beyond what may have been set with a call to
750     PCASMSetTotalSubdomains() or PCASMSetLocalSubdomains(), then ovl
751     must be set to be 0.  In particular, if one does not explicitly set
752     the subdomains an application code, then all overlap would be computed
753     internally by PETSc, and using an overlap of 0 would result in an ASM
754     variant that is equivalent to the block Jacobi preconditioner.
755 
756     Note that one can define initial index sets with any overlap via
757     PCASMSetTotalSubdomains() or PCASMSetLocalSubdomains(); the routine
758     PCASMSetOverlap() merely allows PETSc to extend that overlap further
759     if desired.
760 
761     Level: intermediate
762 
763 .keywords: PC, ASM, set, overlap
764 
765 .seealso: PCASMSetTotalSubdomains(), PCASMSetLocalSubdomains(), PCASMGetSubKSP(),
766           PCASMCreateSubdomains2D(), PCASMGetLocalSubdomains()
767 @*/
768 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetOverlap(PC pc,PetscInt ovl)
769 {
770   PetscErrorCode ierr,(*f)(PC,PetscInt);
771 
772   PetscFunctionBegin;
773   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
774   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetOverlap_C",(void (**)(void))&f);CHKERRQ(ierr);
775   if (f) {
776     ierr = (*f)(pc,ovl);CHKERRQ(ierr);
777   }
778   PetscFunctionReturn(0);
779 }
780 
781 #undef __FUNCT__
782 #define __FUNCT__ "PCASMSetType"
783 /*@
784     PCASMSetType - Sets the type of restriction and interpolation used
785     for local problems in the additive Schwarz method.
786 
787     Collective on PC
788 
789     Input Parameters:
790 +   pc  - the preconditioner context
791 -   type - variant of ASM, one of
792 .vb
793       PC_ASM_BASIC       - full interpolation and restriction
794       PC_ASM_RESTRICT    - full restriction, local processor interpolation
795       PC_ASM_INTERPOLATE - full interpolation, local processor restriction
796       PC_ASM_NONE        - local processor restriction and interpolation
797 .ve
798 
799     Options Database Key:
800 .   -pc_asm_type [basic,restrict,interpolate,none] - Sets ASM type
801 
802     Level: intermediate
803 
804 .keywords: PC, ASM, set, type
805 
806 .seealso: PCASMSetTotalSubdomains(), PCASMSetTotalSubdomains(), PCASMGetSubKSP(),
807           PCASMCreateSubdomains2D()
808 @*/
809 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetType(PC pc,PCASMType type)
810 {
811   PetscErrorCode ierr,(*f)(PC,PCASMType);
812 
813   PetscFunctionBegin;
814   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
815   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetType_C",(void (**)(void))&f);CHKERRQ(ierr);
816   if (f) {
817     ierr = (*f)(pc,type);CHKERRQ(ierr);
818   }
819   PetscFunctionReturn(0);
820 }
821 
822 #undef __FUNCT__
823 #define __FUNCT__ "PCASMGetSubKSP"
824 /*@C
825    PCASMGetSubKSP - Gets the local KSP contexts for all blocks on
826    this processor.
827 
828    Collective on PC iff first_local is requested
829 
830    Input Parameter:
831 .  pc - the preconditioner context
832 
833    Output Parameters:
834 +  n_local - the number of blocks on this processor or PETSC_NULL
835 .  first_local - the global number of the first block on this processor or PETSC_NULL,
836                  all processors must request or all must pass PETSC_NULL
837 -  ksp - the array of KSP contexts
838 
839    Note:
840    After PCASMGetSubKSP() the array of KSPes is not to be freed
841 
842    Currently for some matrix implementations only 1 block per processor
843    is supported.
844 
845    You must call KSPSetUp() before calling PCASMGetSubKSP().
846 
847    Level: advanced
848 
849 .keywords: PC, ASM, additive Schwarz, get, sub, KSP, context
850 
851 .seealso: PCASMSetTotalSubdomains(), PCASMSetTotalSubdomains(), PCASMSetOverlap(),
852           PCASMCreateSubdomains2D(),
853 @*/
854 PetscErrorCode PETSCKSP_DLLEXPORT PCASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
855 {
856   PetscErrorCode ierr,(*f)(PC,PetscInt*,PetscInt*,KSP **);
857 
858   PetscFunctionBegin;
859   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
860   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMGetSubKSP_C",(void (**)(void))&f);CHKERRQ(ierr);
861   if (f) {
862     ierr = (*f)(pc,n_local,first_local,ksp);CHKERRQ(ierr);
863   } else {
864     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot get subksp for this type of PC");
865   }
866 
867  PetscFunctionReturn(0);
868 }
869 
870 /* -------------------------------------------------------------------------------------*/
871 /*MC
872    PCASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
873            its own KSP object.
874 
875    Options Database Keys:
876 +  -pc_asm_truelocal - Activates PCASMSetUseTrueLocal()
877 .  -pc_asm_in_place - Activates in-place factorization
878 .  -pc_asm_blocks <blks> - Sets total blocks
879 .  -pc_asm_overlap <ovl> - Sets overlap
880 -  -pc_asm_type [basic,restrict,interpolate,none] - Sets ASM type
881 
882      IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
883       will get a different convergence rate due to the default option of -pc_asm_type restrict. Use
884       -pc_asm_type basic to use the standard ASM.
885 
886    Notes: Each processor can have one or more blocks, but a block cannot be shared by more
887      than one processor. Defaults to one block per processor.
888 
889      To set options on the solvers for each block append -sub_ to all the KSP, and PC
890         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
891 
     To set the options on the solvers separately for each block call PCASMGetSubKSP()
893          and set the options directly on the resulting KSP object (you can access its PC
894          with KSPGetPC())
895 
896 
897    Level: beginner
898 
899    Concepts: additive Schwarz method
900 
901     References:
902     An additive variant of the Schwarz alternating method for the case of many subregions
903     M Dryja, OB Widlund - Courant Institute, New York University Technical report
904 
905     Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
906     Barry Smith, Petter Bjorstad, and William Gropp, Cambridge University Press, ISBN 0-521-49589-X.
907 
908 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
909            PCBJACOBI, PCASMSetUseTrueLocal(), PCASMGetSubKSP(), PCASMSetLocalSubdomains(),
910            PCASMSetTotalSubdomains(), PCSetModifySubmatrices(), PCASMSetOverlap(), PCASMSetType(),
911            PCASMSetUseInPlace()
912 M*/
913 
914 EXTERN_C_BEGIN
915 #undef __FUNCT__
916 #define __FUNCT__ "PCCreate_ASM"
917 PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_ASM(PC pc)
918 {
919   PetscErrorCode ierr;
920   PC_ASM         *osm;
921 
922   PetscFunctionBegin;
923   ierr = PetscNewLog(pc,PC_ASM,&osm);CHKERRQ(ierr);
924   osm->n                 = PETSC_DECIDE;
925   osm->n_local           = 0;
926   osm->n_local_true      = 0;
927   osm->overlap           = 1;
928   osm->ksp               = 0;
929   osm->scat              = 0;
930   osm->x                 = 0;
931   osm->y                 = 0;
932   osm->is                = 0;
933   osm->mat               = 0;
934   osm->pmat              = 0;
935   osm->type              = PC_ASM_RESTRICT;
936   osm->same_local_solves = PETSC_TRUE;
937   osm->inplace           = PETSC_FALSE;
938 
939   pc->data                   = (void*)osm;
940   pc->ops->apply             = PCApply_ASM;
941   pc->ops->applytranspose    = PCApplyTranspose_ASM;
942   pc->ops->setup             = PCSetUp_ASM;
943   pc->ops->destroy           = PCDestroy_ASM;
944   pc->ops->setfromoptions    = PCSetFromOptions_ASM;
945   pc->ops->setuponblocks     = PCSetUpOnBlocks_ASM;
946   pc->ops->view              = PCView_ASM;
947   pc->ops->applyrichardson   = 0;
948 
949   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetLocalSubdomains_C","PCASMSetLocalSubdomains_ASM",
950                     PCASMSetLocalSubdomains_ASM);CHKERRQ(ierr);
951   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetTotalSubdomains_C","PCASMSetTotalSubdomains_ASM",
952                     PCASMSetTotalSubdomains_ASM);CHKERRQ(ierr);
953   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetOverlap_C","PCASMSetOverlap_ASM",
954                     PCASMSetOverlap_ASM);CHKERRQ(ierr);
955   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetType_C","PCASMSetType_ASM",
956                     PCASMSetType_ASM);CHKERRQ(ierr);
957   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMGetSubKSP_C","PCASMGetSubKSP_ASM",
958                     PCASMGetSubKSP_ASM);CHKERRQ(ierr);
959   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetUseInPlace_C","PCASMSetUseInPlace_ASM",
960                     PCASMSetUseInPlace_ASM);CHKERRQ(ierr);
961   PetscFunctionReturn(0);
962 }
963 EXTERN_C_END
964 
965 
966 #undef __FUNCT__
967 #define __FUNCT__ "PCASMCreateSubdomains"
968 /*@C
969    PCASMCreateSubdomains - Creates the index sets for the overlapping Schwarz
970    preconditioner for a any problem on a general grid.
971 
972    Collective
973 
974    Input Parameters:
975 +  A - The global matrix operator
976 -  n - the number of local blocks
977 
978    Output Parameters:
979 .  outis - the array of index sets defining the subdomains
980 
981    Level: advanced
982 
983    Note: this generates nonoverlapping subdomains; the PCASM will generate the overlap
984     from these if you use PCASMSetLocalSubdomains()
985 
986     In the Fortran version you must provide the array outis[] already allocated of length n.
987 
988 .keywords: PC, ASM, additive Schwarz, create, subdomains, unstructured grid
989 
990 .seealso: PCASMSetLocalSubdomains(), PCASMDestroySubdomains()
991 @*/
992 PetscErrorCode PETSCKSP_DLLEXPORT PCASMCreateSubdomains(Mat A, PetscInt n, IS* outis[])
993 {
994   MatPartitioning           mpart;
995   const char                *prefix;
996   PetscErrorCode            (*f)(Mat,PetscTruth*,MatReuse,Mat*);
997   PetscMPIInt               size;
998   PetscInt                  i,j,rstart,rend,bs;
999   PetscTruth                iscopy = PETSC_FALSE,isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
1000   Mat                       Ad = PETSC_NULL, adj;
1001   IS                        ispart,isnumb,*is;
1002   PetscErrorCode            ierr;
1003 
1004   PetscFunctionBegin;
1005   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
1006   PetscValidPointer(outis,3);
1007   if (n < 1) {SETERRQ1(PETSC_ERR_ARG_WRONG,"number of local blocks must be > 0, n = %D",n);}
1008 
1009   /* Get prefix, row distribution, and block size */
1010   ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr);
1011   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1012   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
1013   if (rstart/bs*bs != rstart || rend/bs*bs != rend) {
1014     SETERRQ3(PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);
1015   }
1016   /* Get diagonal block from matrix if possible */
1017   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1018   ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",(void (**)(void))&f);CHKERRQ(ierr);
1019   if (f) {
1020     ierr = (*f)(A,&iscopy,MAT_INITIAL_MATRIX,&Ad);CHKERRQ(ierr);
1021   } else if (size == 1) {
1022     iscopy = PETSC_FALSE; Ad = A;
1023   } else {
1024     iscopy = PETSC_FALSE; Ad = PETSC_NULL;
1025   }
1026   if (Ad) {
1027     ierr = PetscTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
1028     if (!isbaij) {ierr = PetscTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);}
1029   }
1030   if (Ad && n > 1) {
1031     PetscTruth match,done;
1032     /* Try to setup a good matrix partitioning if available */
1033     ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr);
1034     ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
1035     ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
1036     ierr = PetscTypeCompare((PetscObject)mpart,MAT_PARTITIONING_CURRENT,&match);CHKERRQ(ierr);
1037     if (!match) {
1038       ierr = PetscTypeCompare((PetscObject)mpart,MAT_PARTITIONING_SQUARE,&match);CHKERRQ(ierr);
1039     }
1040     if (!match) { /* assume a "good" partitioner is available */
1041       PetscInt na,*ia,*ja;
1042       ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1043       if (done) {
1044 	/* Build adjacency matrix by hand. Unfortunately a call to
1045 	   MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1046 	   remove the block-aij structure and we cannot expect
1047 	   MatPartitioning to split vertices as we need */
1048 	PetscInt i,j,*row,len,nnz,cnt,*iia=0,*jja=0;
1049 	nnz = 0;
1050 	for (i=0; i<na; i++) { /* count number of nonzeros */
1051 	  len = ia[i+1] - ia[i];
1052 	  row = ja + ia[i];
1053 	  for (j=0; j<len; j++) {
1054 	    if (row[j] == i) { /* don't count diagonal */
1055 	      len--; break;
1056 	    }
1057 	  }
1058 	  nnz += len;
1059 	}
1060 	ierr = PetscMalloc((na+1)*sizeof(PetscInt),&iia);CHKERRQ(ierr);
1061 	ierr = PetscMalloc((nnz)*sizeof(PetscInt),&jja);CHKERRQ(ierr);
1062 	nnz    = 0;
1063 	iia[0] = 0;
1064 	for (i=0; i<na; i++) { /* fill adjacency */
1065 	  cnt = 0;
1066 	  len = ia[i+1] - ia[i];
1067 	  row = ja + ia[i];
1068 	  for (j=0; j<len; j++) {
1069 	    if (row[j] != i) { /* if not diagonal */
1070 	      jja[nnz+cnt++] = row[j];
1071 	    }
1072 	  }
1073 	  nnz += cnt;
1074 	  iia[i+1] = nnz;
1075 	}
1076 	/* Partitioning of the adjacency matrix */
1077 	ierr = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,PETSC_NULL,&adj);CHKERRQ(ierr);
1078 	ierr = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr);
1079 	ierr = MatPartitioningSetNParts(mpart,n);CHKERRQ(ierr);
1080 	ierr = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr);
1081 	ierr = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr);
1082 	ierr = MatDestroy(adj);CHKERRQ(ierr);
1083 	foundpart = PETSC_TRUE;
1084       }
1085       ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1086     }
1087     ierr = MatPartitioningDestroy(mpart);CHKERRQ(ierr);
1088   }
1089   if (iscopy) {ierr = MatDestroy(Ad);CHKERRQ(ierr);}
1090 
1091   ierr = PetscMalloc(n*sizeof(IS),&is);CHKERRQ(ierr);
1092   *outis = is;
1093 
1094   if (!foundpart) {
1095 
1096     /* Partitioning by contiguous chunks of rows */
1097 
1098     PetscInt mbs   = (rend-rstart)/bs;
1099     PetscInt start = rstart;
1100     for (i=0; i<n; i++) {
1101       PetscInt count = (mbs/n + ((mbs % n) > i)) * bs;
1102       ierr   = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr);
1103       start += count;
1104     }
1105 
1106   } else {
1107 
1108     /* Partitioning by adjacency of diagonal block  */
1109 
1110     const PetscInt *numbering;
1111     PetscInt       *count,nidx,*indices,*newidx,start=0;
1112     /* Get node count in each partition */
1113     ierr = PetscMalloc(n*sizeof(PetscInt),&count);CHKERRQ(ierr);
1114     ierr = ISPartitioningCount(ispart,n,count);CHKERRQ(ierr);
1115     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1116       for (i=0; i<n; i++) count[i] *= bs;
1117     }
1118     /* Build indices from node numbering */
1119     ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr);
1120     ierr = PetscMalloc(nidx*sizeof(PetscInt),&indices);CHKERRQ(ierr);
1121     for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
1122     ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr);
1123     ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr);
1124     ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr);
1125     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1126       ierr = PetscMalloc(nidx*bs*sizeof(PetscInt),&newidx);CHKERRQ(ierr);
1127       for (i=0; i<nidx; i++)
1128 	for (j=0; j<bs; j++)
1129 	  newidx[i*bs+j] = indices[i]*bs + j;
1130       ierr = PetscFree(indices);CHKERRQ(ierr);
1131       nidx   *= bs;
1132       indices = newidx;
1133     }
1134     /* Shift to get global indices */
1135     for (i=0; i<nidx; i++) indices[i] += rstart;
1136 
1137     /* Build the index sets for each block */
1138     for (i=0; i<n; i++) {
1139       ierr   = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],&is[i]);CHKERRQ(ierr);
1140       ierr   = ISSort(is[i]);CHKERRQ(ierr);
1141       start += count[i];
1142     }
1143 
1144     ierr = PetscFree(count);
1145     ierr = PetscFree(indices);
1146     ierr = ISDestroy(isnumb);CHKERRQ(ierr);
1147     ierr = ISDestroy(ispart);CHKERRQ(ierr);
1148 
1149   }
1150 
1151   PetscFunctionReturn(0);
1152 }
1153 
1154 #undef __FUNCT__
1155 #define __FUNCT__ "PCASMDestroySubdomains"
1156 /*@C
1157    PCASMDestroySubdomains - Destroys the index sets created with
1158    PCASMCreateSubdomains(). Should be called after setting subdomains
1159    with PCASMSetLocalSubdomains().
1160 
1161    Collective
1162 
1163    Input Parameters:
1164 +  n - the number of index sets
1165 -  is - the array of index sets
1166 
1167    Level: advanced
1168 
1169 .keywords: PC, ASM, additive Schwarz, create, subdomains, unstructured grid
1170 
1171 .seealso: PCASMCreateSubdomains(), PCASMSetLocalSubdomains()
1172 @*/
1173 PetscErrorCode PETSCKSP_DLLEXPORT PCASMDestroySubdomains(PetscInt n, IS is[])
1174 {
1175   PetscInt       i;
1176   PetscErrorCode ierr;
1177   PetscFunctionBegin;
1178   if (n <= 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"n must be > 0: n = %D",n);
1179   PetscValidPointer(is,2);
1180   for (i=0; i<n; i++) { ierr = ISDestroy(is[i]);CHKERRQ(ierr); }
1181   ierr = PetscFree(is);CHKERRQ(ierr);
1182   PetscFunctionReturn(0);
1183 }
1184 
1185 #undef __FUNCT__
1186 #define __FUNCT__ "PCASMCreateSubdomains2D"
1187 /*@
1188    PCASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1189    preconditioner for a two-dimensional problem on a regular grid.
1190 
1191    Not Collective
1192 
1193    Input Parameters:
1194 +  m, n - the number of mesh points in the x and y directions
1195 .  M, N - the number of subdomains in the x and y directions
1196 .  dof - degrees of freedom per node
1197 -  overlap - overlap in mesh lines
1198 
1199    Output Parameters:
1200 +  Nsub - the number of subdomains created
1201 -  is - the array of index sets defining the subdomains
1202 
1203    Note:
1204    Presently PCAMSCreateSubdomains2d() is valid only for sequential
1205    preconditioners.  More general related routines are
1206    PCASMSetTotalSubdomains() and PCASMSetLocalSubdomains().
1207 
1208    Level: advanced
1209 
1210 .keywords: PC, ASM, additive Schwarz, create, subdomains, 2D, regular grid
1211 
1212 .seealso: PCASMSetTotalSubdomains(), PCASMSetLocalSubdomains(), PCASMGetSubKSP(),
1213           PCASMSetOverlap()
1214 @*/
1215 PetscErrorCode PETSCKSP_DLLEXPORT PCASMCreateSubdomains2D(PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt dof,PetscInt overlap,PetscInt *Nsub,IS **is)
1216 {
1217   PetscInt       i,j,height,width,ystart,xstart,yleft,yright,xleft,xright,loc_outter;
1218   PetscErrorCode ierr;
1219   PetscInt       nidx,*idx,loc,ii,jj,count;
1220 
1221   PetscFunctionBegin;
1222   if (dof != 1) SETERRQ(PETSC_ERR_SUP," ");
1223 
1224   *Nsub = N*M;
1225   ierr = PetscMalloc((*Nsub)*sizeof(IS *),is);CHKERRQ(ierr);
1226   ystart = 0;
1227   loc_outter = 0;
1228   for (i=0; i<N; i++) {
1229     height = n/N + ((n % N) > i); /* height of subdomain */
1230     if (height < 2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Too many N subdomains for mesh dimension n");
1231     yleft  = ystart - overlap; if (yleft < 0) yleft = 0;
1232     yright = ystart + height + overlap; if (yright > n) yright = n;
1233     xstart = 0;
1234     for (j=0; j<M; j++) {
1235       width = m/M + ((m % M) > j); /* width of subdomain */
1236       if (width < 2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Too many M subdomains for mesh dimension m");
1237       xleft  = xstart - overlap; if (xleft < 0) xleft = 0;
1238       xright = xstart + width + overlap; if (xright > m) xright = m;
1239       nidx   = (xright - xleft)*(yright - yleft);
1240       ierr = PetscMalloc(nidx*sizeof(PetscInt),&idx);CHKERRQ(ierr);
1241       loc    = 0;
1242       for (ii=yleft; ii<yright; ii++) {
1243         count = m*ii + xleft;
1244         for (jj=xleft; jj<xright; jj++) {
1245           idx[loc++] = count++;
1246         }
1247       }
1248       ierr = ISCreateGeneral(PETSC_COMM_SELF,nidx,idx,(*is)+loc_outter++);CHKERRQ(ierr);
1249       ierr = PetscFree(idx);CHKERRQ(ierr);
1250       xstart += width;
1251     }
1252     ystart += height;
1253   }
1254   for (i=0; i<*Nsub; i++) { ierr = ISSort((*is)[i]);CHKERRQ(ierr); }
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "PCASMGetLocalSubdomains"
1260 /*@C
1261     PCASMGetLocalSubdomains - Gets the local subdomains (for this processor
1262     only) for the additive Schwarz preconditioner.
1263 
1264     Collective on PC
1265 
1266     Input Parameter:
1267 .   pc - the preconditioner context
1268 
1269     Output Parameters:
1270 +   n - the number of subdomains for this processor (default value = 1)
1271 -   is - the index sets that define the subdomains for this processor
1272 
1273 
1274     Notes:
1275     The IS numbering is in the parallel, global numbering of the vector.
1276 
1277     Level: advanced
1278 
1279 .keywords: PC, ASM, set, local, subdomains, additive Schwarz
1280 
1281 .seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
1282           PCASMCreateSubdomains2D(), PCASMSetLocalSubdomains(), PCASMGetLocalSubmatrices()
1283 @*/
1284 PetscErrorCode PETSCKSP_DLLEXPORT PCASMGetLocalSubdomains(PC pc,PetscInt *n,IS *is[])
1285 {
1286   PC_ASM         *osm;
1287   PetscErrorCode ierr;
1288   PetscTruth     match;
1289 
1290   PetscFunctionBegin;
1291   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
1292   PetscValidIntPointer(n,2);
1293   if (is) PetscValidPointer(is,3);
1294   ierr = PetscTypeCompare((PetscObject)pc,PCASM,&match);CHKERRQ(ierr);
1295   if (!match) {
1296     if (n)  *n  = 0;
1297     if (is) *is = PETSC_NULL;
1298   } else {
1299     osm = (PC_ASM*)pc->data;
1300     if (n)  *n  = osm->n_local_true;
1301     if (is) *is = osm->is;
1302   }
1303   PetscFunctionReturn(0);
1304 }
1305 
1306 #undef __FUNCT__
1307 #define __FUNCT__ "PCASMGetLocalSubmatrices"
1308 /*@C
1309     PCASMGetLocalSubmatrices - Gets the local submatrices (for this processor
1310     only) for the additive Schwarz preconditioner.
1311 
1312     Collective on PC
1313 
1314     Input Parameter:
1315 .   pc - the preconditioner context
1316 
1317     Output Parameters:
1318 +   n - the number of matrices for this processor (default value = 1)
1319 -   mat - the matrices
1320 
1321 
1322     Level: advanced
1323 
1324 .keywords: PC, ASM, set, local, subdomains, additive Schwarz, block Jacobi
1325 
1326 .seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
1327           PCASMCreateSubdomains2D(), PCASMSetLocalSubdomains(), PCASMGetLocalSubdomains()
1328 @*/
1329 PetscErrorCode PETSCKSP_DLLEXPORT PCASMGetLocalSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1330 {
1331   PC_ASM         *osm;
1332   PetscErrorCode ierr;
1333   PetscTruth     match;
1334 
1335   PetscFunctionBegin;
1336   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
1337   PetscValidIntPointer(n,2);
1338   if (mat) PetscValidPointer(mat,3);
1339   if (!pc->setupcalled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUP() or PCSetUp().");
1340   ierr = PetscTypeCompare((PetscObject)pc,PCASM,&match);CHKERRQ(ierr);
1341   if (!match) {
1342     if (n)   *n   = 0;
1343     if (mat) *mat = PETSC_NULL;
1344   } else {
1345     osm = (PC_ASM*)pc->data;
1346     if (n)   *n   = osm->n_local_true;
1347     if (mat) *mat = osm->pmat;
1348   }
1349   PetscFunctionReturn(0);
1350 }
1351