xref: /petsc/src/ksp/pc/impls/gasm/gasm.c (revision 7b6bb2c608b6fc6714ef38fda02c2dbb91c82665)
1 
2 /*
  This file defines a "generalized" additive Schwarz preconditioner for any Mat implementation.
4   In this version each processor may have any number of subdomains and a subdomain may live on multiple
5   processors.
6 
7        N    - total number of subdomains on all processors
8        n    - actual number of subdomains on this processor
9        nmax - maximum number of subdomains per processor
10 */
11 #include <private/pcimpl.h>     /*I "petscpc.h" I*/
12 
/*
   Private data for the PCGASM preconditioner.  Unlike PCASM, each process may
   own any number of subdomains, and a subdomain may span multiple processes
   (each subdomain IS lives on its own communicator).
*/
typedef struct {
  PetscInt   N,n,nmax;            /* total, local, and max-per-process number of subdomains (see file header) */
  PetscInt   overlap;             /* overlap requested by user; -1 indicates user-supplied (already overlapping) subdomains that must not be grown */
  KSP        *ksp;                /* linear solvers for each block */
  Vec        gx,gy;               /* Merged work vectors */
  Vec        *x,*y;               /* Split work vectors; storage aliases pieces of storage of the above merged vectors. */
  IS         gis, gis_local;      /* merged ISs */
  VecScatter grestriction;        /* merged restriction */
  VecScatter gprolongation;       /* merged prolongation */
  IS         *is;                 /* index set that defines each overlapping subdomain */
  IS         *is_local;           /* index set that defines each local subdomain (same as subdomain with the overlap removed); may be NULL */
  Mat        *pmat;               /* subdomain block matrices */
  PCGASMType  type;               /* use reduced interpolation, restriction or both */
  PetscBool  type_set;            /* if user set this value (so won't change it for symmetric problems) */
  PetscBool  same_local_solves;   /* flag indicating whether all local solvers are same */
  PetscBool  sort_indices;        /* flag to sort subdomain indices */
} PC_GASM;
30 
31 #undef __FUNCT__
32 #define __FUNCT__ "PCGASMPrintSubdomains"
33 static PetscErrorCode  PCGASMPrintSubdomains(PC pc)
34 {
35   PC_GASM         *osm  = (PC_GASM*)pc->data;
36   const char     *prefix;
37   char           fname[PETSC_MAX_PATH_LEN+1];
38   PetscViewer    viewer, sviewer;
39   PetscInt       i,j,nidx;
40   const PetscInt *idx;
41   char           *cidx;
42   PetscBool      found;
43   PetscMPIInt    size, rank;
44   PetscErrorCode ierr;
45 
46   PetscFunctionBegin;
47   ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size);CHKERRQ(ierr);
48   ierr = MPI_Comm_rank(((PetscObject)pc)->comm, &rank);CHKERRQ(ierr);
49   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
50   ierr = PetscOptionsGetString(prefix,"-pc_gasm_print_subdomains",fname,PETSC_MAX_PATH_LEN,&found);CHKERRQ(ierr);
51   if (!found) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); };
52   for (i=0;i<osm->n;++i) {
53     ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr);
54     ierr = ISGetLocalSize(osm->is[i], &nidx);CHKERRQ(ierr);
55     /*
56      No more than 15 characters per index plus a space.
57      PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
58      in case nidx == 0. That will take care of the space for the trailing '\0' as well.
59      For nidx == 0, the whole string 16 '\0'.
60      */
61     ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx);CHKERRQ(ierr);
62     ierr = ISGetIndices(osm->is[i], &idx);CHKERRQ(ierr);
63     ierr = PetscViewerStringOpen(((PetscObject)(osm->is[i]))->comm, cidx, 16*(nidx+1), &sviewer);CHKERRQ(ierr);
64     for(j = 0; j < nidx; ++j) {
65       ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);CHKERRQ(ierr);
66     }
67     ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr);
68     ierr = ISRestoreIndices(osm->is[i],&idx);CHKERRQ(ierr);
69 
70     ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
71     ierr = PetscViewerASCIIPrintf(viewer, "Subdomain with overlap\n");CHKERRQ(ierr);
72     ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr);
73     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
74     ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
75     ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr);
76     ierr = PetscFree(cidx);CHKERRQ(ierr);
77     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
78     if (osm->is_local) {
79       ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr);
80       ierr = ISGetLocalSize(osm->is_local[i], &nidx);CHKERRQ(ierr);
81       /*
82        No more than 15 characters per index plus a space.
83        PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
84        in case nidx == 0. That will take care of the space for the trailing '\0' as well.
85        For nidx == 0, the whole string 16 '\0'.
86        */
87       ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx);CHKERRQ(ierr);
88       ierr = ISGetIndices(osm->is_local[i], &idx);CHKERRQ(ierr);
89       ierr = PetscViewerStringOpen(((PetscObject)(osm->is_local[i]))->comm, cidx, 16*(nidx+1), &sviewer);CHKERRQ(ierr);
90       for(j = 0; j < nidx; ++j) {
91         ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);CHKERRQ(ierr);
92       }
93       ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr);
94       ierr = ISRestoreIndices(osm->is_local[i],&idx);CHKERRQ(ierr);
95 
96       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
97       ierr = PetscViewerASCIIPrintf(viewer, "Subdomain without overlap\n");CHKERRQ(ierr);
98       ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr);
99       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
100       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
101       ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr);
102       ierr = PetscFree(cidx);CHKERRQ(ierr);
103       ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
104     }
105   }
106   PetscFunctionReturn(0);
107 }
108 
109 
110 #undef __FUNCT__
111 #define __FUNCT__ "PCView_GASM"
112 static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer)
113 {
114   PC_GASM         *osm = (PC_GASM*)pc->data;
115   const char     *prefix;
116   PetscErrorCode ierr;
117   PetscMPIInt    rank, size;
118   PetscInt       i,bsz;
119   PetscBool      iascii,isstring, print_subdomains;
120   PetscViewer    sviewer;
121 
122 
123   PetscFunctionBegin;
124   ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size);CHKERRQ(ierr);
125   ierr = MPI_Comm_rank(((PetscObject)pc)->comm, &rank);CHKERRQ(ierr);
126   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
127   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);CHKERRQ(ierr);
128   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
129   ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&print_subdomains,PETSC_NULL);CHKERRQ(ierr);
130   if (iascii) {
131     char overlaps[256] = "user-defined overlap",subdomains[256] = "total subdomains set";
132     if (osm->overlap >= 0) {ierr = PetscSNPrintf(overlaps,sizeof overlaps,"amount of overlap = %D",osm->overlap);CHKERRQ(ierr);}
133     if (osm->nmax > 0)     {ierr = PetscSNPrintf(subdomains,sizeof subdomains,"max number of local subdomains = %D",osm->nmax);CHKERRQ(ierr);}
134     ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
135     ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d:%d] number of locally-supported subdomains = %D\n",(int)rank,(int)size,osm->n);CHKERRQ(ierr);
136     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
137     ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
138     ierr = PetscViewerASCIIPrintf(viewer,"  Generalized additive Schwarz: %s, %s\n",subdomains,overlaps);CHKERRQ(ierr);
139     ierr = PetscViewerASCIIPrintf(viewer,"  Generalized additive Schwarz: restriction/interpolation type - %s\n",PCGASMTypes[osm->type]);CHKERRQ(ierr);
140     ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
141     if (osm->same_local_solves) {
142       if (osm->ksp) {
143         ierr = PetscViewerASCIIPrintf(viewer,"  Local solve is same for all subdomains, in the following KSP and PC objects:\n");CHKERRQ(ierr);
144         ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
145         if (!rank) {
146           ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
147           ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);
148           ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
149         }
150         ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
151       }
152     } else {
153       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
154       ierr = PetscViewerASCIIPrintf(viewer,"  Local solve info for each subdomain is in the following KSP and PC objects:\n");CHKERRQ(ierr);
155       ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
156       ierr = PetscViewerASCIIPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
157       for (i=0; i<osm->nmax; i++) {
158         ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
159         if (i < osm->n) {
160           ierr = ISGetLocalSize(osm->is[i],&bsz);CHKERRQ(ierr);
161           ierr = PetscViewerASCIISynchronizedAllow(sviewer,PETSC_TRUE);CHKERRQ(ierr);
162           ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"[%d:%d] local subdomain number %D, size = %D\n",(int)rank,(int)size,i,bsz);CHKERRQ(ierr);
163           ierr = KSPView(osm->ksp[i],sviewer);CHKERRQ(ierr);
164           ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
165           ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr);
166           ierr = PetscViewerASCIISynchronizedAllow(sviewer,PETSC_FALSE);CHKERRQ(ierr);
167         }
168         ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
169       }
170       ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
171       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
172     }
173   } else if (isstring) {
174     ierr = PetscViewerStringSPrintf(viewer," subdomains=%D, overlap=%D, type=%s",osm->n,osm->overlap,PCGASMTypes[osm->type]);CHKERRQ(ierr);
175     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
176     if (osm->ksp) {ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);}
177     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
178   } else {
179     SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_SUP,"Viewer type %s not supported for PCGASM",((PetscObject)viewer)->type_name);
180   }
181   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
182   if(print_subdomains) {
183     ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr);
184   }
185   PetscFunctionReturn(0);
186 }
187 
188 
189 
190 
191 
#undef __FUNCT__
#define __FUNCT__ "PCSetUp_GASM"
/*
   PCSetUp_GASM - builds everything the apply phase needs:
     - decides the subdomain counts (N, n, nmax) if not already set,
     - creates osm->is/osm->is_local if the user did not supply them,
     - grows osm->is by the requested overlap,
     - merges the local subdomain ISs into osm->gis, creates the merged work
       vectors gx/gy and the restriction/prolongation scatters,
     - creates per-subdomain work vectors that alias the storage of gx/gy,
     - creates one KSP per subdomain and extracts the subdomain matrices.
   On repeated calls only the submatrices are refreshed (rebuilt from scratch
   when the nonzero pattern changed).
*/
static PetscErrorCode PCSetUp_GASM(PC pc)
{
  PC_GASM         *osm  = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscBool      symset,flg;
  PetscInt       i;
  PetscMPIInt    rank, size;
  MatReuse       scall = MAT_REUSE_MATRIX;
  KSP            ksp;
  PC             subpc;
  const char     *prefix,*pprefix;
  PetscInt       dn;       /* Number of indices in a single subdomain assigned to this processor. */
  const PetscInt *didx;    /* Indices from a single subdomain assigned to this processor. */
  PetscInt       ddn;      /* Number of indices in all subdomains assigned to this processor. */
  PetscInt       *ddidx;   /* Indices of all subdomains assigned to this processor. */
  IS             gid;      /* Identity IS of the size of all subdomains assigned to this processor. */
  Vec            x,y;
  PetscScalar    *gxarray, *gyarray;
  PetscInt       gfirst, glast;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
  if (!pc->setupcalled) {

    /* Default to the basic (symmetric) type when the matrix is known symmetric
       and the user did not explicitly choose a type. */
    if (!osm->type_set) {
      ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
      if (symset && flg) { osm->type = PC_GASM_BASIC; }
    }

    if (osm->N == PETSC_DECIDE && osm->n < 1) {
      /* no subdomains given, use one per processor */
      osm->nmax = osm->n = 1;
      ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
      osm->N = size;
    } else if (osm->N == PETSC_DECIDE) {
      PetscInt inwork[2], outwork[2];
      /* determine global number of subdomains and the max number of local subdomains */
      /* PetscMaxSum_Op reduces to (max of first entry, sum of second) in one Allreduce. */
      inwork[0] = inwork[1] = osm->n;
      ierr = MPI_Allreduce(inwork,outwork,1,MPIU_2INT,PetscMaxSum_Op,((PetscObject)pc)->comm);CHKERRQ(ierr);
      osm->nmax = outwork[0];
      osm->N    = outwork[1];
    }
    if (!osm->is){ /* create the index sets */
      ierr = PCGASMCreateSubdomains(pc->pmat,osm->n,&osm->is);CHKERRQ(ierr);
    }
    if (!osm->is_local) {
      /*
	 This indicates that osm->is should define a nonoverlapping decomposition
	 (there is no way to really guarantee that if subdomains are set by the user through PCGASMSetLocalSubdomains,
	  but the assumption is that either the user does the right thing, or subdomains in ossm->is have been created
	  via PCGASMCreateSubdomains, which guarantees a nonoverlapping decomposition).
	 Therefore, osm->is will be used to define osm->is_local.
	 If a nonzero overlap has been requested by the user, then osm->is will be expanded and will overlap,
	 so osm->is_local should obtain a copy of osm->is while they are still (presumably) nonoverlapping.
	 Otherwise (no overlap has been requested), osm->is_local are simply aliases for osm->is.
      */
      ierr = PetscMalloc(osm->n*sizeof(IS),&osm->is_local);CHKERRQ(ierr);
      for (i=0; i<osm->n; i++) {
        if (osm->overlap > 0) { /* With positive overlap, osm->is[i] will be modified */
          ierr = ISDuplicate(osm->is[i],&osm->is_local[i]);CHKERRQ(ierr);
          ierr = ISCopy(osm->is[i],osm->is_local[i]);CHKERRQ(ierr);
        } else {
          ierr = PetscObjectReference((PetscObject)osm->is[i]);CHKERRQ(ierr);
          osm->is_local[i] = osm->is[i];
        }
      }
    }
    /* Beyond this point osm->is_local is not null. */
    if (osm->overlap > 0) {
      /* Extend the "overlapping" regions by a number of steps */
      ierr = MatIncreaseOverlap(pc->pmat,osm->n,osm->is,osm->overlap);CHKERRQ(ierr);
    }
    ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
    flg  = PETSC_FALSE;
    ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&flg,PETSC_NULL);CHKERRQ(ierr);
    if (flg) { ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr); }

    if (osm->sort_indices) {
      for (i=0; i<osm->n; i++) {
        ierr = ISSort(osm->is[i]);CHKERRQ(ierr);
        ierr = ISSort(osm->is_local[i]);CHKERRQ(ierr);
      }
    }
    /* Merge the ISs, create merged vectors and scatter contexts. */
    /* Restriction ISs. */
    /* First pass: count the total number of indices over all local subdomains. */
    ddn = 0;
    for (i=0; i<osm->n; i++) {
      ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr);
      ddn += dn;
    }
    /* Second pass: concatenate all subdomain indices into one array. */
    ierr = PetscMalloc(ddn*sizeof(PetscInt), &ddidx);CHKERRQ(ierr);
    ddn = 0;
    for (i=0; i<osm->n; i++) {
      ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr);
      ierr = ISGetIndices(osm->is[i],&didx);CHKERRQ(ierr);
      ierr = PetscMemcpy(ddidx+ddn, didx, sizeof(PetscInt)*dn);CHKERRQ(ierr);
      ierr = ISRestoreIndices(osm->is[i], &didx);CHKERRQ(ierr);
      ddn += dn;
    }
    /* gis takes ownership of ddidx (PETSC_OWN_POINTER): do not free it here. */
    ierr = ISCreateGeneral(((PetscObject)(pc))->comm, ddn, ddidx, PETSC_OWN_POINTER, &osm->gis);CHKERRQ(ierr);
    ierr = MatGetVecs(pc->pmat,&x,&y);CHKERRQ(ierr);
    /* Merged work vectors: one local slot per (possibly repeated) subdomain index. */
    ierr = VecCreateMPI(((PetscObject)pc)->comm, ddn, PETSC_DECIDE, &osm->gx);CHKERRQ(ierr);
    ierr = VecDuplicate(osm->gx,&osm->gy);CHKERRQ(ierr);
    ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast);CHKERRQ(ierr);
    /* Identity stride IS over this process's slice of gx, used as the scatter target. */
    ierr = ISCreateStride(((PetscObject)pc)->comm,ddn,gfirst,1, &gid);CHKERRQ(ierr);
    ierr = VecScatterCreate(x,osm->gis,osm->gx,gid, &(osm->grestriction));CHKERRQ(ierr);
    ierr = ISDestroy(&gid);CHKERRQ(ierr);
    /* Prolongation ISs */
    { PetscInt       dn_local;       /* Number of indices in the local part of a single domain assigned to this processor. */
      const PetscInt *didx_local;    /* Global indices from the local part of a single domain assigned to this processor. */
      PetscInt       ddn_local;      /* Number of indices in the local part of the disjoint union all domains assigned to this processor. */
      PetscInt       *ddidx_local;   /* Global indices of the local part of the disjoint union of all domains assigned to this processor. */
      /**/
      ISLocalToGlobalMapping ltog;          /* Map from global to local indices on the disjoint union of subdomains: "local" ind's run from 0 to ddn-1. */
      PetscInt              *ddidx_llocal;  /* Mapped local indices of the disjoint union of local parts of subdomains. */
      PetscInt               ddn_llocal;    /* Number of indices in ddidx_llocal; must equal ddn_local, or else gis_local is not a sub-IS of gis. */
      IS                     gis_llocal;    /* IS with ddidx_llocal indices. */
      PetscInt               j;
      /* Concatenate the nonoverlapping (local) subdomain indices, same two-pass
         scheme as for the restriction above. */
      ddn_local = 0;
      for (i=0; i<osm->n; i++) {
        ierr = ISGetLocalSize(osm->is_local[i],&dn_local);CHKERRQ(ierr);
        ddn_local += dn_local;
      }
      ierr = PetscMalloc(ddn_local*sizeof(PetscInt), &ddidx_local);CHKERRQ(ierr);
      ddn_local = 0;
      for (i=0; i<osm->n; i++) {
        ierr = ISGetLocalSize(osm->is_local[i],&dn_local);CHKERRQ(ierr);
        ierr = ISGetIndices(osm->is_local[i],&didx_local);CHKERRQ(ierr);
        ierr = PetscMemcpy(ddidx_local+ddn_local, didx_local, sizeof(PetscInt)*dn_local);CHKERRQ(ierr);
        ierr = ISRestoreIndices(osm->is_local[i], &didx_local);CHKERRQ(ierr);
        ddn_local += dn_local;
      }
      ierr = PetscMalloc(sizeof(PetscInt)*ddn_local, &ddidx_llocal);CHKERRQ(ierr);
      /* gis_local takes ownership of ddidx_local; ddidx_local stays readable below
         until the mapping is applied. */
      ierr = ISCreateGeneral(((PetscObject)pc)->comm, ddn_local, ddidx_local, PETSC_OWN_POINTER, &(osm->gis_local));CHKERRQ(ierr);
      /* Locate each nonoverlapping index within the merged (overlapping) gis. */
      ierr = ISLocalToGlobalMappingCreateIS(osm->gis,&ltog);CHKERRQ(ierr);
      ierr = ISGlobalToLocalMappingApply(ltog,IS_GTOLM_DROP,ddn_local,ddidx_local,&ddn_llocal,ddidx_llocal);CHKERRQ(ierr);
      ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
      /* IS_GTOLM_DROP discards indices not found in gis; any drop means
         gis_local is not a subset of gis, which is a library-level error. */
      if (ddn_llocal != ddn_local) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"gis_local contains %D indices outside of gis", ddn_llocal - ddn_local);
      /* Now convert these localized indices into the global indices into the merged output vector. */
      ierr = VecGetOwnershipRange(osm->gy, &gfirst, &glast);CHKERRQ(ierr);
      for(j=0; j < ddn_llocal; ++j) {
        ddidx_llocal[j] += gfirst;
      }
      ierr = ISCreateGeneral(PETSC_COMM_SELF,ddn_llocal,ddidx_llocal,PETSC_OWN_POINTER,&gis_llocal);CHKERRQ(ierr);
      ierr = VecScatterCreate(y,osm->gis_local,osm->gy,gis_llocal,&osm->gprolongation);CHKERRQ(ierr);
      ierr = ISDestroy(&gis_llocal);CHKERRQ(ierr);
    }
    /* Create the subdomain work vectors. */
    /* Each osm->x[i]/osm->y[i] aliases a contiguous slice of gx/gy storage, so the
       merged scatters fill the per-subdomain vectors with no extra copies. */
    ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->x);CHKERRQ(ierr);
    ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->y);CHKERRQ(ierr);
    ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast);CHKERRQ(ierr);
    ierr = VecGetArray(osm->gx, &gxarray);CHKERRQ(ierr);
    ierr = VecGetArray(osm->gy, &gyarray);CHKERRQ(ierr);
    /* ddn is the running offset of subdomain i within the merged arrays; the
       "ddn += dn" in the increment uses the dn set inside the loop body. */
    for (i=0, ddn=0; i<osm->n; ++i, ddn += dn) {
      PetscInt dN;
      ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr);
      ierr = ISGetSize(osm->is[i],&dN);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gxarray+ddn,&osm->x[i]);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gyarray+ddn,&osm->y[i]);CHKERRQ(ierr);
    }
    ierr = VecRestoreArray(osm->gx, &gxarray);CHKERRQ(ierr);
    ierr = VecRestoreArray(osm->gy, &gyarray);CHKERRQ(ierr);
    ierr = VecDestroy(&x);CHKERRQ(ierr);
    ierr = VecDestroy(&y);CHKERRQ(ierr);
    /* Create the local solvers */
    ierr = PetscMalloc(osm->n*sizeof(KSP *),&osm->ksp);CHKERRQ(ierr);
    for (i=0; i<osm->n; i++) { /* KSPs are local */
      ierr = KSPCreate(((PetscObject)(osm->is[i]))->comm,&ksp);CHKERRQ(ierr);
      ierr = PetscLogObjectParent(pc,ksp);CHKERRQ(ierr);
      ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr);
      ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
      ierr = KSPGetPC(ksp,&subpc);CHKERRQ(ierr);
      ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
      ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr);
      ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr);
      osm->ksp[i] = ksp;
    }
    scall = MAT_INITIAL_MATRIX;

  } else {
    /*
       Destroy the blocks from the previous iteration
    */
    if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
      ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
      scall = MAT_INITIAL_MATRIX;
    }
  }

  /*
     Extract out the submatrices.
  */
  /* Subdomains may span processes, so the parallel extraction routine is
     required whenever the PC communicator has more than one rank. */
  if(size > 1) {
    ierr = MatGetSubMatricesParallel(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr);
  }
  else {
    ierr = MatGetSubMatrices(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr);
  }
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr);
    for (i=0; i<osm->n; i++) {
      ierr = PetscLogObjectParent(pc,osm->pmat[i]);CHKERRQ(ierr);
      ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr);
    }
  }

  /* Return control to the user so that the submatrices can be modified (e.g., to apply
     different boundary conditions for the submatrices than for the global problem) */
  ierr = PCModifySubMatrices(pc,osm->n,osm->is,osm->is,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr);

  /*
     Loop over submatrices putting them into local ksp
  */
  for (i=0; i<osm->n; i++) {
    ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i],pc->flag);CHKERRQ(ierr);
    if (!pc->setupcalled) {
      ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr);
    }
  }

  PetscFunctionReturn(0);
}
417 
418 #undef __FUNCT__
419 #define __FUNCT__ "PCSetUpOnBlocks_GASM"
420 static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc)
421 {
422   PC_GASM         *osm = (PC_GASM*)pc->data;
423   PetscErrorCode ierr;
424   PetscInt       i;
425 
426   PetscFunctionBegin;
427   for (i=0; i<osm->n; i++) {
428     ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr);
429   }
430   PetscFunctionReturn(0);
431 }
432 
433 #undef __FUNCT__
434 #define __FUNCT__ "PCApply_GASM"
435 static PetscErrorCode PCApply_GASM(PC pc,Vec x,Vec y)
436 {
437   PC_GASM         *osm = (PC_GASM*)pc->data;
438   PetscErrorCode ierr;
439   PetscInt       i;
440   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
441 
442   PetscFunctionBegin;
443   /*
444      Support for limiting the restriction or interpolation to only local
445      subdomain values (leaving the other values 0).
446   */
447   if (!(osm->type & PC_GASM_RESTRICT)) {
448     forward = SCATTER_FORWARD_LOCAL;
449     /* have to zero the work RHS since scatter may leave some slots empty */
450     ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
451   }
452   if (!(osm->type & PC_GASM_INTERPOLATE)) {
453     reverse = SCATTER_REVERSE_LOCAL;
454   }
455 
456   ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
457   ierr = VecZeroEntries(y);CHKERRQ(ierr);
458   ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
459   /* do the subdomain solves */
460   for (i=0; i<osm->n; ++i) {
461     ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
462   }
463   ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
464   ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
465   PetscFunctionReturn(0);
466 }
467 
468 #undef __FUNCT__
469 #define __FUNCT__ "PCApplyTranspose_GASM"
470 static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec x,Vec y)
471 {
472   PC_GASM         *osm = (PC_GASM*)pc->data;
473   PetscErrorCode ierr;
474   PetscInt       i;
475   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
476 
477   PetscFunctionBegin;
478   /*
479      Support for limiting the restriction or interpolation to only local
480      subdomain values (leaving the other values 0).
481 
482      Note: these are reversed from the PCApply_GASM() because we are applying the
483      transpose of the three terms
484   */
485   if (!(osm->type & PC_GASM_INTERPOLATE)) {
486     forward = SCATTER_FORWARD_LOCAL;
487     /* have to zero the work RHS since scatter may leave some slots empty */
488     ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
489   }
490   if (!(osm->type & PC_GASM_RESTRICT)) {
491     reverse = SCATTER_REVERSE_LOCAL;
492   }
493 
494   ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
495   ierr = VecZeroEntries(y);CHKERRQ(ierr);
496   ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
497   /* do the local solves */
498   for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
499     ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
500   }
501   ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
502   ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
503   PetscFunctionReturn(0);
504 }
505 
506 #undef __FUNCT__
507 #define __FUNCT__ "PCReset_GASM"
508 static PetscErrorCode PCReset_GASM(PC pc)
509 {
510   PC_GASM        *osm = (PC_GASM*)pc->data;
511   PetscErrorCode ierr;
512   PetscInt       i;
513 
514   PetscFunctionBegin;
515   if (osm->ksp) {
516     for (i=0; i<osm->n; i++) {
517       ierr = KSPReset(osm->ksp[i]);CHKERRQ(ierr);
518     }
519   }
520   if (osm->pmat) {
521     if (osm->n > 0) {
522       ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
523     }
524   }
525   if (osm->x) {
526     for (i=0; i<osm->n; i++) {
527       ierr = VecDestroy(&osm->x[i]);CHKERRQ(ierr);
528       ierr = VecDestroy(&osm->y[i]);CHKERRQ(ierr);
529     }
530   }
531   ierr = VecDestroy(&osm->gx);CHKERRQ(ierr);
532   ierr = VecDestroy(&osm->gy);CHKERRQ(ierr);
533 
534   ierr = VecScatterDestroy(&osm->grestriction);CHKERRQ(ierr);
535   ierr = VecScatterDestroy(&osm->gprolongation);CHKERRQ(ierr);
536   if (osm->is) {ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr); osm->is = 0;}
537   ierr = ISDestroy(&osm->gis);CHKERRQ(ierr);
538   ierr = ISDestroy(&osm->gis_local);CHKERRQ(ierr);
539   PetscFunctionReturn(0);
540 }
541 
542 #undef __FUNCT__
543 #define __FUNCT__ "PCDestroy_GASM"
544 static PetscErrorCode PCDestroy_GASM(PC pc)
545 {
546   PC_GASM         *osm = (PC_GASM*)pc->data;
547   PetscErrorCode ierr;
548   PetscInt       i;
549 
550   PetscFunctionBegin;
551   ierr = PCReset_GASM(pc);CHKERRQ(ierr);
552   if (osm->ksp) {
553     for (i=0; i<osm->n; i++) {
554       ierr = KSPDestroy(&osm->ksp[i]);CHKERRQ(ierr);
555     }
556     ierr = PetscFree(osm->ksp);CHKERRQ(ierr);
557   }
558   ierr = PetscFree(osm->x);CHKERRQ(ierr);
559   ierr = PetscFree(osm->y);CHKERRQ(ierr);
560   ierr = PetscFree(pc->data);CHKERRQ(ierr);
561   PetscFunctionReturn(0);
562 }
563 
564 #undef __FUNCT__
565 #define __FUNCT__ "PCSetFromOptions_GASM"
566 static PetscErrorCode PCSetFromOptions_GASM(PC pc) {
567   PC_GASM         *osm = (PC_GASM*)pc->data;
568   PetscErrorCode ierr;
569   PetscInt       blocks,ovl;
570   PetscBool      symset,flg;
571   PCGASMType      gasmtype;
572 
573   PetscFunctionBegin;
574   /* set the type to symmetric if matrix is symmetric */
575   if (!osm->type_set && pc->pmat) {
576     ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
577     if (symset && flg) { osm->type = PC_GASM_BASIC; }
578   }
579   ierr = PetscOptionsHead("Generalized additive Schwarz options");CHKERRQ(ierr);
580     ierr = PetscOptionsInt("-pc_gasm_blocks","Number of subdomains","PCGASMSetTotalSubdomains",osm->n,&blocks,&flg);CHKERRQ(ierr);
581     if (flg) {ierr = PCGASMSetTotalSubdomains(pc,blocks);CHKERRQ(ierr); }
582     ierr = PetscOptionsInt("-pc_gasm_overlap","Number of grid points overlap","PCGASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr);
583     if (flg) {ierr = PCGASMSetOverlap(pc,ovl);CHKERRQ(ierr); }
584     flg  = PETSC_FALSE;
585     ierr = PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);CHKERRQ(ierr);
586     if (flg) {ierr = PCGASMSetType(pc,gasmtype);CHKERRQ(ierr); }
587   ierr = PetscOptionsTail();CHKERRQ(ierr);
588   PetscFunctionReturn(0);
589 }
590 
591 /*------------------------------------------------------------------------------------*/
592 
593 EXTERN_C_BEGIN
594 #undef __FUNCT__
595 #define __FUNCT__ "PCGASMSetLocalSubdomains_GASM"
596 PetscErrorCode  PCGASMSetLocalSubdomains_GASM(PC pc,PetscInt n,IS is[],IS is_local[])
597 {
598   PC_GASM         *osm = (PC_GASM*)pc->data;
599   PetscErrorCode ierr;
600   PetscInt       i;
601 
602   PetscFunctionBegin;
603   if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more blocks, n = %D",n);
604   if (pc->setupcalled && (n != osm->n || is)) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetLocalSubdomains() should be called before calling PCSetUp().");
605 
606   if (!pc->setupcalled) {
607     osm->n            = n;
608     osm->is           = 0;
609     osm->is_local     = 0;
610     if (is) {
611       for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is[i]);CHKERRQ(ierr);}
612     }
613     if (is_local) {
614       for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is_local[i]);CHKERRQ(ierr);}
615     }
616     if (osm->is) {
617       ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
618     }
619     if (is) {
620       ierr = PetscMalloc(n*sizeof(IS),&osm->is);CHKERRQ(ierr);
621       for (i=0; i<n; i++) { osm->is[i] = is[i]; }
622       /* Flag indicating that the user has set overlapping subdomains so PCGASM should not increase their size. */
623       osm->overlap = -1;
624     }
625     if (is_local) {
626       ierr = PetscMalloc(n*sizeof(IS),&osm->is_local);CHKERRQ(ierr);
627       for (i=0; i<n; i++) { osm->is_local[i] = is_local[i]; }
628     }
629   }
630   PetscFunctionReturn(0);
631 }
632 EXTERN_C_END
633 
634 EXTERN_C_BEGIN
635 #undef __FUNCT__
636 #define __FUNCT__ "PCGASMSetTotalSubdomains_GASM"
637 PetscErrorCode  PCGASMSetTotalSubdomains_GASM(PC pc,PetscInt N) {
638   PC_GASM         *osm = (PC_GASM*)pc->data;
639   PetscErrorCode ierr;
640   PetscMPIInt    rank,size;
641   PetscInt       n;
642 
643   PetscFunctionBegin;
644   if (N < 1) SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Number of total blocks must be > 0, N = %D",N);
645 
646   /*
647      Split the subdomains equally among all processors
648   */
649   ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
650   ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
651   n = N/size + ((N % size) > rank);
652   if (!n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Process %d must have at least one block: total processors %d total blocks %D",(int)rank,(int)size,N);
653   if (pc->setupcalled && n != osm->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before PCSetUp().");
654   if (!pc->setupcalled) {
655     if (osm->is) {
656       ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
657     }
658     osm->N            = N;
659     osm->n            = n;
660     osm->is           = 0;
661     osm->is_local     = 0;
662   }
663   PetscFunctionReturn(0);
664 }/* PCGASMSetTotalSubdomains_GASM() */
665 EXTERN_C_END
666 
667 EXTERN_C_BEGIN
668 #undef __FUNCT__
669 #define __FUNCT__ "PCGASMSetOverlap_GASM"
670 PetscErrorCode  PCGASMSetOverlap_GASM(PC pc,PetscInt ovl)
671 {
672   PC_GASM *osm = (PC_GASM*)pc->data;
673 
674   PetscFunctionBegin;
675   if (ovl < 0) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
676   if (pc->setupcalled && ovl != osm->overlap) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp().");
677   if (!pc->setupcalled) {
678     osm->overlap = ovl;
679   }
680   PetscFunctionReturn(0);
681 }
682 EXTERN_C_END
683 
684 EXTERN_C_BEGIN
685 #undef __FUNCT__
686 #define __FUNCT__ "PCGASMSetType_GASM"
687 PetscErrorCode  PCGASMSetType_GASM(PC pc,PCGASMType type)
688 {
689   PC_GASM *osm = (PC_GASM*)pc->data;
690 
691   PetscFunctionBegin;
692   osm->type     = type;
693   osm->type_set = PETSC_TRUE;
694   PetscFunctionReturn(0);
695 }
696 EXTERN_C_END
697 
698 EXTERN_C_BEGIN
699 #undef __FUNCT__
700 #define __FUNCT__ "PCGASMSetSortIndices_GASM"
701 PetscErrorCode  PCGASMSetSortIndices_GASM(PC pc,PetscBool  doSort)
702 {
703   PC_GASM *osm = (PC_GASM*)pc->data;
704 
705   PetscFunctionBegin;
706   osm->sort_indices = doSort;
707   PetscFunctionReturn(0);
708 }
709 EXTERN_C_END
710 
711 EXTERN_C_BEGIN
712 #undef __FUNCT__
713 #define __FUNCT__ "PCGASMGetSubKSP_GASM"
714 /*
715    FIX: This routine might need to be modified once multiple ranks per subdomain are allowed.
716         In particular, it would upset the global subdomain number calculation.
717 */
718 PetscErrorCode  PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp)
719 {
720   PC_GASM         *osm = (PC_GASM*)pc->data;
721   PetscErrorCode ierr;
722 
723   PetscFunctionBegin;
724   if (osm->n < 1) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ORDER,"Need to call PCSetUP() on PC (or KSPSetUp() on the outer KSP object) before calling here");
725 
726   if (n) {
727     *n = osm->n;
728   }
729   if (first) {
730     ierr = MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr);
731     *first -= osm->n;
732   }
733   if (ksp) {
734     /* Assume that local solves are now different; not necessarily
735        true though!  This flag is used only for PCView_GASM() */
736     *ksp                   = osm->ksp;
737     osm->same_local_solves = PETSC_FALSE;
738   }
739   PetscFunctionReturn(0);
740 }/* PCGASMGetSubKSP_GASM() */
741 EXTERN_C_END
742 
743 
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetLocalSubdomains"
/*@C
    PCGASMSetLocalSubdomains - Sets the local subdomains (for this processor
    only) for the additive Schwarz preconditioner.

    Collective on PC

    Input Parameters:
+   pc - the preconditioner context
.   n - the number of subdomains for this processor (default value = 1)
.   is - the index set that defines the subdomains for this processor
         (or PETSC_NULL for PETSc to determine subdomains)
-   is_local - the index sets that define the local part of the subdomains for this processor
         (or PETSC_NULL to use the default of 1 subdomain per process)

    Notes:
    The IS numbering is in the parallel, global numbering of the vector.

    By default the GASM preconditioner uses 1 block per processor.

    This routine must be called before PCSetUp(); the GASM implementation
    raises an error otherwise.

    Use PCGASMSetTotalSubdomains() to set the subdomains for all processors.

    Level: advanced

.keywords: PC, GASM, set, local, subdomains, additive Schwarz

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains()
@*/
PetscErrorCode  PCGASMSetLocalSubdomains(PC pc,PetscInt n,IS is[],IS is_local[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* Dispatch to the GASM implementation; a no-op for other PC types. */
  ierr = PetscTryMethod(pc,"PCGASMSetLocalSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,is,is_local));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
783 
784 #undef __FUNCT__
785 #define __FUNCT__ "PCGASMSetTotalSubdomains"
786 /*@C
787     PCGASMSetTotalSubdomains - Sets the subdomains for all processor for the
788     additive Schwarz preconditioner.  Either all or no processors in the
789     PC communicator must call this routine, with the same index sets.
790 
791     Collective on PC
792 
793     Input Parameters:
794 +   pc - the preconditioner context
795 .   n - the number of subdomains for all processors
796 .   is - the index sets that define the subdomains for all processor
797          (or PETSC_NULL for PETSc to determine subdomains)
798 -   is_local - the index sets that define the local part of the subdomains for this processor
799          (or PETSC_NULL to use the default of 1 subdomain per process)
800 
801     Options Database Key:
802     To set the total number of subdomain blocks rather than specify the
803     index sets, use the option
804 .    -pc_gasm_blocks <blks> - Sets total blocks
805 
806     Notes:
807     Currently you cannot use this to set the actual subdomains with the argument is.
808 
809     By default the GASM preconditioner uses 1 block per processor.
810 
811     These index sets cannot be destroyed until after completion of the
812     linear solves for which the GASM preconditioner is being used.
813 
814     Use PCGASMSetLocalSubdomains() to set local subdomains.
815 
816     Level: advanced
817 
818 .keywords: PC, GASM, set, total, global, subdomains, additive Schwarz
819 
820 .seealso: PCGASMSetLocalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
821           PCGASMCreateSubdomains2D()
822 @*/
823 PetscErrorCode  PCGASMSetTotalSubdomains(PC pc,PetscInt N)
824 {
825   PetscErrorCode ierr;
826 
827   PetscFunctionBegin;
828   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
829   ierr = PetscTryMethod(pc,"PCGASMSetTotalSubdomains_C",(PC,PetscInt),(pc,N));CHKERRQ(ierr);
830   PetscFunctionReturn(0);
831 }
832 
833 #undef __FUNCT__
834 #define __FUNCT__ "PCGASMSetOverlap"
835 /*@
836     PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
837     additive Schwarz preconditioner.  Either all or no processors in the
838     PC communicator must call this routine.
839 
840     Logically Collective on PC
841 
842     Input Parameters:
843 +   pc  - the preconditioner context
844 -   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 1)
845 
846     Options Database Key:
847 .   -pc_gasm_overlap <ovl> - Sets overlap
848 
849     Notes:
850     By default the GASM preconditioner uses 1 block per processor.  To use
851     multiple blocks per perocessor, see PCGASMSetTotalSubdomains() and
852     PCGASMSetLocalSubdomains() (and the option -pc_gasm_blocks <blks>).
853 
854     The overlap defaults to 1, so if one desires that no additional
855     overlap be computed beyond what may have been set with a call to
856     PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(), then ovl
857     must be set to be 0.  In particular, if one does not explicitly set
858     the subdomains an application code, then all overlap would be computed
859     internally by PETSc, and using an overlap of 0 would result in an GASM
860     variant that is equivalent to the block Jacobi preconditioner.
861 
862     Note that one can define initial index sets with any overlap via
863     PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(); the routine
864     PCGASMSetOverlap() merely allows PETSc to extend that overlap further
865     if desired.
866 
867     Level: intermediate
868 
869 .keywords: PC, GASM, set, overlap
870 
871 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
872           PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains()
873 @*/
874 PetscErrorCode  PCGASMSetOverlap(PC pc,PetscInt ovl)
875 {
876   PetscErrorCode ierr;
877 
878   PetscFunctionBegin;
879   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
880   PetscValidLogicalCollectiveInt(pc,ovl,2);
881   ierr = PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));CHKERRQ(ierr);
882   PetscFunctionReturn(0);
883 }
884 
885 #undef __FUNCT__
886 #define __FUNCT__ "PCGASMSetType"
887 /*@
888     PCGASMSetType - Sets the type of restriction and interpolation used
889     for local problems in the additive Schwarz method.
890 
891     Logically Collective on PC
892 
893     Input Parameters:
894 +   pc  - the preconditioner context
895 -   type - variant of GASM, one of
896 .vb
897       PC_GASM_BASIC       - full interpolation and restriction
898       PC_GASM_RESTRICT    - full restriction, local processor interpolation
899       PC_GASM_INTERPOLATE - full interpolation, local processor restriction
900       PC_GASM_NONE        - local processor restriction and interpolation
901 .ve
902 
903     Options Database Key:
904 .   -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
905 
906     Level: intermediate
907 
908 .keywords: PC, GASM, set, type
909 
910 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMGetSubKSP(),
911           PCGASMCreateSubdomains2D()
912 @*/
913 PetscErrorCode  PCGASMSetType(PC pc,PCGASMType type)
914 {
915   PetscErrorCode ierr;
916 
917   PetscFunctionBegin;
918   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
919   PetscValidLogicalCollectiveEnum(pc,type,2);
920   ierr = PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));CHKERRQ(ierr);
921   PetscFunctionReturn(0);
922 }
923 
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetSortIndices"
/*@
    PCGASMSetSortIndices - Determines whether subdomain indices are sorted.

    Logically Collective on PC

    Input Parameters:
+   pc  - the preconditioner context
-   doSort - sort the subdomain indices

    Level: intermediate

.keywords: PC, GASM, set, type

.seealso: PCGASMSetLocalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode  PCGASMSetSortIndices(PC pc,PetscBool  doSort)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveBool(pc,doSort,2);
  /* Dispatch to the GASM implementation; a no-op for other PC types. */
  ierr = PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
952 
953 #undef __FUNCT__
954 #define __FUNCT__ "PCGASMGetSubKSP"
955 /*@C
956    PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on
957    this processor.
958 
959    Collective on PC iff first_local is requested
960 
961    Input Parameter:
962 .  pc - the preconditioner context
963 
964    Output Parameters:
965 +  n_local - the number of blocks on this processor or PETSC_NULL
966 .  first_local - the global number of the first block on this processor or PETSC_NULL,
967                  all processors must request or all must pass PETSC_NULL
968 -  ksp - the array of KSP contexts
969 
970    Note:
971    After PCGASMGetSubKSP() the array of KSPes is not to be freed
972 
973    Currently for some matrix implementations only 1 block per processor
974    is supported.
975 
976    You must call KSPSetUp() before calling PCGASMGetSubKSP().
977 
978    Level: advanced
979 
980 .keywords: PC, GASM, additive Schwarz, get, sub, KSP, context
981 
982 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMSetOverlap(),
983           PCGASMCreateSubdomains2D(),
984 @*/
985 PetscErrorCode  PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
986 {
987   PetscErrorCode ierr;
988 
989   PetscFunctionBegin;
990   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
991   ierr = PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));CHKERRQ(ierr);
992   PetscFunctionReturn(0);
993 }
994 
995 /* -------------------------------------------------------------------------------------*/
996 /*MC
997    PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
998            its own KSP object.
999 
1000    Options Database Keys:
+  -pc_gasm_truelocal - Activates PCGASMSetUseTrueLocal()
1002 .  -pc_gasm_blocks <blks> - Sets total blocks
1003 .  -pc_gasm_overlap <ovl> - Sets overlap
1004 -  -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
1005 
1006      IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
1007       will get a different convergence rate due to the default option of -pc_gasm_type restrict. Use
1008       -pc_gasm_type basic to use the standard GASM.
1009 
1010    Notes: Each processor can have one or more blocks, but a block cannot be shared by more
1011      than one processor. Defaults to one block per processor.
1012 
1013      To set options on the solvers for each block append -sub_ to all the KSP, and PC
1014         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
1015 
1016      To set the options on the solvers separate for each block call PCGASMGetSubKSP()
1017          and set the options directly on the resulting KSP object (you can access its PC
1018          with KSPGetPC())
1019 
1020 
1021    Level: beginner
1022 
1023    Concepts: additive Schwarz method
1024 
1025     References:
1026     An additive variant of the Schwarz alternating method for the case of many subregions
1027     M Dryja, OB Widlund - Courant Institute, New York University Technical report
1028 
1029     Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
1030     Barry Smith, Petter Bjorstad, and William Gropp, Cambridge University Press, ISBN 0-521-49589-X.
1031 
1032 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
1033            PCBJACOBI, PCGASMSetUseTrueLocal(), PCGASMGetSubKSP(), PCGASMSetLocalSubdomains(),
1034            PCGASMSetTotalSubdomains(), PCSetModifySubmatrices(), PCGASMSetOverlap(), PCGASMSetType()
1035 
1036 M*/
1037 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCCreate_GASM"
/*
   PCCreate_GASM - Creates and initializes the PC_GASM data structure and
   installs the GASM methods on the PC object.
*/
PetscErrorCode  PCCreate_GASM(PC pc)
{
  PetscErrorCode ierr;
  PC_GASM         *osm;

  PetscFunctionBegin;
  ierr = PetscNewLog(pc,PC_GASM,&osm);CHKERRQ(ierr);
  /* Default state: subdomain counts and index sets are determined at setup time. */
  osm->N                 = PETSC_DECIDE;
  osm->n                 = 0;
  osm->nmax              = 0;
  osm->overlap           = 1;   /* default overlap of 1 (see PCGASMSetOverlap()) */
  osm->ksp               = 0;
  osm->grestriction      = 0;
  osm->gprolongation     = 0;
  osm->gx                = 0;
  osm->gy                = 0;
  osm->x                 = 0;
  osm->y                 = 0;
  osm->is                = 0;
  osm->is_local          = 0;
  osm->pmat              = 0;
  osm->type              = PC_GASM_RESTRICT;  /* restricted additive Schwarz is the default variant */
  osm->same_local_solves = PETSC_TRUE;
  osm->sort_indices      = PETSC_TRUE;

  pc->data                   = (void*)osm;
  pc->ops->apply             = PCApply_GASM;
  pc->ops->applytranspose    = PCApplyTranspose_GASM;
  pc->ops->setup             = PCSetUp_GASM;
  pc->ops->reset             = PCReset_GASM;
  pc->ops->destroy           = PCDestroy_GASM;
  pc->ops->setfromoptions    = PCSetFromOptions_GASM;
  pc->ops->setuponblocks     = PCSetUpOnBlocks_GASM;
  pc->ops->view              = PCView_GASM;
  pc->ops->applyrichardson   = 0;

  /* Register the string-keyed implementation methods reached through
     PetscTryMethod()/PetscUseMethod() in the public PCGASMXxx() wrappers. */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetLocalSubdomains_C","PCGASMSetLocalSubdomains_GASM",
                    PCGASMSetLocalSubdomains_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetTotalSubdomains_C","PCGASMSetTotalSubdomains_GASM",
                    PCGASMSetTotalSubdomains_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetOverlap_C","PCGASMSetOverlap_GASM",
                    PCGASMSetOverlap_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetType_C","PCGASMSetType_GASM",
                    PCGASMSetType_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetSortIndices_C","PCGASMSetSortIndices_GASM",
                    PCGASMSetSortIndices_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMGetSubKSP_C","PCGASMGetSubKSP_GASM",
                    PCGASMGetSubKSP_GASM);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
1092 
1093 
1094 #undef __FUNCT__
1095 #define __FUNCT__ "PCGASMCreateSubdomains"
1096 /*@C
1097    PCGASMCreateSubdomains - Creates the index sets for the overlapping Schwarz
1098    preconditioner for a any problem on a general grid.
1099 
1100    Collective
1101 
1102    Input Parameters:
1103 +  A - The global matrix operator
1104 -  n - the number of local blocks
1105 
1106    Output Parameters:
1107 .  outis - the array of index sets defining the subdomains
1108 
1109    Level: advanced
1110 
1111    Note: this generates nonoverlapping subdomains; the PCGASM will generate the overlap
1112     from these if you use PCGASMSetLocalSubdomains()
1113 
1114     In the Fortran version you must provide the array outis[] already allocated of length n.
1115 
1116 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid
1117 
1118 .seealso: PCGASMSetLocalSubdomains(), PCGASMDestroySubdomains()
1119 @*/
1120 PetscErrorCode  PCGASMCreateSubdomains(Mat A, PetscInt n, IS* outis[])
1121 {
1122   MatPartitioning           mpart;
1123   const char                *prefix;
1124   PetscErrorCode            (*f)(Mat,MatReuse,Mat*);
1125   PetscMPIInt               size;
1126   PetscInt                  i,j,rstart,rend,bs;
1127   PetscBool                 isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
1128   Mat                       Ad = PETSC_NULL, adj;
1129   IS                        ispart,isnumb,*is;
1130   PetscErrorCode            ierr;
1131 
1132   PetscFunctionBegin;
1133   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1134   PetscValidPointer(outis,3);
1135   if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local blocks must be > 0, n = %D",n);
1136 
1137   /* Get prefix, row distribution, and block size */
1138   ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr);
1139   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1140   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
1141   if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);
1142 
1143   /* Get diagonal block from matrix if possible */
1144   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1145   ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",(void (**)(void))&f);CHKERRQ(ierr);
1146   if (f) {
1147     ierr = MatGetDiagonalBlock(A,&Ad);CHKERRQ(ierr);
1148   } else if (size == 1) {
1149     Ad = A;
1150   }
1151   if (Ad) {
1152     ierr = PetscTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
1153     if (!isbaij) {ierr = PetscTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);}
1154   }
1155   if (Ad && n > 1) {
1156     PetscBool  match,done;
1157     /* Try to setup a good matrix partitioning if available */
1158     ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr);
1159     ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
1160     ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
1161     ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);CHKERRQ(ierr);
1162     if (!match) {
1163       ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);CHKERRQ(ierr);
1164     }
1165     if (!match) { /* assume a "good" partitioner is available */
1166       PetscInt na,*ia,*ja;
1167       ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1168       if (done) {
1169         /* Build adjacency matrix by hand. Unfortunately a call to
1170            MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1171            remove the block-aij structure and we cannot expect
1172            MatPartitioning to split vertices as we need */
1173         PetscInt i,j,*row,len,nnz,cnt,*iia=0,*jja=0;
1174         nnz = 0;
1175         for (i=0; i<na; i++) { /* count number of nonzeros */
1176           len = ia[i+1] - ia[i];
1177           row = ja + ia[i];
1178           for (j=0; j<len; j++) {
1179             if (row[j] == i) { /* don't count diagonal */
1180               len--; break;
1181             }
1182           }
1183           nnz += len;
1184         }
1185         ierr = PetscMalloc((na+1)*sizeof(PetscInt),&iia);CHKERRQ(ierr);
1186         ierr = PetscMalloc((nnz)*sizeof(PetscInt),&jja);CHKERRQ(ierr);
1187         nnz    = 0;
1188         iia[0] = 0;
1189         for (i=0; i<na; i++) { /* fill adjacency */
1190           cnt = 0;
1191           len = ia[i+1] - ia[i];
1192           row = ja + ia[i];
1193           for (j=0; j<len; j++) {
1194             if (row[j] != i) { /* if not diagonal */
1195               jja[nnz+cnt++] = row[j];
1196             }
1197           }
1198           nnz += cnt;
1199           iia[i+1] = nnz;
1200         }
1201         /* Partitioning of the adjacency matrix */
1202         ierr = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,PETSC_NULL,&adj);CHKERRQ(ierr);
1203         ierr = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr);
1204         ierr = MatPartitioningSetNParts(mpart,n);CHKERRQ(ierr);
1205         ierr = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr);
1206         ierr = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr);
1207         ierr = MatDestroy(&adj);CHKERRQ(ierr);
1208         foundpart = PETSC_TRUE;
1209       }
1210       ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1211     }
1212     ierr = MatPartitioningDestroy(&mpart);CHKERRQ(ierr);
1213   }
1214 
1215   ierr = PetscMalloc(n*sizeof(IS),&is);CHKERRQ(ierr);
1216   *outis = is;
1217 
1218   if (!foundpart) {
1219 
1220     /* Partitioning by contiguous chunks of rows */
1221 
1222     PetscInt mbs   = (rend-rstart)/bs;
1223     PetscInt start = rstart;
1224     for (i=0; i<n; i++) {
1225       PetscInt count = (mbs/n + ((mbs % n) > i)) * bs;
1226       ierr   = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr);
1227       start += count;
1228     }
1229 
1230   } else {
1231 
1232     /* Partitioning by adjacency of diagonal block  */
1233 
1234     const PetscInt *numbering;
1235     PetscInt       *count,nidx,*indices,*newidx,start=0;
1236     /* Get node count in each partition */
1237     ierr = PetscMalloc(n*sizeof(PetscInt),&count);CHKERRQ(ierr);
1238     ierr = ISPartitioningCount(ispart,n,count);CHKERRQ(ierr);
1239     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1240       for (i=0; i<n; i++) count[i] *= bs;
1241     }
1242     /* Build indices from node numbering */
1243     ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr);
1244     ierr = PetscMalloc(nidx*sizeof(PetscInt),&indices);CHKERRQ(ierr);
1245     for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
1246     ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr);
1247     ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr);
1248     ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr);
1249     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1250       ierr = PetscMalloc(nidx*bs*sizeof(PetscInt),&newidx);CHKERRQ(ierr);
1251       for (i=0; i<nidx; i++)
1252         for (j=0; j<bs; j++)
1253           newidx[i*bs+j] = indices[i]*bs + j;
1254       ierr = PetscFree(indices);CHKERRQ(ierr);
1255       nidx   *= bs;
1256       indices = newidx;
1257     }
1258     /* Shift to get global indices */
1259     for (i=0; i<nidx; i++) indices[i] += rstart;
1260 
1261     /* Build the index sets for each block */
1262     for (i=0; i<n; i++) {
1263       ierr   = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);CHKERRQ(ierr);
1264       ierr   = ISSort(is[i]);CHKERRQ(ierr);
1265       start += count[i];
1266     }
1267 
1268     ierr = PetscFree(count);
1269     ierr = PetscFree(indices);
1270     ierr = ISDestroy(&isnumb);CHKERRQ(ierr);
1271     ierr = ISDestroy(&ispart);CHKERRQ(ierr);
1272   }
1273 
1274   PetscFunctionReturn(0);
1275 }
1276 
1277 #undef __FUNCT__
1278 #define __FUNCT__ "PCGASMDestroySubdomains"
1279 /*@C
1280    PCGASMDestroySubdomains - Destroys the index sets created with
1281    PCGASMCreateSubdomains(). Should be called after setting subdomains
1282    with PCGASMSetLocalSubdomains().
1283 
1284    Collective
1285 
1286    Input Parameters:
1287 +  n - the number of index sets
1288 .  is - the array of index sets
1289 -  is_local - the array of local index sets, can be PETSC_NULL
1290 
1291    Level: advanced
1292 
1293 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid
1294 
1295 .seealso: PCGASMCreateSubdomains(), PCGASMSetLocalSubdomains()
1296 @*/
1297 PetscErrorCode  PCGASMDestroySubdomains(PetscInt n, IS is[], IS is_local[])
1298 {
1299   PetscInt       i;
1300   PetscErrorCode ierr;
1301   PetscFunctionBegin;
1302   if (n <= 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"n must be > 0: n = %D",n);
1303   PetscValidPointer(is,2);
1304   for (i=0; i<n; i++) { ierr = ISDestroy(&is[i]);CHKERRQ(ierr); }
1305   ierr = PetscFree(is);CHKERRQ(ierr);
1306   if (is_local) {
1307     PetscValidPointer(is_local,3);
1308     for (i=0; i<n; i++) { ierr = ISDestroy(&is_local[i]);CHKERRQ(ierr); }
1309     ierr = PetscFree(is_local);CHKERRQ(ierr);
1310   }
1311   PetscFunctionReturn(0);
1312 }
1313 
1314 
/*
   PCGASMLocalSubdomainBounds2D - Statement macro used by PCGASMCreateSubdomains2D().

   Computes the bounding box (*xleft_loc,*ylow_loc)--(*xright_loc,*yhigh_loc) of the
   intersection of the 2D subdomain [xleft,xright) x [ylow,yhigh) with this process's
   ownership range [first,last) of global indices on an M x N grid (M points per grid
   row), and stores in *n the number of subdomain points owned by this process.

   xleft_loc, ylow_loc, xright_loc, yhigh_loc and n must be passed as pointers.
   NOTE(review): arguments are evaluated more than once; pass only side-effect-free
   expressions.
*/
#define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \
{                                                                                                       \
 PetscInt first_row = first/M, last_row = last/M+1;                                                     \
  /*                                                                                                    \
   Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners       \
   of the bounding box of the intersection of the subdomain with the local ownership range (local       \
   subdomain).                                                                                          \
   Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows       \
   of the intersection.                                                                                 \
  */                                                                                                    \
  /* ylow_loc is the grid row containing the first element of the local subdomain */                    \
  *ylow_loc = PetscMax(first_row,ylow);                                                                    \
  /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
  *xleft_loc = *ylow_loc==first_row?PetscMax(first%M,xleft):xleft;                                                                            \
  /* yhigh_loc is the grid row above the last local subdomain element */                                                                    \
  *yhigh_loc = PetscMin(last_row,yhigh);                                                                                                     \
  /* xright is the offset of the end of the  local subdomain within its grid row (might actually be outside the local subdomain) */         \
  *xright_loc = *yhigh_loc==last_row?PetscMin(xright,last%M):xright;                                                                          \
  /* Now compute the size of the local subdomain n. */ \
  *n = 0;                                               \
  if(*ylow_loc < *yhigh_loc) {                           \
    PetscInt width = xright-xleft;                     \
    *n += width*(*yhigh_loc-*ylow_loc-1);                 \
    *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \
    *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \
  }\
}
1342 
1343 
1344 
1345 #undef __FUNCT__
1346 #define __FUNCT__ "PCGASMCreateSubdomains2D"
1347 /*@
1348    PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1349    preconditioner for a two-dimensional problem on a regular grid.
1350 
1351    Collective
1352 
1353    Input Parameters:
1354 +  M, N - the global number of mesh points in the x and y directions
1355 .  Mdomains, Ndomains - the global number of subdomains in the x and y directions
1356 .  dof - degrees of freedom per node
1357 -  overlap - overlap in mesh lines
1358 
1359    Output Parameters:
1360 +  Nsub - the number of local subdomains created
1361 .  is - array of index sets defining overlapping (if overlap > 0) subdomains
1362 -  is_local - array of index sets defining non-overlapping subdomains
1363 
1364    Note:
1365    Presently PCAMSCreateSubdomains2d() is valid only for sequential
1366    preconditioners.  More general related routines are
1367    PCGASMSetTotalSubdomains() and PCGASMSetLocalSubdomains().
1368 
1369    Level: advanced
1370 
1371 .keywords: PC, GASM, additive Schwarz, create, subdomains, 2D, regular grid
1372 
1373 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
1374           PCGASMSetOverlap()
1375 @*/
1376 PetscErrorCode  PCGASMCreateSubdomains2D(PC pc, PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap, PetscInt *nsub,IS **is,IS **is_local)
1377 {
1378   PetscErrorCode ierr;
1379   PetscMPIInt    size, rank;
1380   PetscInt       i, j;
1381   PetscInt       maxheight, maxwidth;
1382   PetscInt       xstart, xleft, xright, xleft_loc, xright_loc;
1383   PetscInt       ystart, ylow,  yhigh,  ylow_loc,  yhigh_loc;
1384   PetscInt       x[2][2], y[2][2], n[2];
1385   PetscInt       first, last;
1386   PetscInt       nidx, *idx;
1387   PetscInt       ii,jj,s,q,d;
1388   PetscInt       k,kk;
1389   PetscMPIInt    color;
1390   MPI_Comm       comm, subcomm;
1391   IS             **iis = 0;
1392 
1393   PetscFunctionBegin;
1394   ierr = PetscObjectGetComm((PetscObject)pc, &comm);CHKERRQ(ierr);
1395   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1396   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1397   ierr = MatGetOwnershipRange(pc->pmat, &first, &last);CHKERRQ(ierr);
1398   if (first%dof || last%dof) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) "
1399 	     "does not respect the number of degrees of freedom per grid point %D", first, last, dof);
1400 
1401   /* Determine the number of domains with nonzero intersections with the local ownership range. */
1402   s = 0;
1403   ystart = 0;
1404   for (j=0; j<Ndomains; ++j) {
1405     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1406     if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N);
1407     /* Vertical domain limits with an overlap. */
1408     ylow = PetscMax(ystart - overlap,0);
1409     yhigh = PetscMin(ystart + maxheight + overlap,N);
1410     xstart = 0;
1411     for (i=0; i<Mdomains; ++i) {
1412       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1413       if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1414       /* Horizontal domain limits with an overlap. */
1415       xleft   = PetscMax(xstart - overlap,0);
1416       xright  = PetscMin(xstart + maxwidth + overlap,M);
1417       /*
1418 	 Determine whether this subdomain intersects this processor's ownership range of pc->pmat.
1419       */
1420       PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1421       if(nidx) {
1422         ++s;
1423       }
1424       xstart += maxwidth;
1425     }/* for(i = 0; i < Mdomains; ++i) */
1426     ystart += maxheight;
1427   }/* for(j = 0; j < Ndomains; ++j) */
1428   /* Now we can allocate the necessary number of ISs. */
1429   *nsub = s;
1430   ierr = PetscMalloc((*nsub)*sizeof(IS*),is);CHKERRQ(ierr);
1431   ierr = PetscMalloc((*nsub)*sizeof(IS*),is_local);CHKERRQ(ierr);
1432   s = 0;
1433   ystart = 0;
1434   for (j=0; j<Ndomains; ++j) {
1435     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1436     if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N);
1437     /* Vertical domain limits with an overlap. */
1438     y[0][0] = PetscMax(ystart - overlap,0);
1439     y[0][1] = PetscMin(ystart + maxheight + overlap,N);
1440     /* Vertical domain limits without an overlap. */
1441     y[1][0] = ystart;
1442     y[1][1] = PetscMin(ystart + maxheight,N);
1443     xstart = 0;
1444     for (i=0; i<Mdomains; ++i) {
1445       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1446       if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1447       /* Horizontal domain limits with an overlap. */
1448       x[0][0]  = PetscMax(xstart - overlap,0);
1449       x[0][1]  = PetscMin(xstart + maxwidth + overlap,M);
1450       /* Horizontal domain limits without an overlap. */
1451       x[1][0] = xstart;
1452       x[1][1] = PetscMin(xstart+maxwidth,M);
1453       /*
1454 	 Determine whether this domain intersects this processor's ownership range of pc->pmat.
1455 	 Do this twice: first for the domains with overlaps, and once without.
1456 	 During the first pass create the subcommunicators, and use them on the second pass as well.
1457       */
1458       for(q = 0; q < 2; ++q) {
1459 	/*
1460 	  domain limits, (xleft, xright) and (ylow, yheigh) are adjusted
1461 	  according to whether the domain with an overlap or without is considered.
1462 	*/
1463 	xleft = x[q][0]; xright = x[q][1];
1464 	ylow  = y[q][0]; yhigh  = y[q][1];
1465         PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1466 	nidx *= dof;
1467         n[q] = nidx;
1468         /*
1469          Based on the counted number of indices in the local domain *with an overlap*,
1470          construct a subcommunicator of all the processors supporting this domain.
1471          Observe that a domain with an overlap might have nontrivial local support,
1472          while the domain without an overlap might not.  Hence, the decision to participate
1473          in the subcommunicator must be based on the domain with an overlap.
1474          */
1475 	if (q == 0) {
1476 	  if(nidx) {
1477 	    color = 1;
1478 	  } else {
1479 	    color = MPI_UNDEFINED;
1480 	  }
1481 	  ierr = MPI_Comm_split(comm, color, rank, &subcomm);CHKERRQ(ierr);
1482 	}
1483         /*
1484          Proceed only if the number of local indices *with an overlap* is nonzero.
1485          */
1486         if (n[0]) {
1487           if(q == 0) {
1488             iis = is;
1489           }
1490           if (q == 1) {
1491             /*
1492              The IS for the no-overlap subdomain shares a communicator with the overlapping domain.
1493              Moreover, if the overlap is zero, the two ISs are identical.
1494              */
1495             if (overlap == 0) {
1496               (*is_local)[s] = (*is)[s];
1497               ierr = PetscObjectReference((PetscObject)(*is)[s]);CHKERRQ(ierr);
1498               continue;
1499             } else {
1500               iis = is_local;
1501               subcomm = ((PetscObject)(*is)[s])->comm;
1502             }
1503           }/* if(q == 1) */
1504           idx = PETSC_NULL;
1505 	  ierr = PetscMalloc(nidx*sizeof(PetscInt),&idx);CHKERRQ(ierr);
1506           if(nidx) {
1507             k    = 0;
1508             for (jj=ylow_loc; jj<yhigh_loc; ++jj) {
1509               PetscInt x0 = (jj==ylow_loc)?xleft_loc:xleft;
1510               PetscInt x1 = (jj==yhigh_loc-1)?xright_loc:xright;
1511               kk = dof*(M*jj + x0);
1512               for (ii=x0; ii<x1; ++ii) {
1513                 for(d = 0; d < dof; ++d) {
1514                   idx[k++] = kk++;
1515                 }
1516               }
1517             }
1518           }
1519 	  ierr = ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*iis)+s);CHKERRQ(ierr);
1520 	}/* if(n[0]) */
1521       }/* for(q = 0; q < 2; ++q) */
1522       if(n[0]) {
1523         ++s;
1524       }
1525       xstart += maxwidth;
1526     }/* for(i = 0; i < Mdomains; ++i) */
1527     ystart += maxheight;
1528   }/* for(j = 0; j < Ndomains; ++j) */
1529   PetscFunctionReturn(0);
1530 }
1531 
1532 #undef __FUNCT__
1533 #define __FUNCT__ "PCGASMGetLocalSubdomains"
1534 /*@C
1535     PCGASMGetLocalSubdomains - Gets the local subdomains (for this processor
1536     only) for the additive Schwarz preconditioner.
1537 
1538     Not Collective
1539 
1540     Input Parameter:
1541 .   pc - the preconditioner context
1542 
1543     Output Parameters:
1544 +   n - the number of subdomains for this processor (default value = 1)
1545 .   is - the index sets that define the subdomains for this processor
1546 -   is_local - the index sets that define the local part of the subdomains for this processor (can be PETSC_NULL)
1547 
1548 
1549     Notes:
1550     The IS numbering is in the parallel, global numbering of the vector.
1551 
1552     Level: advanced
1553 
1554 .keywords: PC, GASM, set, local, subdomains, additive Schwarz
1555 
1556 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
1557           PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubmatrices()
1558 @*/
1559 PetscErrorCode  PCGASMGetLocalSubdomains(PC pc,PetscInt *n,IS *is[],IS *is_local[])
1560 {
1561   PC_GASM         *osm;
1562   PetscErrorCode ierr;
1563   PetscBool      match;
1564 
1565   PetscFunctionBegin;
1566   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1567   PetscValidIntPointer(n,2);
1568   if (is) PetscValidPointer(is,3);
1569   ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1570   if (!match) {
1571     if (n)  *n  = 0;
1572     if (is) *is = PETSC_NULL;
1573   } else {
1574     osm = (PC_GASM*)pc->data;
1575     if (n)  *n  = osm->n;
1576     if (is) *is = osm->is;
1577     if (is_local) *is_local = osm->is_local;
1578   }
1579   PetscFunctionReturn(0);
1580 }
1581 
1582 #undef __FUNCT__
1583 #define __FUNCT__ "PCGASMGetLocalSubmatrices"
1584 /*@C
1585     PCGASMGetLocalSubmatrices - Gets the local submatrices (for this processor
1586     only) for the additive Schwarz preconditioner.
1587 
1588     Not Collective
1589 
1590     Input Parameter:
1591 .   pc - the preconditioner context
1592 
1593     Output Parameters:
1594 +   n - the number of matrices for this processor (default value = 1)
1595 -   mat - the matrices
1596 
1597 
1598     Level: advanced
1599 
1600 .keywords: PC, GASM, set, local, subdomains, additive Schwarz, block Jacobi
1601 
1602 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
1603           PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubdomains()
1604 @*/
1605 PetscErrorCode  PCGASMGetLocalSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1606 {
1607   PC_GASM         *osm;
1608   PetscErrorCode ierr;
1609   PetscBool      match;
1610 
1611   PetscFunctionBegin;
1612   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1613   PetscValidIntPointer(n,2);
1614   if (mat) PetscValidPointer(mat,3);
1615   if (!pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUP() or PCSetUp().");
1616   ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1617   if (!match) {
1618     if (n)   *n   = 0;
1619     if (mat) *mat = PETSC_NULL;
1620   } else {
1621     osm = (PC_GASM*)pc->data;
1622     if (n)   *n   = osm->n;
1623     if (mat) *mat = osm->pmat;
1624   }
1625   PetscFunctionReturn(0);
1626 }
1627