xref: /petsc/src/ksp/pc/impls/gasm/gasm.c (revision 7d0a6c19129e7069c8a40e210b34ed62989173db)
1 
2 /*
3   This file defines an "generalized" additive Schwarz preconditioner for any Mat implementation.
4   In this version each processor may have any number of subdomains and a subdomain may live on multiple
5   processors.
6 
7        N    - total number of subdomains on all processors
8        n    - actual number of subdomains on this processor
9        nmax - maximum number of subdomains per processor
10 */
11 #include "private/pcimpl.h"     /*I "petscpc.h" I*/
12 
typedef struct {
  PetscInt   N,n,nmax;            /* N: total number of subdomains across all processors; n: number of subdomains on this processor; nmax: max subdomains per processor (see header comment above) */
  PetscInt   overlap;             /* overlap requested by user; -1 indicates the user supplied (possibly already overlapping) subdomains, so PCSetUp_GASM must not extend them */
  KSP        *ksp;                /* linear solvers for each block */
  Vec        gx,gy;               /* Merged work vectors */
  Vec        *x,*y;               /* Split work vectors; storage aliases pieces of storage of the above merged vectors. */
  IS         gis, gis_local;      /* merged ISs (disjoint union of is[] and is_local[] respectively) */
  VecScatter grestriction;        /* merged restriction: global input vector -> gx */
  VecScatter gprolongation;       /* merged prolongation: gy -> global output vector */
  IS         *is;                 /* index set that defines each overlapping subdomain */
  IS         *is_local;           /* index set that defines each local subdomain (same as subdomain with the overlap removed); may be NULL */
  Mat        *pmat;               /* subdomain block matrices */
  PCGASMType  type;               /* use reduced interpolation, restriction or both */
  PetscBool  type_set;            /* if user set this value (so won't change it for symmetric problems) */
  PetscBool  same_local_solves;   /* flag indicating whether all local solvers are same */
  PetscBool  sort_indices;        /* flag to sort subdomain indices */
} PC_GASM;
30 
31 #undef __FUNCT__
32 #define __FUNCT__ "PCGASMPrintSubdomains"
33 static PetscErrorCode  PCGASMPrintSubdomains(PC pc)
34 {
35   PC_GASM         *osm  = (PC_GASM*)pc->data;
36   const char     *prefix;
37   char           fname[PETSC_MAX_PATH_LEN+1];
38   PetscViewer    viewer, sviewer;
39   PetscInt       i,j,nidx;
40   const PetscInt *idx;
41   char           *cidx;
42   PetscBool      found;
43   PetscMPIInt    size, rank;
44   PetscErrorCode ierr;
45 
46   PetscFunctionBegin;
47   ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size); CHKERRQ(ierr);
48   ierr = MPI_Comm_rank(((PetscObject)pc)->comm, &rank); CHKERRQ(ierr);
49   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
50   ierr = PetscOptionsGetString(prefix,"-pc_gasm_print_subdomains",fname,PETSC_MAX_PATH_LEN,&found);CHKERRQ(ierr);
51   if (!found) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); };
52   for (i=0;i<osm->n;++i) {
53     ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr);
54     ierr = ISGetLocalSize(osm->is[i], &nidx); CHKERRQ(ierr);
55     /*
56      No more than 15 characters per index plus a space.
57      PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
58      in case nidx == 0. That will take care of the space for the trailing '\0' as well.
59      For nidx == 0, the whole string 16 '\0'.
60      */
61     ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx); CHKERRQ(ierr);
62     ierr = ISGetIndices(osm->is[i], &idx);    CHKERRQ(ierr);
63     ierr = PetscViewerStringOpen(((PetscObject)(osm->is[i]))->comm, cidx, 16*(nidx+1), &sviewer); CHKERRQ(ierr);
64     for(j = 0; j < nidx; ++j) {
65       ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]); CHKERRQ(ierr);
66     }
67     ierr = PetscViewerDestroy(sviewer); CHKERRQ(ierr);
68     ierr = ISRestoreIndices(osm->is[i],&idx); CHKERRQ(ierr);
69 
70     ierr = PetscViewerASCIIPrintf(viewer, "Subdomain with overlap\n"); CHKERRQ(ierr);
71     ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx); CHKERRQ(ierr);
72     ierr = PetscViewerFlush(viewer); CHKERRQ(ierr);
73     ierr = PetscViewerASCIIPrintf(viewer, "\n"); CHKERRQ(ierr);
74     ierr = PetscFree(cidx); CHKERRQ(ierr);
75     ierr = PetscViewerDestroy(viewer); CHKERRQ(ierr);
76     if (osm->is_local) {
77       ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr);
78       ierr = ISGetLocalSize(osm->is_local[i], &nidx); CHKERRQ(ierr);
79       /*
80        No more than 15 characters per index plus a space.
81        PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
82        in case nidx == 0. That will take care of the space for the trailing '\0' as well.
83        For nidx == 0, the whole string 16 '\0'.
84        */
85       ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx); CHKERRQ(ierr);
86       ierr = ISGetIndices(osm->is_local[i], &idx);    CHKERRQ(ierr);
87       ierr = PetscViewerStringOpen(((PetscObject)(osm->is_local[i]))->comm, cidx, 16*(nidx+1), &sviewer); CHKERRQ(ierr);
88       for(j = 0; j < nidx; ++j) {
89         ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]); CHKERRQ(ierr);
90       }
91       ierr = PetscViewerDestroy(sviewer); CHKERRQ(ierr);
92       ierr = ISRestoreIndices(osm->is_local[i],&idx); CHKERRQ(ierr);
93 
94       ierr = PetscViewerASCIIPrintf(viewer, "Subdomain without overlap\n"); CHKERRQ(ierr);
95       ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx); CHKERRQ(ierr);
96       ierr = PetscViewerFlush(viewer); CHKERRQ(ierr);
97       ierr = PetscViewerASCIIPrintf(viewer, "\n"); CHKERRQ(ierr);
98       ierr = PetscFree(cidx); CHKERRQ(ierr);
99       ierr = PetscViewerDestroy(viewer); CHKERRQ(ierr);
100     }
101   }
102   PetscFunctionReturn(0);
103 }
104 
105 
106 #undef __FUNCT__
107 #define __FUNCT__ "PCView_GASM"
108 static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer)
109 {
110   PC_GASM         *osm = (PC_GASM*)pc->data;
111   const char     *prefix;
112   PetscErrorCode ierr;
113   PetscMPIInt    rank, size;
114   PetscInt       i,bsz;
115   PetscBool      iascii,isstring, print_subdomains;
116   PetscViewer    sviewer;
117 
118 
119   PetscFunctionBegin;
120   ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size); CHKERRQ(ierr);
121   ierr = MPI_Comm_rank(((PetscObject)pc)->comm, &rank); CHKERRQ(ierr);
122   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
123   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);CHKERRQ(ierr);
124   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
125   ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&print_subdomains,PETSC_NULL);CHKERRQ(ierr);
126   if (iascii) {
127     char overlaps[256] = "user-defined overlap",subdomains[256] = "total subdomains set";
128     if (osm->overlap >= 0) {ierr = PetscSNPrintf(overlaps,sizeof overlaps,"amount of overlap = %D",osm->overlap);CHKERRQ(ierr);}
129     if (osm->nmax > 0)     {ierr = PetscSNPrintf(subdomains,sizeof subdomains,"max number of local subdomains = %D",osm->nmax);CHKERRQ(ierr);}
130     ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d:%d] number of locally-supported subdomains = %D\n",(int)rank,(int)size,osm->n);CHKERRQ(ierr);
131     ierr = PetscViewerFlush(viewer); CHKERRQ(ierr);
132     ierr = PetscViewerASCIIPrintf(viewer,"  Generalized additive Schwarz: %s, %s\n",subdomains,overlaps);CHKERRQ(ierr);
133     ierr = PetscViewerASCIIPrintf(viewer,"  Generalized additive Schwarz: restriction/interpolation type - %s\n",PCGASMTypes[osm->type]);CHKERRQ(ierr);
134     ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
135     if (osm->same_local_solves) {
136       if (osm->ksp) {
137         ierr = PetscViewerASCIIPrintf(viewer,"  Local solve is same for all subdomains, in the following KSP and PC objects:\n");CHKERRQ(ierr);
138         ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
139         if (!rank) {
140           ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
141           ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);
142           ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
143         }
144         ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
145       }
146     } else {
147       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
148       ierr = PetscViewerASCIIPrintf(viewer,"  Local solve info for each subdomain is in the following KSP and PC objects:\n");CHKERRQ(ierr);
149       ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
150       ierr = PetscViewerASCIIPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
151       for (i=0; i<osm->nmax; i++) {
152         ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
153         if (i < osm->n) {
154           ierr = ISGetLocalSize(osm->is[i],&bsz);CHKERRQ(ierr);
155           ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"[%d:%d] local subdomain number %D, size = %D\n",(int)rank,(int)size,i,bsz);CHKERRQ(ierr);
156           ierr = KSPView(osm->ksp[i],sviewer);CHKERRQ(ierr);
157           ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
158         }
159         ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
160       }
161       ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
162       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
163     }
164   } else if (isstring) {
165     ierr = PetscViewerStringSPrintf(viewer," subdomains=%D, overlap=%D, type=%s",osm->n,osm->overlap,PCGASMTypes[osm->type]);CHKERRQ(ierr);
166     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
167     if (osm->ksp) {ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);}
168     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
169   } else {
170     SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_SUP,"Viewer type %s not supported for PCGASM",((PetscObject)viewer)->type_name);
171   }
172   ierr = PetscViewerFlush(viewer); CHKERRQ(ierr);
173   if(print_subdomains) {
174     ierr = PCGASMPrintSubdomains(pc); CHKERRQ(ierr);
175   }
176   PetscFunctionReturn(0);
177 }
178 
179 
180 
181 
182 
#undef __FUNCT__
#define __FUNCT__ "PCSetUp_GASM"
/*
   Sets up the GASM preconditioner:
     - decides the subdomain counts (N, n, nmax) if not set by the user,
     - creates osm->is / osm->is_local and extends osm->is by the requested overlap,
     - builds the merged index sets (gis, gis_local), merged work vectors (gx, gy),
       and the restriction/prolongation scatters,
     - creates per-subdomain work vectors that alias segments of gx/gy,
     - creates one KSP per subdomain and extracts the subdomain matrices.
   On repeated calls (pc->setupcalled), only the submatrices are re-extracted,
   rebuilding them from scratch when the nonzero pattern changed.
*/
static PetscErrorCode PCSetUp_GASM(PC pc)
{
  PC_GASM         *osm  = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscBool      symset,flg;
  PetscInt       i;
  PetscMPIInt    rank, size;
  MatReuse       scall = MAT_REUSE_MATRIX;
  KSP            ksp;
  PC             subpc;
  const char     *prefix,*pprefix;
  PetscInt       dn;       /* Number of indices in a single subdomain assigned to this processor. */
  const PetscInt *didx;    /* Indices from a single subdomain assigned to this processor. */
  PetscInt       ddn;      /* Number of indices in all subdomains assigned to this processor. */
  PetscInt       *ddidx;   /* Indices of all subdomains assigned to this processor. */
  IS             gid;      /* Identity IS of the size of all subdomains assigned to this processor. */
  Vec            x,y;
  PetscScalar    *gxarray, *gyarray;
  PetscInt       gfirst, glast;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
  if (!pc->setupcalled) {

    /* For a known-symmetric operator, default to the symmetric (BASIC) variant
       unless the user explicitly chose a type. */
    if (!osm->type_set) {
      ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
      if (symset && flg) { osm->type = PC_GASM_BASIC; }
    }

    if (osm->N == PETSC_DECIDE && osm->n < 1) {
      /* no subdomains given, use one per processor */
      osm->nmax = osm->n = 1;
      ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
      osm->N = size;
    } else if (osm->N == PETSC_DECIDE) {
      PetscInt inwork[2], outwork[2];
      /* determine global number of subdomains and the max number of local subdomains */
      inwork[0] = inwork[1] = osm->n;
      ierr = MPI_Allreduce(inwork,outwork,1,MPIU_2INT,PetscMaxSum_Op,((PetscObject)pc)->comm);CHKERRQ(ierr);
      osm->nmax = outwork[0];
      osm->N    = outwork[1];
    }
    if (!osm->is){ /* create the index sets */
      ierr = PCGASMCreateSubdomains(pc->pmat,osm->n,&osm->is);CHKERRQ(ierr);
    }
    if (!osm->is_local) {
      /*
	 This indicates that osm->is should define a nonoverlapping decomposition
	 (there is no way to really guarantee that if subdomains are set by the user through PCGASMSetLocalSubdomains,
	  but the assumption is that either the user does the right thing, or subdomains in ossm->is have been created
	  via PCGASMCreateSubdomains, which guarantees a nonoverlapping decomposition).
	 Therefore, osm->is will be used to define osm->is_local.
	 If a nonzero overlap has been requested by the user, then osm->is will be expanded and will overlap,
	 so osm->is_local should obtain a copy of osm->is while they are still (presumably) nonoverlapping.
	 Otherwise (no overlap has been requested), osm->is_local are simply aliases for osm->is.
      */
      ierr = PetscMalloc(osm->n*sizeof(IS),&osm->is_local);CHKERRQ(ierr);
      for (i=0; i<osm->n; i++) {
        if (osm->overlap > 0) { /* With positive overlap, osm->is[i] will be modified */
          ierr = ISDuplicate(osm->is[i],&osm->is_local[i]);CHKERRQ(ierr);
          ierr = ISCopy(osm->is[i],osm->is_local[i]);CHKERRQ(ierr);
        } else {
          ierr = PetscObjectReference((PetscObject)osm->is[i]);CHKERRQ(ierr);
          osm->is_local[i] = osm->is[i];
        }
      }
    }
    /* Beyond this point osm->is_local is not null. */
    if (osm->overlap > 0) {
      /* Extend the "overlapping" regions by a number of steps */
      ierr = MatIncreaseOverlap(pc->pmat,osm->n,osm->is,osm->overlap);CHKERRQ(ierr);
    }
    ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
    flg  = PETSC_FALSE;
    ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&flg,PETSC_NULL);CHKERRQ(ierr);
    if (flg) { ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr); }

    if (osm->sort_indices) {
      for (i=0; i<osm->n; i++) {
        ierr = ISSort(osm->is[i]);CHKERRQ(ierr);
	ierr = ISSort(osm->is_local[i]);CHKERRQ(ierr);
      }
    }
    /* Merge the ISs, create merged vectors and scatter contexts. */
    /* Restriction ISs. */
    /* First pass: count the total number of indices across this rank's subdomains. */
    ddn = 0;
    for (i=0; i<osm->n; i++) {
      ierr = ISGetLocalSize(osm->is[i],&dn);          CHKERRQ(ierr);
      ddn += dn;
    }
    /* Second pass: concatenate all subdomain indices into one array. */
    ierr = PetscMalloc(ddn*sizeof(PetscInt), &ddidx); CHKERRQ(ierr);
    ddn = 0;
    for (i=0; i<osm->n; i++) {
      ierr = ISGetLocalSize(osm->is[i],&dn); CHKERRQ(ierr);
      ierr = ISGetIndices(osm->is[i],&didx); CHKERRQ(ierr);
      ierr = PetscMemcpy(ddidx+ddn, didx, sizeof(PetscInt)*dn); CHKERRQ(ierr);
      ierr = ISRestoreIndices(osm->is[i], &didx); CHKERRQ(ierr);
      ddn += dn;
    }
    /* gis takes ownership of ddidx (PETSC_OWN_POINTER): do not free ddidx here. */
    ierr = ISCreateGeneral(((PetscObject)(pc))->comm, ddn, ddidx, PETSC_OWN_POINTER, &osm->gis); CHKERRQ(ierr);
    ierr = MatGetVecs(pc->pmat,&x,&y);CHKERRQ(ierr);
    ierr = VecCreateMPI(((PetscObject)pc)->comm, ddn, PETSC_DECIDE, &osm->gx); CHKERRQ(ierr);
    ierr = VecDuplicate(osm->gx,&osm->gy);                                     CHKERRQ(ierr);
    ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast);                     CHKERRQ(ierr);
    /* gid enumerates this rank's slots of gx; used as the destination IS of the restriction scatter. */
    ierr = ISCreateStride(((PetscObject)pc)->comm,ddn,gfirst,1, &gid);              CHKERRQ(ierr);
    ierr = VecScatterCreate(x,osm->gis,osm->gx,gid, &(osm->grestriction));     CHKERRQ(ierr);
    ierr = ISDestroy(gid);                                                     CHKERRQ(ierr);
    /* Prolongation ISs */
    { PetscInt       dn_local;       /* Number of indices in the local part of a single domain assigned to this processor. */
      const PetscInt *didx_local;    /* Global indices from the local part of a single domain assigned to this processor. */
      PetscInt       ddn_local;      /* Number of indices in the local part of the disjoint union all domains assigned to this processor. */
      PetscInt       *ddidx_local;   /* Global indices of the local part of the disjoint union of all domains assigned to this processor. */
      /**/
      ISLocalToGlobalMapping ltog;          /* Map from global to local indices on the disjoint union of subdomains: "local" ind's run from 0 to ddn-1. */
      PetscInt              *ddidx_llocal;  /* Mapped local indices of the disjoint union of local parts of subdomains. */
      PetscInt               ddn_llocal;    /* Number of indices in ddidx_llocal; must equal ddn_local, or else gis_local is not a sub-IS of gis. */
      IS                     gis_llocal;    /* IS with ddidx_llocal indices. */
      PetscInt               j;
      /* Same two-pass count-then-concatenate pattern as for the restriction IS above,
         but over the nonoverlapping index sets osm->is_local. */
      ddn_local = 0;
      for (i=0; i<osm->n; i++) {
	ierr = ISGetLocalSize(osm->is_local[i],&dn_local); CHKERRQ(ierr);
	ddn_local += dn_local;
      }
      ierr = PetscMalloc(ddn_local*sizeof(PetscInt), &ddidx_local); CHKERRQ(ierr);
      ddn_local = 0;
      for (i=0; i<osm->n; i++) {
	ierr = ISGetLocalSize(osm->is_local[i],&dn_local); CHKERRQ(ierr);
	ierr = ISGetIndices(osm->is_local[i],&didx_local); CHKERRQ(ierr);
	ierr = PetscMemcpy(ddidx_local+ddn_local, didx_local, sizeof(PetscInt)*dn_local); CHKERRQ(ierr);
	ierr = ISRestoreIndices(osm->is_local[i], &didx_local); CHKERRQ(ierr);
	ddn_local += dn_local;
      }
      ierr = PetscMalloc(sizeof(PetscInt)*ddn_local, &ddidx_llocal); CHKERRQ(ierr);
      /* gis_local takes ownership of ddidx_local (PETSC_OWN_POINTER). */
      ierr = ISCreateGeneral(((PetscObject)pc)->comm, ddn_local, ddidx_local, PETSC_OWN_POINTER, &(osm->gis_local)); CHKERRQ(ierr);
      /* Locate each nonoverlapping index inside the merged overlapping IS gis. */
      ierr = ISLocalToGlobalMappingCreateIS(osm->gis,&ltog);CHKERRQ(ierr);
      ierr = ISGlobalToLocalMappingApply(ltog,IS_GTOLM_DROP,ddn_local,ddidx_local,&ddn_llocal,ddidx_llocal);CHKERRQ(ierr);
      ierr = ISLocalToGlobalMappingDestroy(ltog);CHKERRQ(ierr);
      if (ddn_llocal != ddn_local) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"gis_local contains %D indices outside of gis", ddn_llocal - ddn_local);
      /* Now convert these localized indices into the global indices into the merged output vector. */
      ierr = VecGetOwnershipRange(osm->gy, &gfirst, &glast); CHKERRQ(ierr);
      for(j=0; j < ddn_llocal; ++j) {
	ddidx_llocal[j] += gfirst;
      }
      ierr = ISCreateGeneral(PETSC_COMM_SELF,ddn_llocal,ddidx_llocal,PETSC_OWN_POINTER,&gis_llocal); CHKERRQ(ierr);
      /* The prolongation scatters only the nonoverlapping parts of gy back to the global vector. */
      ierr = VecScatterCreate(y,osm->gis_local,osm->gy,gis_llocal,&osm->gprolongation);              CHKERRQ(ierr);
      ierr = ISDestroy(gis_llocal);CHKERRQ(ierr);
    }
    /* Create the subdomain work vectors. */
    /* Each osm->x[i]/osm->y[i] aliases a contiguous segment of gx/gy storage
       (no copy); ddn accumulates the running offset of subdomain i. */
    ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->x);CHKERRQ(ierr);
    ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->y);CHKERRQ(ierr);
    ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast);CHKERRQ(ierr);
    ierr = VecGetArray(osm->gx, &gxarray);                                     CHKERRQ(ierr);
    ierr = VecGetArray(osm->gy, &gyarray);                                     CHKERRQ(ierr);
    for (i=0, ddn=0; i<osm->n; ++i, ddn += dn) {
      PetscInt dN;
      ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr);
      ierr = ISGetSize(osm->is[i],&dN);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gxarray+ddn,&osm->x[i]);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gyarray+ddn,&osm->y[i]);CHKERRQ(ierr);
    }
    ierr = VecRestoreArray(osm->gx, &gxarray); CHKERRQ(ierr);
    ierr = VecRestoreArray(osm->gy, &gyarray); CHKERRQ(ierr);
    ierr = VecDestroy(x); CHKERRQ(ierr);
    ierr = VecDestroy(y); CHKERRQ(ierr);
    /* Create the local solvers */
    /* NOTE(review): sizeof(KSP *) vs sizeof(KSP) — KSP is itself a pointer
       type in PETSc, so the sizes coincide; confirm against other allocations. */
    ierr = PetscMalloc(osm->n*sizeof(KSP *),&osm->ksp);CHKERRQ(ierr);
    for (i=0; i<osm->n; i++) { /* KSPs are local */
      ierr = KSPCreate(((PetscObject)(osm->is[i]))->comm,&ksp);CHKERRQ(ierr);
      ierr = PetscLogObjectParent(pc,ksp);CHKERRQ(ierr);
      ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr);
      ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
      ierr = KSPGetPC(ksp,&subpc);CHKERRQ(ierr);
      ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
      ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr);
      ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr);
      osm->ksp[i] = ksp;
    }
    scall = MAT_INITIAL_MATRIX;

  } else {
    /*
       Destroy the blocks from the previous iteration
    */
    if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
      ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
      scall = MAT_INITIAL_MATRIX;
    }
  }

  /*
     Extract out the submatrices.
  */
  if(size > 1) {
    ierr = MatGetSubMatricesParallel(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr);
  }
  else {
    ierr = MatGetSubMatrices(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr);
  }
  if (scall == MAT_INITIAL_MATRIX) {
    /* Newly created submatrices inherit the operator's options prefix. */
    ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr);
    for (i=0; i<osm->n; i++) {
      ierr = PetscLogObjectParent(pc,osm->pmat[i]);CHKERRQ(ierr);
      ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr);
    }
  }

  /* Return control to the user so that the submatrices can be modified (e.g., to apply
     different boundary conditions for the submatrices than for the global problem) */
  ierr = PCModifySubMatrices(pc,osm->n,osm->is,osm->is,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr);

  /*
     Loop over submatrices putting them into local ksp
  */
  for (i=0; i<osm->n; i++) {
    ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i],pc->flag);CHKERRQ(ierr);
    if (!pc->setupcalled) {
      ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr);
    }
  }

  PetscFunctionReturn(0);
}
408 
409 #undef __FUNCT__
410 #define __FUNCT__ "PCSetUpOnBlocks_GASM"
411 static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc)
412 {
413   PC_GASM         *osm = (PC_GASM*)pc->data;
414   PetscErrorCode ierr;
415   PetscInt       i;
416 
417   PetscFunctionBegin;
418   for (i=0; i<osm->n; i++) {
419     ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr);
420   }
421   PetscFunctionReturn(0);
422 }
423 
#undef __FUNCT__
#define __FUNCT__ "PCApply_GASM"
/*
   Applies the GASM preconditioner: restrict x into the merged subdomain
   work vector gx, solve each subdomain problem with its KSP (the subdomain
   vectors osm->x[i]/osm->y[i] alias segments of gx/gy), then scatter-add
   the subdomain solutions from gy into y.
*/
static PetscErrorCode PCApply_GASM(PC pc,Vec x,Vec y)
{
  PC_GASM         *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

  PetscFunctionBegin;
  /*
     Support for limiting the restriction or interpolation to only local
     subdomain values (leaving the other values 0).
  */
  if (!(osm->type & PC_GASM_RESTRICT)) {
    forward = SCATTER_FORWARD_LOCAL;
    /* have to zero the work RHS since scatter may leave some slots empty */
    ierr = VecZeroEntries(osm->gx); CHKERRQ(ierr);
  }
  if (!(osm->type & PC_GASM_INTERPOLATE)) {
    reverse = SCATTER_REVERSE_LOCAL;
  }

  ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  /* Zero the output while the scatter is in flight; results are accumulated below with ADD_VALUES. */
  ierr = VecZeroEntries(y);CHKERRQ(ierr);
  ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  /* do the subdomain solves */
  for (i=0; i<osm->n; ++i) {
    ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
  }
  ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
458 
#undef __FUNCT__
#define __FUNCT__ "PCApplyTranspose_GASM"
/*
   Applies the transpose of the GASM preconditioner: same restrict/solve/
   prolong pipeline as PCApply_GASM(), but with KSPSolveTranspose() and with
   the RESTRICT/INTERPOLATE roles of the type flags swapped (see note below).
*/
static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec x,Vec y)
{
  PC_GASM         *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

  PetscFunctionBegin;
  /*
     Support for limiting the restriction or interpolation to only local
     subdomain values (leaving the other values 0).

     Note: these are reversed from the PCApply_GASM() because we are applying the
     transpose of the three terms
  */
  if (!(osm->type & PC_GASM_INTERPOLATE)) {
    forward = SCATTER_FORWARD_LOCAL;
    /* have to zero the work RHS since scatter may leave some slots empty */
    ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
  }
  if (!(osm->type & PC_GASM_RESTRICT)) {
    reverse = SCATTER_REVERSE_LOCAL;
  }

  ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  /* Zero the output while the scatter is in flight; results are accumulated below with ADD_VALUES. */
  ierr = VecZeroEntries(y);CHKERRQ(ierr);
  ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  /* do the local solves */
  for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
    ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
  }
  ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
496 
497 #undef __FUNCT__
498 #define __FUNCT__ "PCDestroy_GASM"
499 static PetscErrorCode PCDestroy_GASM(PC pc)
500 {
501   PC_GASM         *osm = (PC_GASM*)pc->data;
502   PetscErrorCode ierr;
503   PetscInt       i;
504 
505   PetscFunctionBegin;
506   if (osm->ksp) {
507     for (i=0; i<osm->n; i++) {
508       ierr = KSPDestroy(osm->ksp[i]);CHKERRQ(ierr);
509     }
510     ierr = PetscFree(osm->ksp);CHKERRQ(ierr);
511   }
512   if (osm->pmat) {
513     if (osm->n > 0) {
514       ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
515     }
516   }
517   for (i=0; i<osm->n; i++) {
518     ierr = VecDestroy(osm->x[i]);CHKERRQ(ierr);
519     ierr = VecDestroy(osm->y[i]);CHKERRQ(ierr);
520   }
521   ierr = PetscFree(osm->x);CHKERRQ(ierr);
522   ierr = PetscFree(osm->y);CHKERRQ(ierr);
523   ierr = VecDestroy(osm->gx); CHKERRQ(ierr);
524   ierr = VecDestroy(osm->gy); CHKERRQ(ierr);
525 
526   ierr = VecScatterDestroy(osm->grestriction); CHKERRQ(ierr);
527   ierr = VecScatterDestroy(osm->gprolongation); CHKERRQ(ierr);
528 
529   if (osm->is) {
530     ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
531   }
532 
533   if(osm->gis) {
534     ierr = ISDestroy(osm->gis); CHKERRQ(ierr);
535   }
536   if(osm->gis_local) {
537     ierr = ISDestroy(osm->gis_local); CHKERRQ(ierr);
538   }
539   ierr = PetscFree(pc->data);CHKERRQ(ierr);
540   PetscFunctionReturn(0);
541 }
542 
543 #undef __FUNCT__
544 #define __FUNCT__ "PCSetFromOptions_GASM"
545 static PetscErrorCode PCSetFromOptions_GASM(PC pc) {
546   PC_GASM         *osm = (PC_GASM*)pc->data;
547   PetscErrorCode ierr;
548   PetscInt       blocks,ovl;
549   PetscBool      symset,flg;
550   PCGASMType      gasmtype;
551 
552   PetscFunctionBegin;
553   /* set the type to symmetric if matrix is symmetric */
554   if (!osm->type_set && pc->pmat) {
555     ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
556     if (symset && flg) { osm->type = PC_GASM_BASIC; }
557   }
558   ierr = PetscOptionsHead("Generalized additive Schwarz options");CHKERRQ(ierr);
559     ierr = PetscOptionsInt("-pc_gasm_blocks","Number of subdomains","PCGASMSetTotalSubdomains",osm->n,&blocks,&flg);CHKERRQ(ierr);
560     if (flg) {ierr = PCGASMSetTotalSubdomains(pc,blocks);CHKERRQ(ierr); }
561     ierr = PetscOptionsInt("-pc_gasm_overlap","Number of grid points overlap","PCGASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr);
562     if (flg) {ierr = PCGASMSetOverlap(pc,ovl);CHKERRQ(ierr); }
563     flg  = PETSC_FALSE;
564     ierr = PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);CHKERRQ(ierr);
565     if (flg) {ierr = PCGASMSetType(pc,gasmtype);CHKERRQ(ierr); }
566   ierr = PetscOptionsTail();CHKERRQ(ierr);
567   PetscFunctionReturn(0);
568 }
569 
570 /*------------------------------------------------------------------------------------*/
571 
572 EXTERN_C_BEGIN
573 #undef __FUNCT__
574 #define __FUNCT__ "PCGASMSetLocalSubdomains_GASM"
575 PetscErrorCode  PCGASMSetLocalSubdomains_GASM(PC pc,PetscInt n,IS is[],IS is_local[])
576 {
577   PC_GASM         *osm = (PC_GASM*)pc->data;
578   PetscErrorCode ierr;
579   PetscInt       i;
580 
581   PetscFunctionBegin;
582   if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more blocks, n = %D",n);
583   if (pc->setupcalled && (n != osm->n || is)) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetLocalSubdomains() should be called before calling PCSetUp().");
584 
585   if (!pc->setupcalled) {
586     osm->n            = n;
587     osm->is           = 0;
588     osm->is_local     = 0;
589     if (is) {
590       for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is[i]);CHKERRQ(ierr);}
591     }
592     if (is_local) {
593       for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is_local[i]);CHKERRQ(ierr);}
594     }
595     if (osm->is) {
596       ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
597     }
598     if (is) {
599       ierr = PetscMalloc(n*sizeof(IS),&osm->is);CHKERRQ(ierr);
600       for (i=0; i<n; i++) { osm->is[i] = is[i]; }
601       /* Flag indicating that the user has set overlapping subdomains so PCGASM should not increase their size. */
602       osm->overlap = -1;
603     }
604     if (is_local) {
605       ierr = PetscMalloc(n*sizeof(IS),&osm->is_local);CHKERRQ(ierr);
606       for (i=0; i<n; i++) { osm->is_local[i] = is_local[i]; }
607     }
608   }
609   PetscFunctionReturn(0);
610 }
611 EXTERN_C_END
612 
613 EXTERN_C_BEGIN
614 #undef __FUNCT__
615 #define __FUNCT__ "PCGASMSetTotalSubdomains_GASM"
616 PetscErrorCode  PCGASMSetTotalSubdomains_GASM(PC pc,PetscInt N) {
617   PC_GASM         *osm = (PC_GASM*)pc->data;
618   PetscErrorCode ierr;
619   PetscMPIInt    rank,size;
620   PetscInt       n;
621 
622   PetscFunctionBegin;
623   if (N < 1) SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Number of total blocks must be > 0, N = %D",N);
624 
625   /*
626      Split the subdomains equally among all processors
627   */
628   ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
629   ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
630   n = N/size + ((N % size) > rank);
631   if (!n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Process %d must have at least one block: total processors %d total blocks %D",(int)rank,(int)size,N);
632   if (pc->setupcalled && n != osm->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before PCSetUp().");
633   if (!pc->setupcalled) {
634     if (osm->is) {
635       ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
636     }
637     osm->N            = N;
638     osm->n            = n;
639     osm->is           = 0;
640     osm->is_local     = 0;
641   }
642   PetscFunctionReturn(0);
643 }/* PCGASMSetTotalSubdomains_GASM() */
644 EXTERN_C_END
645 
646 EXTERN_C_BEGIN
647 #undef __FUNCT__
648 #define __FUNCT__ "PCGASMSetOverlap_GASM"
649 PetscErrorCode  PCGASMSetOverlap_GASM(PC pc,PetscInt ovl)
650 {
651   PC_GASM *osm = (PC_GASM*)pc->data;
652 
653   PetscFunctionBegin;
654   if (ovl < 0) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
655   if (pc->setupcalled && ovl != osm->overlap) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp().");
656   if (!pc->setupcalled) {
657     osm->overlap = ovl;
658   }
659   PetscFunctionReturn(0);
660 }
661 EXTERN_C_END
662 
663 EXTERN_C_BEGIN
664 #undef __FUNCT__
665 #define __FUNCT__ "PCGASMSetType_GASM"
666 PetscErrorCode  PCGASMSetType_GASM(PC pc,PCGASMType type)
667 {
668   PC_GASM *osm = (PC_GASM*)pc->data;
669 
670   PetscFunctionBegin;
671   osm->type     = type;
672   osm->type_set = PETSC_TRUE;
673   PetscFunctionReturn(0);
674 }
675 EXTERN_C_END
676 
677 EXTERN_C_BEGIN
678 #undef __FUNCT__
679 #define __FUNCT__ "PCGASMSetSortIndices_GASM"
680 PetscErrorCode  PCGASMSetSortIndices_GASM(PC pc,PetscBool  doSort)
681 {
682   PC_GASM *osm = (PC_GASM*)pc->data;
683 
684   PetscFunctionBegin;
685   osm->sort_indices = doSort;
686   PetscFunctionReturn(0);
687 }
688 EXTERN_C_END
689 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCGASMGetSubKSP_GASM"
/*
   Returns the number of local subdomains, the global number of the first
   local subdomain, and the array of subdomain KSP solvers.

   FIX: This routine might need to be modified once multiple ranks per subdomain are allowed.
        In particular, it would upset the global subdomain number calculation.
*/
PetscErrorCode  PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp)
{
  PC_GASM         *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* osm->n is set during setup; a value < 1 means PCSetUp() has not run yet. */
  if (osm->n < 1) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ORDER,"Need to call PCSetUP() on PC (or KSPSetUp() on the outer KSP object) before calling here");

  if (n) {
    *n = osm->n;
  }
  if (first) {
    /* MPI_Scan is collective: either every rank requests 'first' or none may.
       The inclusive prefix sum of the local counts, minus the local count,
       yields the global number of this rank's first subdomain. */
    ierr = MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr);
    *first -= osm->n;
  }
  if (ksp) {
    /* Assume that local solves are now different; not necessarily
       true though!  This flag is used only for PCView_GASM() */
    *ksp                   = osm->ksp;
    osm->same_local_solves = PETSC_FALSE;
  }
  PetscFunctionReturn(0);
}/* PCGASMGetSubKSP_GASM() */
EXTERN_C_END
721 
722 
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetLocalSubdomains"
/*@C
    PCGASMSetLocalSubdomains - Sets the local subdomains (for this processor
    only) for the additive Schwarz preconditioner.

    Collective on PC

    Input Parameters:
+   pc - the preconditioner context
.   n - the number of subdomains for this processor (default value = 1)
.   is - the index set that defines the subdomains for this processor
         (or PETSC_NULL for PETSc to determine subdomains)
-   is_local - the index sets that define the local part of the subdomains for this processor
         (or PETSC_NULL to use the default of 1 subdomain per process)

    Notes:
    The IS numbering is in the parallel, global numbering of the vector.

    By default the GASM preconditioner uses 1 block per processor.

    Use PCGASMSetTotalSubdomains() to set the total number of subdomains across
    all processors instead.

    Level: advanced

.keywords: PC, GASM, set, local, subdomains, additive Schwarz

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains()
@*/
PetscErrorCode  PCGASMSetLocalSubdomains(PC pc,PetscInt n,IS is[],IS is_local[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGASMSetLocalSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,is,is_local));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
762 
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetTotalSubdomains"
/*@C
    PCGASMSetTotalSubdomains - Sets the total number of subdomains to use across
    all processors for the additive Schwarz preconditioner.  Either all or no
    processors in the PC communicator must call this routine, with the same N.

    Collective on PC

    Input Parameters:
+   pc - the preconditioner context
-   N - the total number of subdomains for all processors

    Options Database Key:
    To set the total number of subdomain blocks rather than specify the
    index sets, use the option
.    -pc_gasm_blocks <blks> - Sets total blocks

    Notes:
    The subdomains are split equally among the processors; the index sets
    themselves are determined later by PETSc (during PCSetUp()).

    By default the GASM preconditioner uses 1 block per processor.

    Use PCGASMSetLocalSubdomains() to set the local subdomains (and, optionally,
    their index sets) explicitly.

    Level: advanced

.keywords: PC, GASM, set, total, global, subdomains, additive Schwarz

.seealso: PCGASMSetLocalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode  PCGASMSetTotalSubdomains(PC pc,PetscInt N)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGASMSetTotalSubdomains_C",(PC,PetscInt),(pc,N));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
811 
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetOverlap"
/*@
    PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
    additive Schwarz preconditioner.  Either all or no processors in the
    PC communicator must call this routine.

    Logically Collective on PC

    Input Parameters:
+   pc  - the preconditioner context
-   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 1)

    Options Database Key:
.   -pc_gasm_overlap <ovl> - Sets overlap

    Notes:
    By default the GASM preconditioner uses 1 block per processor.  To use
    multiple blocks per processor, see PCGASMSetTotalSubdomains() and
    PCGASMSetLocalSubdomains() (and the option -pc_gasm_blocks <blks>).

    The overlap defaults to 1, so if one desires that no additional
    overlap be computed beyond what may have been set with a call to
    PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(), then ovl
    must be set to be 0.  In particular, if one does not explicitly set
    the subdomains in an application code, then all overlap would be computed
    internally by PETSc, and using an overlap of 0 would result in a GASM
    variant that is equivalent to the block Jacobi preconditioner.

    Note that one can define initial index sets with any overlap via
    PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(); the routine
    PCGASMSetOverlap() merely allows PETSc to extend that overlap further
    if desired.

    Level: intermediate

.keywords: PC, GASM, set, overlap

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains()
@*/
PetscErrorCode  PCGASMSetOverlap(PC pc,PetscInt ovl)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveInt(pc,ovl,2);
  ierr = PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
863 
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetType"
/*@
    PCGASMSetType - Sets the type of restriction and interpolation used
    for local problems in the additive Schwarz method.

    Logically Collective on PC

    Input Parameters:
+   pc  - the preconditioner context
-   type - variant of GASM, one of
.vb
      PC_GASM_BASIC       - full interpolation and restriction
      PC_GASM_RESTRICT    - full restriction, local processor interpolation
      PC_GASM_INTERPOLATE - full interpolation, local processor restriction
      PC_GASM_NONE        - local processor restriction and interpolation
.ve

    Options Database Key:
.   -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type

    Level: intermediate

.keywords: PC, GASM, set, type

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode  PCGASMSetType(PC pc,PCGASMType type)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveEnum(pc,type,2);
  ierr = PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
902 
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetSortIndices"
/*@
    PCGASMSetSortIndices - Determines whether subdomain indices are sorted.

    Logically Collective on PC

    Input Parameters:
+   pc  - the preconditioner context
-   doSort - sort the subdomain indices

    Level: intermediate

.keywords: PC, GASM, set, sort, indices

.seealso: PCGASMSetLocalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode  PCGASMSetSortIndices(PC pc,PetscBool  doSort)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveBool(pc,doSort,2);
  ierr = PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
931 
#undef __FUNCT__
#define __FUNCT__ "PCGASMGetSubKSP"
/*@C
   PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on
   this processor.

   Collective on PC iff first_local is requested

   Input Parameter:
.  pc - the preconditioner context

   Output Parameters:
+  n_local - the number of blocks on this processor or PETSC_NULL
.  first_local - the global number of the first block on this processor or PETSC_NULL,
                 all processors must request or all must pass PETSC_NULL
-  ksp - the array of KSP contexts

   Note:
   After PCGASMGetSubKSP() the array of KSP contexts is not to be freed

   Currently for some matrix implementations only 1 block per processor
   is supported.

   You must call KSPSetUp() before calling PCGASMGetSubKSP().

   Level: advanced

.keywords: PC, GASM, additive Schwarz, get, sub, KSP, context

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMSetOverlap(),
          PCGASMCreateSubdomains2D(),
@*/
PetscErrorCode  PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
973 
974 /* -------------------------------------------------------------------------------------*/
975 /*MC
976    PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
977            its own KSP object.
978 
979    Options Database Keys:
+  -pc_gasm_truelocal - Activates PCGASMSetUseTrueLocal()
981 .  -pc_gasm_blocks <blks> - Sets total blocks
982 .  -pc_gasm_overlap <ovl> - Sets overlap
983 -  -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
984 
985      IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
986       will get a different convergence rate due to the default option of -pc_gasm_type restrict. Use
987       -pc_gasm_type basic to use the standard GASM.
988 
989    Notes: Each processor can have one or more blocks, but a block cannot be shared by more
990      than one processor. Defaults to one block per processor.
991 
992      To set options on the solvers for each block append -sub_ to all the KSP, and PC
993         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
994 
995      To set the options on the solvers separate for each block call PCGASMGetSubKSP()
996          and set the options directly on the resulting KSP object (you can access its PC
997          with KSPGetPC())
998 
999 
1000    Level: beginner
1001 
1002    Concepts: additive Schwarz method
1003 
1004     References:
1005     An additive variant of the Schwarz alternating method for the case of many subregions
1006     M Dryja, OB Widlund - Courant Institute, New York University Technical report
1007 
1008     Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
1009     Barry Smith, Petter Bjorstad, and William Gropp, Cambridge University Press, ISBN 0-521-49589-X.
1010 
1011 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
1012            PCBJACOBI, PCGASMSetUseTrueLocal(), PCGASMGetSubKSP(), PCGASMSetLocalSubdomains(),
1013            PCGASMSetTotalSubdomains(), PCSetModifySubmatrices(), PCGASMSetOverlap(), PCGASMSetType()
1014 
1015 M*/
1016 
1017 EXTERN_C_BEGIN
1018 #undef __FUNCT__
1019 #define __FUNCT__ "PCCreate_GASM"
1020 PetscErrorCode  PCCreate_GASM(PC pc)
1021 {
1022   PetscErrorCode ierr;
1023   PC_GASM         *osm;
1024 
1025   PetscFunctionBegin;
1026   ierr = PetscNewLog(pc,PC_GASM,&osm);CHKERRQ(ierr);
1027   osm->N                 = PETSC_DECIDE;
1028   osm->n                 = 0;
1029   osm->nmax              = 0;
1030   osm->overlap           = 1;
1031   osm->ksp               = 0;
1032   osm->grestriction      = 0;
1033   osm->gprolongation     = 0;
1034   osm->gx                = 0;
1035   osm->gy                = 0;
1036   osm->x                 = 0;
1037   osm->y                 = 0;
1038   osm->is                = 0;
1039   osm->is_local          = 0;
1040   osm->pmat              = 0;
1041   osm->type              = PC_GASM_RESTRICT;
1042   osm->same_local_solves = PETSC_TRUE;
1043   osm->sort_indices      = PETSC_TRUE;
1044 
1045   pc->data                   = (void*)osm;
1046   pc->ops->apply             = PCApply_GASM;
1047   pc->ops->applytranspose    = PCApplyTranspose_GASM;
1048   pc->ops->setup             = PCSetUp_GASM;
1049   pc->ops->destroy           = PCDestroy_GASM;
1050   pc->ops->setfromoptions    = PCSetFromOptions_GASM;
1051   pc->ops->setuponblocks     = PCSetUpOnBlocks_GASM;
1052   pc->ops->view              = PCView_GASM;
1053   pc->ops->applyrichardson   = 0;
1054 
1055   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetLocalSubdomains_C","PCGASMSetLocalSubdomains_GASM",
1056                     PCGASMSetLocalSubdomains_GASM);CHKERRQ(ierr);
1057   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetTotalSubdomains_C","PCGASMSetTotalSubdomains_GASM",
1058                     PCGASMSetTotalSubdomains_GASM);CHKERRQ(ierr);
1059   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetOverlap_C","PCGASMSetOverlap_GASM",
1060                     PCGASMSetOverlap_GASM);CHKERRQ(ierr);
1061   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetType_C","PCGASMSetType_GASM",
1062                     PCGASMSetType_GASM);CHKERRQ(ierr);
1063   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetSortIndices_C","PCGASMSetSortIndices_GASM",
1064                     PCGASMSetSortIndices_GASM);CHKERRQ(ierr);
1065   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMGetSubKSP_C","PCGASMGetSubKSP_GASM",
1066                     PCGASMGetSubKSP_GASM);CHKERRQ(ierr);
1067   PetscFunctionReturn(0);
1068 }
1069 EXTERN_C_END
1070 
1071 
1072 #undef __FUNCT__
1073 #define __FUNCT__ "PCGASMCreateSubdomains"
1074 /*@C
1075    PCGASMCreateSubdomains - Creates the index sets for the overlapping Schwarz
1076    preconditioner for a any problem on a general grid.
1077 
1078    Collective
1079 
1080    Input Parameters:
1081 +  A - The global matrix operator
1082 -  n - the number of local blocks
1083 
1084    Output Parameters:
1085 .  outis - the array of index sets defining the subdomains
1086 
1087    Level: advanced
1088 
1089    Note: this generates nonoverlapping subdomains; the PCGASM will generate the overlap
1090     from these if you use PCGASMSetLocalSubdomains()
1091 
1092     In the Fortran version you must provide the array outis[] already allocated of length n.
1093 
1094 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid
1095 
1096 .seealso: PCGASMSetLocalSubdomains(), PCGASMDestroySubdomains()
1097 @*/
1098 PetscErrorCode  PCGASMCreateSubdomains(Mat A, PetscInt n, IS* outis[])
1099 {
1100   MatPartitioning           mpart;
1101   const char                *prefix;
1102   PetscErrorCode            (*f)(Mat,PetscBool *,MatReuse,Mat*);
1103   PetscMPIInt               size;
1104   PetscInt                  i,j,rstart,rend,bs;
1105   PetscBool                 iscopy = PETSC_FALSE,isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
1106   Mat                       Ad = PETSC_NULL, adj;
1107   IS                        ispart,isnumb,*is;
1108   PetscErrorCode            ierr;
1109 
1110   PetscFunctionBegin;
1111   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1112   PetscValidPointer(outis,3);
1113   if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local blocks must be > 0, n = %D",n);
1114 
1115   /* Get prefix, row distribution, and block size */
1116   ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr);
1117   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1118   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
1119   if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);
1120 
1121   /* Get diagonal block from matrix if possible */
1122   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1123   ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",(void (**)(void))&f);CHKERRQ(ierr);
1124   if (f) {
1125     ierr = (*f)(A,&iscopy,MAT_INITIAL_MATRIX,&Ad);CHKERRQ(ierr);
1126   } else if (size == 1) {
1127     iscopy = PETSC_FALSE; Ad = A;
1128   } else {
1129     iscopy = PETSC_FALSE; Ad = PETSC_NULL;
1130   }
1131   if (Ad) {
1132     ierr = PetscTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
1133     if (!isbaij) {ierr = PetscTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);}
1134   }
1135   if (Ad && n > 1) {
1136     PetscBool  match,done;
1137     /* Try to setup a good matrix partitioning if available */
1138     ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr);
1139     ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
1140     ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
1141     ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);CHKERRQ(ierr);
1142     if (!match) {
1143       ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);CHKERRQ(ierr);
1144     }
1145     if (!match) { /* assume a "good" partitioner is available */
1146       PetscInt na,*ia,*ja;
1147       ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1148       if (done) {
1149 	/* Build adjacency matrix by hand. Unfortunately a call to
1150 	   MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1151 	   remove the block-aij structure and we cannot expect
1152 	   MatPartitioning to split vertices as we need */
1153 	PetscInt i,j,*row,len,nnz,cnt,*iia=0,*jja=0;
1154 	nnz = 0;
1155 	for (i=0; i<na; i++) { /* count number of nonzeros */
1156 	  len = ia[i+1] - ia[i];
1157 	  row = ja + ia[i];
1158 	  for (j=0; j<len; j++) {
1159 	    if (row[j] == i) { /* don't count diagonal */
1160 	      len--; break;
1161 	    }
1162 	  }
1163 	  nnz += len;
1164 	}
1165 	ierr = PetscMalloc((na+1)*sizeof(PetscInt),&iia);CHKERRQ(ierr);
1166 	ierr = PetscMalloc((nnz)*sizeof(PetscInt),&jja);CHKERRQ(ierr);
1167 	nnz    = 0;
1168 	iia[0] = 0;
1169 	for (i=0; i<na; i++) { /* fill adjacency */
1170 	  cnt = 0;
1171 	  len = ia[i+1] - ia[i];
1172 	  row = ja + ia[i];
1173 	  for (j=0; j<len; j++) {
1174 	    if (row[j] != i) { /* if not diagonal */
1175 	      jja[nnz+cnt++] = row[j];
1176 	    }
1177 	  }
1178 	  nnz += cnt;
1179 	  iia[i+1] = nnz;
1180 	}
1181 	/* Partitioning of the adjacency matrix */
1182 	ierr = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,PETSC_NULL,&adj);CHKERRQ(ierr);
1183 	ierr = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr);
1184 	ierr = MatPartitioningSetNParts(mpart,n);CHKERRQ(ierr);
1185 	ierr = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr);
1186 	ierr = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr);
1187 	ierr = MatDestroy(adj);CHKERRQ(ierr);
1188 	foundpart = PETSC_TRUE;
1189       }
1190       ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1191     }
1192     ierr = MatPartitioningDestroy(mpart);CHKERRQ(ierr);
1193   }
1194   if (iscopy) {ierr = MatDestroy(Ad);CHKERRQ(ierr);}
1195 
1196   ierr = PetscMalloc(n*sizeof(IS),&is);CHKERRQ(ierr);
1197   *outis = is;
1198 
1199   if (!foundpart) {
1200 
1201     /* Partitioning by contiguous chunks of rows */
1202 
1203     PetscInt mbs   = (rend-rstart)/bs;
1204     PetscInt start = rstart;
1205     for (i=0; i<n; i++) {
1206       PetscInt count = (mbs/n + ((mbs % n) > i)) * bs;
1207       ierr   = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr);
1208       start += count;
1209     }
1210 
1211   } else {
1212 
1213     /* Partitioning by adjacency of diagonal block  */
1214 
1215     const PetscInt *numbering;
1216     PetscInt       *count,nidx,*indices,*newidx,start=0;
1217     /* Get node count in each partition */
1218     ierr = PetscMalloc(n*sizeof(PetscInt),&count);CHKERRQ(ierr);
1219     ierr = ISPartitioningCount(ispart,n,count);CHKERRQ(ierr);
1220     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1221       for (i=0; i<n; i++) count[i] *= bs;
1222     }
1223     /* Build indices from node numbering */
1224     ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr);
1225     ierr = PetscMalloc(nidx*sizeof(PetscInt),&indices);CHKERRQ(ierr);
1226     for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
1227     ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr);
1228     ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr);
1229     ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr);
1230     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1231       ierr = PetscMalloc(nidx*bs*sizeof(PetscInt),&newidx);CHKERRQ(ierr);
1232       for (i=0; i<nidx; i++)
1233 	for (j=0; j<bs; j++)
1234 	  newidx[i*bs+j] = indices[i]*bs + j;
1235       ierr = PetscFree(indices);CHKERRQ(ierr);
1236       nidx   *= bs;
1237       indices = newidx;
1238     }
1239     /* Shift to get global indices */
1240     for (i=0; i<nidx; i++) indices[i] += rstart;
1241 
1242     /* Build the index sets for each block */
1243     for (i=0; i<n; i++) {
1244       ierr   = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);CHKERRQ(ierr);
1245       ierr   = ISSort(is[i]);CHKERRQ(ierr);
1246       start += count[i];
1247     }
1248 
1249     ierr = PetscFree(count);
1250     ierr = PetscFree(indices);
1251     ierr = ISDestroy(isnumb);CHKERRQ(ierr);
1252     ierr = ISDestroy(ispart);CHKERRQ(ierr);
1253   }
1254 
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "PCGASMDestroySubdomains"
1260 /*@C
1261    PCGASMDestroySubdomains - Destroys the index sets created with
1262    PCGASMCreateSubdomains(). Should be called after setting subdomains
1263    with PCGASMSetLocalSubdomains().
1264 
1265    Collective
1266 
1267    Input Parameters:
1268 +  n - the number of index sets
1269 .  is - the array of index sets
1270 -  is_local - the array of local index sets, can be PETSC_NULL
1271 
1272    Level: advanced
1273 
1274 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid
1275 
1276 .seealso: PCGASMCreateSubdomains(), PCGASMSetLocalSubdomains()
1277 @*/
1278 PetscErrorCode  PCGASMDestroySubdomains(PetscInt n, IS is[], IS is_local[])
1279 {
1280   PetscInt       i;
1281   PetscErrorCode ierr;
1282   PetscFunctionBegin;
1283   if (n <= 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"n must be > 0: n = %D",n);
1284   PetscValidPointer(is,2);
1285   for (i=0; i<n; i++) { ierr = ISDestroy(is[i]);CHKERRQ(ierr); }
1286   ierr = PetscFree(is);CHKERRQ(ierr);
1287   if (is_local) {
1288     PetscValidPointer(is_local,3);
1289     for (i=0; i<n; i++) { ierr = ISDestroy(is_local[i]);CHKERRQ(ierr); }
1290     ierr = PetscFree(is_local);CHKERRQ(ierr);
1291   }
1292   PetscFunctionReturn(0);
1293 }
1294 
1295 
/*
   PCGASMLocalSubdomainBounds2D - Intersects the 2D subdomain
   [xleft,xright) x [ylow,yhigh) on an M x N grid with this process's
   ownership range [first,last) of the row-major global numbering
   (rows of length M -- see first/M, last/M below).  Outputs, via pointers:
   the bounding box (xleft_loc,ylow_loc) -- (xright_loc,yhigh_loc) of the
   intersection and its size n.  xleft_loc/xright_loc bound only the first and
   last rows of the intersection and might lie outside the local subdomain.
*/
#define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \
{                                                                                                       \
 PetscInt first_row = first/M, last_row = last/M+1;                                                     \
  /*                                                                                                    \
   Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners       \
   of the bounding box of the intersection of the subdomain with the local ownership range (local       \
   subdomain).                                                                                          \
   Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows       \
   of the intersection.                                                                                 \
  */                                                                                                    \
  /* ylow_loc is the grid row containing the first element of the local sumbdomain */                   \
  *ylow_loc = PetscMax(first_row,ylow);                                                                    \
  /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
  *xleft_loc = *ylow_loc==first_row?PetscMax(first%M,xleft):xleft;                                                                            \
  /* yhigh_loc is the grid row above the last local subdomain element */                                                                    \
  *yhigh_loc = PetscMin(last_row,yhigh);                                                                                                     \
  /* xright is the offset of the end of the  local subdomain within its grid row (might actually be outside the local subdomain) */         \
  *xright_loc = *yhigh_loc==last_row?PetscMin(xright,last%M):xright;                                                                          \
  /* Now compute the size of the local subdomain n. */ \
  *n = 0;                                               \
  if(*ylow_loc < *yhigh_loc) {                           \
    PetscInt width = xright-xleft;                     \
    *n += width*(*yhigh_loc-*ylow_loc-1);                 \
    *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \
    *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \
  }\
}
1323 
1324 
1325 
1326 #undef __FUNCT__
1327 #define __FUNCT__ "PCGASMCreateSubdomains2D"
1328 /*@
1329    PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1330    preconditioner for a two-dimensional problem on a regular grid.
1331 
1332    Collective
1333 
1334    Input Parameters:
1335 +  M, N - the global number of mesh points in the x and y directions
1336 .  Mdomains, Ndomains - the global number of subdomains in the x and y directions
1337 .  dof - degrees of freedom per node
1338 -  overlap - overlap in mesh lines
1339 
1340    Output Parameters:
1341 +  Nsub - the number of local subdomains created
1342 .  is - array of index sets defining overlapping (if overlap > 0) subdomains
1343 -  is_local - array of index sets defining non-overlapping subdomains
1344 
1345    Note:
1346    Presently PCAMSCreateSubdomains2d() is valid only for sequential
1347    preconditioners.  More general related routines are
1348    PCGASMSetTotalSubdomains() and PCGASMSetLocalSubdomains().
1349 
1350    Level: advanced
1351 
1352 .keywords: PC, GASM, additive Schwarz, create, subdomains, 2D, regular grid
1353 
1354 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
1355           PCGASMSetOverlap()
1356 @*/
1357 PetscErrorCode  PCGASMCreateSubdomains2D(PC pc, PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap, PetscInt *nsub,IS **is,IS **is_local)
1358 {
1359   PetscErrorCode ierr;
1360   PetscMPIInt size, rank;
1361   PetscInt i, j;
1362   PetscInt maxheight, maxwidth;
1363   PetscInt xstart, xleft, xright, xleft_loc, xright_loc;
1364   PetscInt ystart, ylow,  yhigh,  ylow_loc,  yhigh_loc;
1365   PetscInt x[2][2], y[2][2], n[2];
1366   PetscInt first, last;
1367   PetscInt nidx, *idx;
1368   PetscInt ii,jj,s,q,d;
1369   PetscInt k,kk;
1370   PetscMPIInt color;
1371   MPI_Comm comm, subcomm;
1372 
1373   IS **iis;
1374   PetscFunctionBegin;
1375   ierr = PetscObjectGetComm((PetscObject)pc, &comm);     CHKERRQ(ierr);
1376   ierr = MPI_Comm_size(comm, &size);                     CHKERRQ(ierr);
1377   ierr = MPI_Comm_rank(comm, &rank);                     CHKERRQ(ierr);
1378   ierr = MatGetOwnershipRange(pc->pmat, &first, &last);  CHKERRQ(ierr);
1379   if(first%dof || last%dof) {
1380     SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) "
1381 	     "does not respect the number of degrees of freedom per grid point %D", first, last, dof);
1382   }
1383   /* Determine the number of domains with nonzero intersections with the local ownership range. */
1384   s = 0;
1385   ystart = 0;
1386   for (j=0; j<Ndomains; ++j) {
1387     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1388     if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N);
1389     /* Vertical domain limits with an overlap. */
1390     ylow = PetscMax(ystart - overlap,0);
1391     yhigh = PetscMin(ystart + maxheight + overlap,N);
1392     xstart = 0;
1393     for (i=0; i<Mdomains; ++i) {
1394       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1395       if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1396       /* Horizontal domain limits with an overlap. */
1397       xleft   = PetscMax(xstart - overlap,0);
1398       xright  = PetscMin(xstart + maxwidth + overlap,M);
1399       /*
1400 	 Determine whether this subdomain intersects this processor's ownership range of pc->pmat.
1401       */
1402       PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1403       if(nidx) {
1404         ++s;
1405       }
1406       xstart += maxwidth;
1407     }/* for(i = 0; i < Mdomains; ++i) */
1408     ystart += maxheight;
1409   }/* for(j = 0; j < Ndomains; ++j) */
1410   /* Now we can allocate the necessary number of ISs. */
1411   *nsub = s;
1412   ierr = PetscMalloc((*nsub)*sizeof(IS*),is);         CHKERRQ(ierr);
1413   ierr = PetscMalloc((*nsub)*sizeof(IS*),is_local);   CHKERRQ(ierr);
1414   s = 0;
1415   ystart = 0;
1416   for (j=0; j<Ndomains; ++j) {
1417     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1418     if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N);
1419     /* Vertical domain limits with an overlap. */
1420     y[0][0] = PetscMax(ystart - overlap,0);
1421     y[0][1] = PetscMin(ystart + maxheight + overlap,N);
1422     /* Vertical domain limits without an overlap. */
1423     y[1][0] = ystart;
1424     y[1][1] = PetscMin(ystart + maxheight,N);
1425     xstart = 0;
1426     for (i=0; i<Mdomains; ++i) {
1427       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1428       if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1429       /* Horizontal domain limits with an overlap. */
1430       x[0][0]  = PetscMax(xstart - overlap,0);
1431       x[0][1]  = PetscMin(xstart + maxwidth + overlap,M);
1432       /* Horizontal domain limits without an overlap. */
1433       x[1][0] = xstart;
1434       x[1][1] = PetscMin(xstart+maxwidth,M);
1435       /*
1436 	 Determine whether this domain intersects this processor's ownership range of pc->pmat.
1437 	 Do this twice: first for the domains with overlaps, and once without.
1438 	 During the first pass create the subcommunicators, and use them on the second pass as well.
1439       */
1440       for(q = 0; q < 2; ++q) {
1441 	/*
1442 	  domain limits, (xleft, xright) and (ylow, yheigh) are adjusted
1443 	  according to whether the domain with an overlap or without is considered.
1444 	*/
1445 	xleft = x[q][0]; xright = x[q][1];
1446 	ylow  = y[q][0]; yhigh  = y[q][1];
1447         PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1448 	nidx *= dof;
1449         n[q] = nidx;
1450         /*
1451          Based on the counted number of indices in the local domain *with an overlap*,
1452          construct a subcommunicator of all the processors supporting this domain.
1453          Observe that a domain with an overlap might have nontrivial local support,
1454          while the domain without an overlap might not.  Hence, the decision to participate
1455          in the subcommunicator must be based on the domain with an overlap.
1456          */
1457 	if(q == 0) {
1458 	  if(nidx) {
1459 	    color = 1;
1460 	  }
1461 	  else {
1462 	    color = MPI_UNDEFINED;
1463 	  }
1464 	  ierr = MPI_Comm_split(comm, color, rank, &subcomm); CHKERRQ(ierr);
1465 	}
1466         /*
1467          Proceed only if the number of local indices *with an overlap* is nonzero.
1468          */
1469         if(n[0]) {
1470           if(q == 0) {
1471             iis = is;
1472           }
1473           if(q == 1) {
1474             /*
1475              The IS for the no-overlap subdomain shares a communicator with the overlapping domain.
1476              Moreover, if the overlap is zero, the two ISs are identical.
1477              */
1478             if(overlap == 0) {
1479               (*is_local)[s] = (*is)[s];
1480               ierr = PetscObjectReference((PetscObject)(*is)[s]);CHKERRQ(ierr);
1481               continue;
1482             }
1483             else {
1484               iis = is_local;
1485               subcomm = ((PetscObject)(*is)[s])->comm;
1486             }
1487           }/* if(q == 1) */
1488           idx = PETSC_NULL;
1489 	  ierr = PetscMalloc(nidx*sizeof(PetscInt),&idx);CHKERRQ(ierr);
1490           if(nidx) {
1491             k    = 0;
1492             for (jj=ylow_loc; jj<yhigh_loc; ++jj) {
1493               PetscInt x0 = (jj==ylow_loc)?xleft_loc:xleft;
1494               PetscInt x1 = (jj==yhigh_loc-1)?xright_loc:xright;
1495               kk = dof*(M*jj + x0);
1496               for (ii=x0; ii<x1; ++ii) {
1497                 for(d = 0; d < dof; ++d) {
1498                   idx[k++] = kk++;
1499                 }
1500               }
1501             }
1502           }
1503 	  ierr = ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*iis)+s);CHKERRQ(ierr);
1504 	}/* if(n[0]) */
1505       }/* for(q = 0; q < 2; ++q) */
1506       if(n[0]) {
1507         ++s;
1508       }
1509       xstart += maxwidth;
1510     }/* for(i = 0; i < Mdomains; ++i) */
1511     ystart += maxheight;
1512   }/* for(j = 0; j < Ndomains; ++j) */
1513   PetscFunctionReturn(0);
1514 }
1515 
1516 #undef __FUNCT__
1517 #define __FUNCT__ "PCGASMGetLocalSubdomains"
1518 /*@C
1519     PCGASMGetLocalSubdomains - Gets the local subdomains (for this processor
1520     only) for the additive Schwarz preconditioner.
1521 
1522     Not Collective
1523 
1524     Input Parameter:
1525 .   pc - the preconditioner context
1526 
1527     Output Parameters:
1528 +   n - the number of subdomains for this processor (default value = 1)
1529 .   is - the index sets that define the subdomains for this processor
1530 -   is_local - the index sets that define the local part of the subdomains for this processor (can be PETSC_NULL)
1531 
1532 
1533     Notes:
1534     The IS numbering is in the parallel, global numbering of the vector.
1535 
1536     Level: advanced
1537 
1538 .keywords: PC, GASM, set, local, subdomains, additive Schwarz
1539 
1540 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
1541           PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubmatrices()
1542 @*/
1543 PetscErrorCode  PCGASMGetLocalSubdomains(PC pc,PetscInt *n,IS *is[],IS *is_local[])
1544 {
1545   PC_GASM         *osm;
1546   PetscErrorCode ierr;
1547   PetscBool      match;
1548 
1549   PetscFunctionBegin;
1550   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1551   PetscValidIntPointer(n,2);
1552   if (is) PetscValidPointer(is,3);
1553   ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1554   if (!match) {
1555     if (n)  *n  = 0;
1556     if (is) *is = PETSC_NULL;
1557   } else {
1558     osm = (PC_GASM*)pc->data;
1559     if (n)  *n  = osm->n;
1560     if (is) *is = osm->is;
1561     if (is_local) *is_local = osm->is_local;
1562   }
1563   PetscFunctionReturn(0);
1564 }
1565 
1566 #undef __FUNCT__
1567 #define __FUNCT__ "PCGASMGetLocalSubmatrices"
1568 /*@C
1569     PCGASMGetLocalSubmatrices - Gets the local submatrices (for this processor
1570     only) for the additive Schwarz preconditioner.
1571 
1572     Not Collective
1573 
1574     Input Parameter:
1575 .   pc - the preconditioner context
1576 
1577     Output Parameters:
1578 +   n - the number of matrices for this processor (default value = 1)
1579 -   mat - the matrices
1580 
1581 
1582     Level: advanced
1583 
1584 .keywords: PC, GASM, set, local, subdomains, additive Schwarz, block Jacobi
1585 
1586 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
1587           PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubdomains()
1588 @*/
1589 PetscErrorCode  PCGASMGetLocalSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1590 {
1591   PC_GASM         *osm;
1592   PetscErrorCode ierr;
1593   PetscBool      match;
1594 
1595   PetscFunctionBegin;
1596   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1597   PetscValidIntPointer(n,2);
1598   if (mat) PetscValidPointer(mat,3);
1599   if (!pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUP() or PCSetUp().");
1600   ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1601   if (!match) {
1602     if (n)   *n   = 0;
1603     if (mat) *mat = PETSC_NULL;
1604   } else {
1605     osm = (PC_GASM*)pc->data;
1606     if (n)   *n   = osm->n;
1607     if (mat) *mat = osm->pmat;
1608   }
1609   PetscFunctionReturn(0);
1610 }
1611