xref: /petsc/src/ksp/pc/impls/gasm/gasm.c (revision eabe889f41eab0385d57eba42f0f78f2ecc7ced4)
1 
/*
  This file defines a "generalized" additive Schwarz preconditioner for any Mat implementation.
  In this version each processor may have any number of subdomains and a subdomain may live on multiple
  processors.

       N    - total number of subdomains on all processors
       n    - actual number of subdomains on this processor
       nmax - maximum number of subdomains per processor
*/
11 #include <petsc-private/pcimpl.h>     /*I "petscpc.h" I*/
12 
/*
   Private context for the generalized additive Schwarz (GASM) preconditioner.
   Unlike plain ASM, each processor may hold any number of subdomains, and a
   subdomain may be shared by (live on) multiple processors.
*/
typedef struct {
  PetscInt   N,n,nmax;            /* N: total subdomains on all processors; n: subdomains on this processor; nmax: max subdomains per processor */
  PetscInt   overlap;             /* overlap requested by user; -1 means the user supplied already-overlapping subdomains */
  KSP        *ksp;                /* linear solvers for each block */
  Vec        gx,gy;               /* Merged work vectors */
  Vec        *x,*y;               /* Split work vectors; storage aliases pieces of storage of the above merged vectors. */
  IS         gis, gis_local;      /* merged ISs (disjoint union of the overlapping / nonoverlapping subdomain indices) */
  VecScatter grestriction;        /* merged restriction: global vector -> merged subdomain vector gx */
  VecScatter gprolongation;       /* merged prolongation: merged subdomain vector gy -> global vector */
  IS         *is;                 /* index set that defines each overlapping subdomain */
  IS         *is_local;           /* index set that defines each local subdomain (same as subdomain with the overlap removed); may be NULL */
  Mat        *pmat;               /* subdomain block matrices */
  PCGASMType  type;               /* use reduced interpolation, restriction or both */
  PetscBool  type_set;            /* if user set this value (so won't change it for symmetric problems) */
  PetscBool  same_local_solves;   /* flag indicating whether all local solvers are same */
  PetscBool  sort_indices;        /* flag to sort subdomain indices */
} PC_GASM;
30 
31 #undef __FUNCT__
32 #define __FUNCT__ "PCGASMPrintSubdomains"
33 static PetscErrorCode  PCGASMPrintSubdomains(PC pc)
34 {
35   PC_GASM         *osm  = (PC_GASM*)pc->data;
36   const char     *prefix;
37   char           fname[PETSC_MAX_PATH_LEN+1];
38   PetscViewer    viewer, sviewer;
39   PetscInt       i,j,nidx;
40   const PetscInt *idx;
41   char           *cidx;
42   PetscBool      found;
43   PetscMPIInt    size, rank;
44   PetscErrorCode ierr;
45 
46   PetscFunctionBegin;
47   ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size);CHKERRQ(ierr);
48   ierr = MPI_Comm_rank(((PetscObject)pc)->comm, &rank);CHKERRQ(ierr);
49   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
50   ierr = PetscOptionsGetString(prefix,"-pc_gasm_print_subdomains",fname,PETSC_MAX_PATH_LEN,&found);CHKERRQ(ierr);
51   if (!found) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); };
52   for (i=0;i<osm->n;++i) {
53     ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr);
54     ierr = ISGetLocalSize(osm->is[i], &nidx);CHKERRQ(ierr);
55     /*
56      No more than 15 characters per index plus a space.
57      PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
58      in case nidx == 0. That will take care of the space for the trailing '\0' as well.
59      For nidx == 0, the whole string 16 '\0'.
60      */
61     ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx);CHKERRQ(ierr);
62     ierr = ISGetIndices(osm->is[i], &idx);CHKERRQ(ierr);
63     ierr = PetscViewerStringOpen(((PetscObject)(osm->is[i]))->comm, cidx, 16*(nidx+1), &sviewer);CHKERRQ(ierr);
64     for(j = 0; j < nidx; ++j) {
65       ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);CHKERRQ(ierr);
66     }
67     ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr);
68     ierr = ISRestoreIndices(osm->is[i],&idx);CHKERRQ(ierr);
69 
70     ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
71     ierr = PetscViewerASCIIPrintf(viewer, "Subdomain with overlap\n");CHKERRQ(ierr);
72     ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr);
73     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
74     ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
75     ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr);
76     ierr = PetscFree(cidx);CHKERRQ(ierr);
77     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
78     if (osm->is_local) {
79       ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr);
80       ierr = ISGetLocalSize(osm->is_local[i], &nidx);CHKERRQ(ierr);
81       /*
82        No more than 15 characters per index plus a space.
83        PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
84        in case nidx == 0. That will take care of the space for the trailing '\0' as well.
85        For nidx == 0, the whole string 16 '\0'.
86        */
87       ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx);CHKERRQ(ierr);
88       ierr = ISGetIndices(osm->is_local[i], &idx);CHKERRQ(ierr);
89       ierr = PetscViewerStringOpen(((PetscObject)(osm->is_local[i]))->comm, cidx, 16*(nidx+1), &sviewer);CHKERRQ(ierr);
90       for(j = 0; j < nidx; ++j) {
91         ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);CHKERRQ(ierr);
92       }
93       ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr);
94       ierr = ISRestoreIndices(osm->is_local[i],&idx);CHKERRQ(ierr);
95 
96       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
97       ierr = PetscViewerASCIIPrintf(viewer, "Subdomain without overlap\n");CHKERRQ(ierr);
98       ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr);
99       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
100       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
101       ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr);
102       ierr = PetscFree(cidx);CHKERRQ(ierr);
103       ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
104     }
105   }
106   PetscFunctionReturn(0);
107 }
108 
109 
#undef __FUNCT__
#define __FUNCT__ "PCView_GASM"
/*
   Viewer for PCGASM.  For ASCII viewers, prints the per-process subdomain
   count, the overlap/subdomain summary, the restriction/interpolation type,
   and the subdomain KSP/PC objects (once if all local solves are the same,
   otherwise one per subdomain).  For string viewers, prints a one-line
   summary.  If -pc_gasm_print_subdomains is set, the subdomain index sets
   are printed as well via PCGASMPrintSubdomains().
*/
static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer)
{
  PC_GASM         *osm = (PC_GASM*)pc->data;
  const char     *prefix;
  PetscErrorCode ierr;
  PetscMPIInt    rank, size;
  PetscInt       i,bsz;
  PetscBool      iascii,isstring, print_subdomains=PETSC_FALSE;
  PetscViewer    sviewer;


  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(((PetscObject)pc)->comm, &rank);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);CHKERRQ(ierr);
  ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&print_subdomains,PETSC_NULL);CHKERRQ(ierr);
  if (iascii) {
    /* The default strings are used when the corresponding quantity was not set explicitly. */
    char overlaps[256] = "user-defined overlap",subdomains[256] = "total subdomains set";
    if (osm->overlap >= 0) {ierr = PetscSNPrintf(overlaps,sizeof overlaps,"amount of overlap = %D",osm->overlap);CHKERRQ(ierr);}
    if (osm->nmax > 0)     {ierr = PetscSNPrintf(subdomains,sizeof subdomains,"max number of local subdomains = %D",osm->nmax);CHKERRQ(ierr);}
    /* Each process reports its own subdomain count; synchronized printing keeps the output ordered by rank. */
    ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d:%d] number of locally-supported subdomains = %D\n",(int)rank,(int)size,osm->n);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  Generalized additive Schwarz: %s, %s\n",subdomains,overlaps);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  Generalized additive Schwarz: restriction/interpolation type - %s\n",PCGASMTypes[osm->type]);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
    if (osm->same_local_solves) {
      /* All local solvers are identical, so viewing the first one (on rank 0) suffices. */
      if (osm->ksp) {
        ierr = PetscViewerASCIIPrintf(viewer,"  Local solve is same for all subdomains, in the following KSP and PC objects:\n");CHKERRQ(ierr);
        ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
        if (!rank) {
          ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
          ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);
          ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        }
        ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
      }
    } else {
      /* View each subdomain's solver.  All processes loop to osm->nmax so that the
         collective Get/RestoreSingleton calls stay matched even when a process has
         fewer than nmax subdomains. */
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Local solve info for each subdomain is in the following KSP and PC objects:\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
      for (i=0; i<osm->nmax; i++) {
        ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
        if (i < osm->n) {
          ierr = ISGetLocalSize(osm->is[i],&bsz);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedAllow(sviewer,PETSC_TRUE);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"[%d:%d] local subdomain number %D, size = %D\n",(int)rank,(int)size,i,bsz);CHKERRQ(ierr);
          ierr = KSPView(osm->ksp[i],sviewer);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
          ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedAllow(sviewer,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
      }
      ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    }
  } else if (isstring) {
    ierr = PetscViewerStringSPrintf(viewer," subdomains=%D, overlap=%D, type=%s",osm->n,osm->overlap,PCGASMTypes[osm->type]);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (osm->ksp) {ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);}
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_SUP,"Viewer type %s not supported for PCGASM",((PetscObject)viewer)->type_name);
  }
  ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  if(print_subdomains) {
    ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
187 
188 
189 
190 
191 
#undef __FUNCT__
#define __FUNCT__ "PCSetUp_GASM"
/*
   Sets up the GASM preconditioner.  On the first call this:
     - determines the subdomains: from the user (PCGASMSetLocalSubdomains/
       PCGASMSetTotalSubdomains), from a decomposition DM if pc->dm is set,
       or one subdomain per processor by default;
     - creates osm->is_local (nonoverlapping copies/aliases of osm->is) and
       extends osm->is by the requested overlap via MatIncreaseOverlap();
     - merges the subdomain index sets into osm->gis / osm->gis_local,
       creates the merged work vectors gx, gy, and builds the restriction
       and prolongation scatters between the global vectors and the merged
       subdomain vectors;
     - creates the per-subdomain work vectors as windows into gx/gy, and
       one KSP per subdomain.
   On every call it extracts the subdomain matrices and hands them to the
   subdomain KSPs.
*/
static PetscErrorCode PCSetUp_GASM(PC pc)
{
  PC_GASM         *osm  = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscBool      symset,flg;
  PetscInt       i;
  PetscMPIInt    rank, size;
  MatReuse       scall = MAT_REUSE_MATRIX;
  KSP            ksp;
  PC             subpc;
  const char     *prefix,*pprefix;
  PetscInt       dn;       /* Number of indices in a single subdomain assigned to this processor. */
  const PetscInt *didx;    /* Indices from a single subdomain assigned to this processor. */
  PetscInt       ddn;      /* Number of indices in all subdomains assigned to this processor. */
  PetscInt       *ddidx;   /* Indices of all subdomains assigned to this processor. */
  IS             gid;      /* Identity IS of the size of all subdomains assigned to this processor. */
  Vec            x,y;
  PetscScalar    *gxarray, *gyarray;
  PetscInt       gfirst, glast;
  DM             *domain_dm = PETSC_NULL;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
  if (!pc->setupcalled) {

    /* For symmetric matrices default to the symmetric (BASIC) variant, unless the user chose a type. */
    if (!osm->type_set) {
      ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
      if (symset && flg) { osm->type = PC_GASM_BASIC; }
    }

    /* Note: if osm->n has been set, it is at least 1. */
    if (osm->N == PETSC_DECIDE && osm->n < 1) {
      /* no subdomains given */
      /* try pc->dm first */
      if(pc->dm) {
        char      ddm_name[1024];
        DM        ddm;
        PetscBool flg;
        PetscInt     num_domains, d;
        char         **domain_names;
        IS           *domain_is;
        /* Allow the user to request a decomposition DM by name */
        /* NOTE(review): option name uses the "asm" prefix, not "gasm" — confirm whether
           this should be -pc_gasm_decomposition_dm for consistency with the other options. */
        ierr = PetscStrncpy(ddm_name, "", 1024); CHKERRQ(ierr);
        ierr = PetscOptionsString("-pc_asm_decomposition_dm", "Name of the DM defining the composition", "PCSetDM", ddm_name, ddm_name,1024,&flg); CHKERRQ(ierr);
        if(flg) {
          ierr = DMCreateDecompositionDM(pc->dm, ddm_name, &ddm); CHKERRQ(ierr);
          if(!ddm) {
            /* NOTE(review): "Uknown" is a typo ("Unknown") in this user-visible error message. */
            SETERRQ1(((PetscObject)pc)->comm, PETSC_ERR_ARG_WRONGSTATE, "Uknown DM decomposition name %s", ddm_name);
          }
          ierr = PetscInfo(pc,"Using decomposition DM defined using options database\n");CHKERRQ(ierr);
          ierr = PCSetDM(pc,ddm); CHKERRQ(ierr);
        }
        ierr = DMCreateDecomposition(pc->dm, &num_domains, &domain_names, &domain_is, &domain_dm);    CHKERRQ(ierr);
        if(num_domains) {
          ierr = PCGASMSetLocalSubdomains(pc, num_domains, domain_is, PETSC_NULL);CHKERRQ(ierr);
        }
        /* PCGASMSetLocalSubdomains referenced the ISs; release the DM's versions. */
        for(d = 0; d < num_domains; ++d) {
          ierr = PetscFree(domain_names[d]); CHKERRQ(ierr);
          ierr = ISDestroy(&domain_is[d]);   CHKERRQ(ierr);
        }
        ierr = PetscFree(domain_names);CHKERRQ(ierr);
        ierr = PetscFree(domain_is);CHKERRQ(ierr);
      }
      if (osm->N == PETSC_DECIDE && osm->n < 1) { /* still no subdomains; use one per processor */
        osm->nmax = osm->n = 1;
        ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
        osm->N = size;
      }
    } else if (osm->N == PETSC_DECIDE) {
      PetscInt inwork[2], outwork[2];
      /* determine global number of subdomains and the max number of local subdomains */
      inwork[0] = inwork[1] = osm->n;
      ierr = MPI_Allreduce(inwork,outwork,1,MPIU_2INT,PetscMaxSum_Op,((PetscObject)pc)->comm);CHKERRQ(ierr);
      osm->nmax = outwork[0];
      osm->N    = outwork[1];
    }
    if (!osm->is){ /* create the index sets */
      ierr = PCGASMCreateSubdomains(pc->pmat,osm->n,&osm->is);CHKERRQ(ierr);
    }
    if (!osm->is_local) {
      /*
	 This indicates that osm->is should define a nonoverlapping decomposition
	 (there is no way to really guarantee that if subdomains are set by the user through PCGASMSetLocalSubdomains,
	  but the assumption is that either the user does the right thing, or subdomains in osm->is have been created
	  via PCGASMCreateSubdomains, which guarantees a nonoverlapping decomposition).
	 Therefore, osm->is will be used to define osm->is_local.
	 If a nonzero overlap has been requested by the user, then osm->is will be expanded and will overlap,
	 so osm->is_local should obtain a copy of osm->is while they are still (presumably) nonoverlapping.
	 Otherwise (no overlap has been requested), osm->is_local are simply aliases for osm->is.
      */
      ierr = PetscMalloc(osm->n*sizeof(IS),&osm->is_local);CHKERRQ(ierr);
      for (i=0; i<osm->n; i++) {
        if (osm->overlap > 0) { /* With positive overlap, osm->is[i] will be modified */
          ierr = ISDuplicate(osm->is[i],&osm->is_local[i]);CHKERRQ(ierr);
          ierr = ISCopy(osm->is[i],osm->is_local[i]);CHKERRQ(ierr);
        } else {
          ierr = PetscObjectReference((PetscObject)osm->is[i]);CHKERRQ(ierr);
          osm->is_local[i] = osm->is[i];
        }
      }
    }
    /* Beyond this point osm->is_local is not null. */
    if (osm->overlap > 0) {
      /* Extend the "overlapping" regions by a number of steps */
      ierr = MatIncreaseOverlap(pc->pmat,osm->n,osm->is,osm->overlap);CHKERRQ(ierr);
    }
    ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
    flg  = PETSC_FALSE;
    ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&flg,PETSC_NULL);CHKERRQ(ierr);
    if (flg) { ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr); }

    if (osm->sort_indices) {
      for (i=0; i<osm->n; i++) {
        ierr = ISSort(osm->is[i]);CHKERRQ(ierr);
	ierr = ISSort(osm->is_local[i]);CHKERRQ(ierr);
      }
    }
    /* Merge the ISs, create merged vectors and scatter contexts. */
    /* Restriction ISs. */
    /* First pass: count the total number of indices over all local subdomains. */
    ddn = 0;
    for (i=0; i<osm->n; i++) {
      ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr);
      ddn += dn;
    }
    /* Second pass: concatenate the subdomain indices into a single array. */
    ierr = PetscMalloc(ddn*sizeof(PetscInt), &ddidx);CHKERRQ(ierr);
    ddn = 0;
    for (i=0; i<osm->n; i++) {
      ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr);
      ierr = ISGetIndices(osm->is[i],&didx);CHKERRQ(ierr);
      ierr = PetscMemcpy(ddidx+ddn, didx, sizeof(PetscInt)*dn);CHKERRQ(ierr);
      ierr = ISRestoreIndices(osm->is[i], &didx);CHKERRQ(ierr);
      ddn += dn;
    }
    /* gis takes ownership of ddidx (PETSC_OWN_POINTER). */
    ierr = ISCreateGeneral(((PetscObject)(pc))->comm, ddn, ddidx, PETSC_OWN_POINTER, &osm->gis);CHKERRQ(ierr);
    ierr = MatGetVecs(pc->pmat,&x,&y);CHKERRQ(ierr);
    ierr = VecCreateMPI(((PetscObject)pc)->comm, ddn, PETSC_DECIDE, &osm->gx);CHKERRQ(ierr);
    ierr = VecDuplicate(osm->gx,&osm->gy);CHKERRQ(ierr);
    ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast);CHKERRQ(ierr);
    /* The destination of the restriction is simply the locally owned contiguous range of gx. */
    ierr = ISCreateStride(((PetscObject)pc)->comm,ddn,gfirst,1, &gid);CHKERRQ(ierr);
    ierr = VecScatterCreate(x,osm->gis,osm->gx,gid, &(osm->grestriction));CHKERRQ(ierr);
    ierr = ISDestroy(&gid);CHKERRQ(ierr);
    /* Prolongation ISs */
    { PetscInt       dn_local;       /* Number of indices in the local part of a single domain assigned to this processor. */
      const PetscInt *didx_local;    /* Global indices from the local part of a single domain assigned to this processor. */
      PetscInt       ddn_local;      /* Number of indices in the local part of the disjoint union all domains assigned to this processor. */
      PetscInt       *ddidx_local;   /* Global indices of the local part of the disjoint union of all domains assigned to this processor. */
      /**/
      ISLocalToGlobalMapping ltog;          /* Map from global to local indices on the disjoint union of subdomains: "local" ind's run from 0 to ddn-1. */
      PetscInt              *ddidx_llocal;  /* Mapped local indices of the disjoint union of local parts of subdomains. */
      PetscInt               ddn_llocal;    /* Number of indices in ddidx_llocal; must equal ddn_local, or else gis_local is not a sub-IS of gis. */
      IS                     gis_llocal;    /* IS with ddidx_llocal indices. */
      PetscInt               j;
      /* Count and then concatenate the nonoverlapping (local) subdomain indices, as for gis above. */
      ddn_local = 0;
      for (i=0; i<osm->n; i++) {
	ierr = ISGetLocalSize(osm->is_local[i],&dn_local);CHKERRQ(ierr);
	ddn_local += dn_local;
      }
      ierr = PetscMalloc(ddn_local*sizeof(PetscInt), &ddidx_local);CHKERRQ(ierr);
      ddn_local = 0;
      for (i=0; i<osm->n; i++) {
	ierr = ISGetLocalSize(osm->is_local[i],&dn_local);CHKERRQ(ierr);
	ierr = ISGetIndices(osm->is_local[i],&didx_local);CHKERRQ(ierr);
	ierr = PetscMemcpy(ddidx_local+ddn_local, didx_local, sizeof(PetscInt)*dn_local);CHKERRQ(ierr);
	ierr = ISRestoreIndices(osm->is_local[i], &didx_local);CHKERRQ(ierr);
	ddn_local += dn_local;
      }
      ierr = PetscMalloc(sizeof(PetscInt)*ddn_local, &ddidx_llocal);CHKERRQ(ierr);
      ierr = ISCreateGeneral(((PetscObject)pc)->comm, ddn_local, ddidx_local, PETSC_OWN_POINTER, &(osm->gis_local));CHKERRQ(ierr);
      /* Locate each gis_local index inside gis; IS_GTOLM_DROP discards indices not found,
         so a count mismatch below means gis_local is not a subset of gis. */
      ierr = ISLocalToGlobalMappingCreateIS(osm->gis,&ltog);CHKERRQ(ierr);
      ierr = ISGlobalToLocalMappingApply(ltog,IS_GTOLM_DROP,ddn_local,ddidx_local,&ddn_llocal,ddidx_llocal);CHKERRQ(ierr);
      ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
      if (ddn_llocal != ddn_local) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"gis_local contains %D indices outside of gis", ddn_llocal - ddn_local);
      /* Now convert these localized indices into the global indices into the merged output vector. */
      ierr = VecGetOwnershipRange(osm->gy, &gfirst, &glast);CHKERRQ(ierr);
      for(j=0; j < ddn_llocal; ++j) {
	ddidx_llocal[j] += gfirst;
      }
      ierr = ISCreateGeneral(PETSC_COMM_SELF,ddn_llocal,ddidx_llocal,PETSC_OWN_POINTER,&gis_llocal);CHKERRQ(ierr);
      ierr = VecScatterCreate(y,osm->gis_local,osm->gy,gis_llocal,&osm->gprolongation);CHKERRQ(ierr);
      ierr = ISDestroy(&gis_llocal);CHKERRQ(ierr);
    }
    /* Create the subdomain work vectors. */
    /* Each osm->x[i]/osm->y[i] is a window into the storage of gx/gy at offset ddn
       (ddn accumulates the previous subdomains' sizes via the loop increment). */
    ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->x);CHKERRQ(ierr);
    ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->y);CHKERRQ(ierr);
    ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast);CHKERRQ(ierr);
    ierr = VecGetArray(osm->gx, &gxarray);CHKERRQ(ierr);
    ierr = VecGetArray(osm->gy, &gyarray);CHKERRQ(ierr);
    for (i=0, ddn=0; i<osm->n; ++i, ddn += dn) {
      PetscInt dN;
      ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr);
      ierr = ISGetSize(osm->is[i],&dN);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gxarray+ddn,&osm->x[i]);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gyarray+ddn,&osm->y[i]);CHKERRQ(ierr);
    }
    ierr = VecRestoreArray(osm->gx, &gxarray);CHKERRQ(ierr);
    ierr = VecRestoreArray(osm->gy, &gyarray);CHKERRQ(ierr);
    ierr = VecDestroy(&x);CHKERRQ(ierr);
    ierr = VecDestroy(&y);CHKERRQ(ierr);
    /* Create the local solvers */
    ierr = PetscMalloc(osm->n*sizeof(KSP *),&osm->ksp);CHKERRQ(ierr);
    for (i=0; i<osm->n; i++) { /* KSPs are local */
      ierr = KSPCreate(((PetscObject)(osm->is[i]))->comm,&ksp);CHKERRQ(ierr);
      ierr = PetscLogObjectParent(pc,ksp);CHKERRQ(ierr);
      ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr);
      ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
      ierr = KSPGetPC(ksp,&subpc);CHKERRQ(ierr);
      ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
      ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr);
      ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr);
      osm->ksp[i] = ksp;
    }
    scall = MAT_INITIAL_MATRIX;

  } else {
    /*
       Destroy the blocks from the previous iteration
    */
    if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
      ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
      scall = MAT_INITIAL_MATRIX;
    }
  }

  /*
     Extract out the submatrices.
  */
  if(size > 1) {
    ierr = MatGetSubMatricesParallel(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr);
  }
  else {
    ierr = MatGetSubMatrices(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr);
  }
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr);
    for (i=0; i<osm->n; i++) {
      ierr = PetscLogObjectParent(pc,osm->pmat[i]);CHKERRQ(ierr);
      ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr);
    }
  }

  /* Return control to the user so that the submatrices can be modified (e.g., to apply
     different boundary conditions for the submatrices than for the global problem) */
  ierr = PCModifySubMatrices(pc,osm->n,osm->is,osm->is,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr);

  /*
     Loop over submatrices putting them into local ksp
  */
  for (i=0; i<osm->n; i++) {
    ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i],pc->flag);CHKERRQ(ierr);
    if (!pc->setupcalled) {
      ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr);
    }
  }

  PetscFunctionReturn(0);
}
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "PCSetUpOnBlocks_GASM"
454 static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc)
455 {
456   PC_GASM         *osm = (PC_GASM*)pc->data;
457   PetscErrorCode ierr;
458   PetscInt       i;
459 
460   PetscFunctionBegin;
461   for (i=0; i<osm->n; i++) {
462     ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr);
463   }
464   PetscFunctionReturn(0);
465 }
466 
#undef __FUNCT__
#define __FUNCT__ "PCApply_GASM"
/*
   Applies the GASM preconditioner: y = sum_i R_i^T A_i^{-1} R_i x, where
   R_i restricts to subdomain i and A_i^{-1} is applied by the subdomain KSP.
   Depending on osm->type, the restriction and/or the prolongation scatter
   is limited to locally owned values (PC_GASM_RESTRICT / PC_GASM_INTERPOLATE
   bits cleared).
*/
static PetscErrorCode PCApply_GASM(PC pc,Vec x,Vec y)
{
  PC_GASM         *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

  PetscFunctionBegin;
  /*
     Support for limiting the restriction or interpolation to only local
     subdomain values (leaving the other values 0).
  */
  if (!(osm->type & PC_GASM_RESTRICT)) {
    forward = SCATTER_FORWARD_LOCAL;
    /* have to zero the work RHS since scatter may leave some slots empty */
    ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
  }
  if (!(osm->type & PC_GASM_INTERPOLATE)) {
    reverse = SCATTER_REVERSE_LOCAL;
  }

  /* Restrict the global RHS into the merged subdomain vector gx;
     y is zeroed between Begin and End to overlap with the communication. */
  ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  ierr = VecZeroEntries(y);CHKERRQ(ierr);
  ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  /* do the subdomain solves */
  for (i=0; i<osm->n; ++i) {
    ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
  }
  /* Add the (nonoverlapping parts of the) subdomain solutions back into y. */
  ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
501 
#undef __FUNCT__
#define __FUNCT__ "PCApplyTranspose_GASM"
/*
   Applies the transpose of the GASM preconditioner.  The structure mirrors
   PCApply_GASM(), but the subdomain solves use KSPSolveTranspose() and the
   roles of the RESTRICT/INTERPOLATE flags are swapped (see note below).
*/
static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec x,Vec y)
{
  PC_GASM         *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

  PetscFunctionBegin;
  /*
     Support for limiting the restriction or interpolation to only local
     subdomain values (leaving the other values 0).

     Note: these are reversed from the PCApply_GASM() because we are applying the
     transpose of the three terms
  */
  if (!(osm->type & PC_GASM_INTERPOLATE)) {
    forward = SCATTER_FORWARD_LOCAL;
    /* have to zero the work RHS since scatter may leave some slots empty */
    ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
  }
  if (!(osm->type & PC_GASM_RESTRICT)) {
    reverse = SCATTER_REVERSE_LOCAL;
  }

  /* Restrict the global input into the merged subdomain vector gx. */
  ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  ierr = VecZeroEntries(y);CHKERRQ(ierr);
  ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  /* do the local solves */
  for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
    ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
  }
  /* Accumulate the subdomain solutions back into the global result y. */
  ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
539 
#undef __FUNCT__
#define __FUNCT__ "PCReset_GASM"
/*
   Releases everything built by PCSetUp_GASM() (submatrices, work vectors,
   scatters, index sets), but keeps the KSP objects themselves (they are
   only reset, not destroyed) so the PC can be set up again.
*/
static PetscErrorCode PCReset_GASM(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  if (osm->ksp) {
    for (i=0; i<osm->n; i++) {
      ierr = KSPReset(osm->ksp[i]);CHKERRQ(ierr);
    }
  }
  if (osm->pmat) {
    if (osm->n > 0) {
      ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
    }
  }
  /* The subdomain work vectors merely alias storage inside gx/gy; destroy them
     before the merged vectors they point into. */
  if (osm->x) {
    for (i=0; i<osm->n; i++) {
      ierr = VecDestroy(&osm->x[i]);CHKERRQ(ierr);
      ierr = VecDestroy(&osm->y[i]);CHKERRQ(ierr);
    }
  }
  ierr = VecDestroy(&osm->gx);CHKERRQ(ierr);
  ierr = VecDestroy(&osm->gy);CHKERRQ(ierr);

  ierr = VecScatterDestroy(&osm->grestriction);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&osm->gprolongation);CHKERRQ(ierr);
  if (osm->is) {
    ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
    osm->is = 0;
    osm->is_local = 0;
  }
  ierr = ISDestroy(&osm->gis);CHKERRQ(ierr);
  ierr = ISDestroy(&osm->gis_local);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
579 
#undef __FUNCT__
#define __FUNCT__ "PCDestroy_GASM"
/*
   Fully destroys the GASM context: resets (releasing setup-time objects),
   then destroys the per-subdomain KSPs and the arrays that PCReset_GASM()
   leaves in place, and finally frees the PC_GASM struct itself.
*/
static PetscErrorCode PCDestroy_GASM(PC pc)
{
  PC_GASM         *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  ierr = PCReset_GASM(pc);CHKERRQ(ierr);
  if (osm->ksp) {
    for (i=0; i<osm->n; i++) {
      ierr = KSPDestroy(&osm->ksp[i]);CHKERRQ(ierr);
    }
    ierr = PetscFree(osm->ksp);CHKERRQ(ierr);
  }
  /* osm->x / osm->y hold the (already destroyed) subvector handles; free the arrays. */
  ierr = PetscFree(osm->x);CHKERRQ(ierr);
  ierr = PetscFree(osm->y);CHKERRQ(ierr);
  ierr = PetscFree(pc->data);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
601 
#undef __FUNCT__
#define __FUNCT__ "PCSetFromOptions_GASM"
/*
   Processes the PCGASM options: -pc_gasm_blocks, -pc_gasm_overlap and
   -pc_gasm_type.  Also defaults the type to PC_GASM_BASIC when the matrix
   is known to be symmetric and the user has not chosen a type explicitly.
*/
static PetscErrorCode PCSetFromOptions_GASM(PC pc) {
  PC_GASM         *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       blocks,ovl;
  PetscBool      symset,flg;
  PCGASMType      gasmtype;

  PetscFunctionBegin;
  /* set the type to symmetric if matrix is symmetric */
  if (!osm->type_set && pc->pmat) {
    ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
    if (symset && flg) { osm->type = PC_GASM_BASIC; }
  }
  ierr = PetscOptionsHead("Generalized additive Schwarz options");CHKERRQ(ierr);
    ierr = PetscOptionsInt("-pc_gasm_blocks","Number of subdomains","PCGASMSetTotalSubdomains",osm->n,&blocks,&flg);CHKERRQ(ierr);
    if (flg) {ierr = PCGASMSetTotalSubdomains(pc,blocks);CHKERRQ(ierr); }
    ierr = PetscOptionsInt("-pc_gasm_overlap","Number of grid points overlap","PCGASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr);
    if (flg) {ierr = PCGASMSetOverlap(pc,ovl);CHKERRQ(ierr); }
    flg  = PETSC_FALSE;
    ierr = PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);CHKERRQ(ierr);
    if (flg) {ierr = PCGASMSetType(pc,gasmtype);CHKERRQ(ierr); }
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
628 
629 /*------------------------------------------------------------------------------------*/
630 
631 EXTERN_C_BEGIN
632 #undef __FUNCT__
633 #define __FUNCT__ "PCGASMSetLocalSubdomains_GASM"
634 PetscErrorCode  PCGASMSetLocalSubdomains_GASM(PC pc,PetscInt n,IS is[],IS is_local[])
635 {
636   PC_GASM         *osm = (PC_GASM*)pc->data;
637   PetscErrorCode ierr;
638   PetscInt       i;
639 
640   PetscFunctionBegin;
641   if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more subdomains, n = %D",n);
642   if (pc->setupcalled && (n != osm->n || is)) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetLocalSubdomains() should be called before calling PCSetUp().");
643 
644   if (!pc->setupcalled) {
645     osm->n            = n;
646     osm->is           = 0;
647     osm->is_local     = 0;
648     if (is) {
649       for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is[i]);CHKERRQ(ierr);}
650     }
651     if (is_local) {
652       for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is_local[i]);CHKERRQ(ierr);}
653     }
654     if (osm->is) {
655       ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
656     }
657     if (is) {
658       ierr = PetscMalloc(n*sizeof(IS),&osm->is);CHKERRQ(ierr);
659       for (i=0; i<n; i++) { osm->is[i] = is[i]; }
660       /* Flag indicating that the user has set overlapping subdomains so PCGASM should not increase their size. */
661       osm->overlap = -1;
662     }
663     if (is_local) {
664       ierr = PetscMalloc(n*sizeof(IS),&osm->is_local);CHKERRQ(ierr);
665       for (i=0; i<n; i++) { osm->is_local[i] = is_local[i]; }
666     }
667   }
668   PetscFunctionReturn(0);
669 }
670 EXTERN_C_END
671 
672 EXTERN_C_BEGIN
673 #undef __FUNCT__
674 #define __FUNCT__ "PCGASMSetTotalSubdomains_GASM"
675 PetscErrorCode  PCGASMSetTotalSubdomains_GASM(PC pc,PetscInt N) {
676   PC_GASM         *osm = (PC_GASM*)pc->data;
677   PetscErrorCode ierr;
678   PetscMPIInt    rank,size;
679   PetscInt       n;
680 
681   PetscFunctionBegin;
682   if (N < 1) SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Number of total blocks must be > 0, N = %D",N);
683 
684   /*
685      Split the subdomains equally among all processors
686   */
687   ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
688   ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
689   n = N/size + ((N % size) > rank);
690   if (!n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Process %d must have at least one block: total processors %d total blocks %D",(int)rank,(int)size,N);
691   if (pc->setupcalled && n != osm->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before PCSetUp().");
692   if (!pc->setupcalled) {
693     if (osm->is) {
694       ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
695     }
696     osm->N            = N;
697     osm->n            = n;
698     osm->is           = 0;
699     osm->is_local     = 0;
700   }
701   PetscFunctionReturn(0);
702 }/* PCGASMSetTotalSubdomains_GASM() */
703 EXTERN_C_END
704 
705 EXTERN_C_BEGIN
706 #undef __FUNCT__
707 #define __FUNCT__ "PCGASMSetOverlap_GASM"
708 PetscErrorCode  PCGASMSetOverlap_GASM(PC pc,PetscInt ovl)
709 {
710   PC_GASM *osm = (PC_GASM*)pc->data;
711 
712   PetscFunctionBegin;
713   if (ovl < 0) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
714   if (pc->setupcalled && ovl != osm->overlap) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp().");
715   if (!pc->setupcalled) {
716     osm->overlap = ovl;
717   }
718   PetscFunctionReturn(0);
719 }
720 EXTERN_C_END
721 
722 EXTERN_C_BEGIN
723 #undef __FUNCT__
724 #define __FUNCT__ "PCGASMSetType_GASM"
725 PetscErrorCode  PCGASMSetType_GASM(PC pc,PCGASMType type)
726 {
727   PC_GASM *osm = (PC_GASM*)pc->data;
728 
729   PetscFunctionBegin;
730   osm->type     = type;
731   osm->type_set = PETSC_TRUE;
732   PetscFunctionReturn(0);
733 }
734 EXTERN_C_END
735 
736 EXTERN_C_BEGIN
737 #undef __FUNCT__
738 #define __FUNCT__ "PCGASMSetSortIndices_GASM"
739 PetscErrorCode  PCGASMSetSortIndices_GASM(PC pc,PetscBool  doSort)
740 {
741   PC_GASM *osm = (PC_GASM*)pc->data;
742 
743   PetscFunctionBegin;
744   osm->sort_indices = doSort;
745   PetscFunctionReturn(0);
746 }
747 EXTERN_C_END
748 
749 EXTERN_C_BEGIN
750 #undef __FUNCT__
751 #define __FUNCT__ "PCGASMGetSubKSP_GASM"
752 /*
753    FIX: This routine might need to be modified once multiple ranks per subdomain are allowed.
754         In particular, it would upset the global subdomain number calculation.
755 */
756 PetscErrorCode  PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp)
757 {
758   PC_GASM         *osm = (PC_GASM*)pc->data;
759   PetscErrorCode ierr;
760 
761   PetscFunctionBegin;
762   if (osm->n < 1) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ORDER,"Need to call PCSetUP() on PC (or KSPSetUp() on the outer KSP object) before calling here");
763 
764   if (n) {
765     *n = osm->n;
766   }
767   if (first) {
768     ierr = MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr);
769     *first -= osm->n;
770   }
771   if (ksp) {
772     /* Assume that local solves are now different; not necessarily
773        true though!  This flag is used only for PCView_GASM() */
774     *ksp                   = osm->ksp;
775     osm->same_local_solves = PETSC_FALSE;
776   }
777   PetscFunctionReturn(0);
778 }/* PCGASMGetSubKSP_GASM() */
779 EXTERN_C_END
780 
781 
782 #undef __FUNCT__
783 #define __FUNCT__ "PCGASMSetLocalSubdomains"
784 /*@C
785     PCGASMSetLocalSubdomains - Sets the local subdomains (for this processor
786     only) for the additive Schwarz preconditioner.
787 
788     Collective on PC
789 
790     Input Parameters:
791 +   pc - the preconditioner context
792 .   n - the number of subdomains for this processor (default value = 1)
793 .   is - the index set that defines the subdomains for this processor
794          (or PETSC_NULL for PETSc to determine subdomains)
795 -   is_local - the index sets that define the local part of the subdomains for this processor
796          (or PETSC_NULL to use the default of 1 subdomain per process)
797 
798     Notes:
799     The IS numbering is in the parallel, global numbering of the vector.
800 
801     By default the GASM preconditioner uses 1 block per processor.
802 
803     Use PCGASMSetTotalSubdomains() to set the subdomains for all processors.
804 
805     Level: advanced
806 
807 .keywords: PC, GASM, set, local, subdomains, additive Schwarz
808 
809 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
810           PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains()
811 @*/
PetscErrorCode  PCGASMSetLocalSubdomains(PC pc,PetscInt n,IS is[],IS is_local[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* Dispatch to the composed implementation (PCGASMSetLocalSubdomains_GASM);
     per PetscTryMethod convention this is a no-op if pc is not of type GASM. */
  ierr = PetscTryMethod(pc,"PCGASMSetLocalSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,is,is_local));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
821 
822 #undef __FUNCT__
823 #define __FUNCT__ "PCGASMSetTotalSubdomains"
824 /*@C
825     PCGASMSetTotalSubdomains - Sets the subdomains for all processor for the
826     additive Schwarz preconditioner.  Either all or no processors in the
827     PC communicator must call this routine, with the same index sets.
828 
829     Collective on PC
830 
831     Input Parameters:
832 +   pc - the preconditioner context
833 .   n - the number of subdomains for all processors
.   is - the index sets that define the subdomains for all processors
835          (or PETSC_NULL for PETSc to determine subdomains)
836 -   is_local - the index sets that define the local part of the subdomains for this processor
837          (or PETSC_NULL to use the default of 1 subdomain per process)
838 
839     Options Database Key:
840     To set the total number of subdomain blocks rather than specify the
841     index sets, use the option
842 .    -pc_gasm_blocks <blks> - Sets total blocks
843 
844     Notes:
845     Currently you cannot use this to set the actual subdomains with the argument is.
846 
847     By default the GASM preconditioner uses 1 block per processor.
848 
849     These index sets cannot be destroyed until after completion of the
850     linear solves for which the GASM preconditioner is being used.
851 
852     Use PCGASMSetLocalSubdomains() to set local subdomains.
853 
854     Level: advanced
855 
856 .keywords: PC, GASM, set, total, global, subdomains, additive Schwarz
857 
858 .seealso: PCGASMSetLocalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
859           PCGASMCreateSubdomains2D()
860 @*/
PetscErrorCode  PCGASMSetTotalSubdomains(PC pc,PetscInt N)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* Dispatch to the composed implementation (PCGASMSetTotalSubdomains_GASM);
     per PetscTryMethod convention this is a no-op if pc is not of type GASM. */
  ierr = PetscTryMethod(pc,"PCGASMSetTotalSubdomains_C",(PC,PetscInt),(pc,N));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
870 
871 #undef __FUNCT__
872 #define __FUNCT__ "PCGASMSetOverlap"
873 /*@
874     PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
875     additive Schwarz preconditioner.  Either all or no processors in the
876     PC communicator must call this routine.
877 
878     Logically Collective on PC
879 
880     Input Parameters:
881 +   pc  - the preconditioner context
882 -   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 1)
883 
884     Options Database Key:
885 .   -pc_gasm_overlap <ovl> - Sets overlap
886 
887     Notes:
888     By default the GASM preconditioner uses 1 block per processor.  To use
    multiple blocks per processor, see PCGASMSetTotalSubdomains() and
890     PCGASMSetLocalSubdomains() (and the option -pc_gasm_blocks <blks>).
891 
892     The overlap defaults to 1, so if one desires that no additional
893     overlap be computed beyond what may have been set with a call to
894     PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(), then ovl
    must be set to be 0.  In particular, if one does not explicitly set
    the subdomains in an application code, then all overlap would be computed
    internally by PETSc, and using an overlap of 0 would result in a GASM
898     variant that is equivalent to the block Jacobi preconditioner.
899 
900     Note that one can define initial index sets with any overlap via
901     PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(); the routine
902     PCGASMSetOverlap() merely allows PETSc to extend that overlap further
903     if desired.
904 
905     Level: intermediate
906 
907 .keywords: PC, GASM, set, overlap
908 
909 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
910           PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains()
911 @*/
PetscErrorCode  PCGASMSetOverlap(PC pc,PetscInt ovl)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* ovl must be the same on every rank (logically collective). */
  PetscValidLogicalCollectiveInt(pc,ovl,2);
  /* Dispatch to the composed implementation (PCGASMSetOverlap_GASM);
     per PetscTryMethod convention this is a no-op if pc is not of type GASM. */
  ierr = PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
922 
923 #undef __FUNCT__
924 #define __FUNCT__ "PCGASMSetType"
925 /*@
926     PCGASMSetType - Sets the type of restriction and interpolation used
927     for local problems in the additive Schwarz method.
928 
929     Logically Collective on PC
930 
931     Input Parameters:
932 +   pc  - the preconditioner context
933 -   type - variant of GASM, one of
934 .vb
935       PC_GASM_BASIC       - full interpolation and restriction
936       PC_GASM_RESTRICT    - full restriction, local processor interpolation
937       PC_GASM_INTERPOLATE - full interpolation, local processor restriction
938       PC_GASM_NONE        - local processor restriction and interpolation
939 .ve
940 
941     Options Database Key:
942 .   -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
943 
944     Level: intermediate
945 
946 .keywords: PC, GASM, set, type
947 
948 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMGetSubKSP(),
949           PCGASMCreateSubdomains2D()
950 @*/
PetscErrorCode  PCGASMSetType(PC pc,PCGASMType type)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* type must be the same on every rank (logically collective). */
  PetscValidLogicalCollectiveEnum(pc,type,2);
  /* Dispatch to the composed implementation (PCGASMSetType_GASM);
     per PetscTryMethod convention this is a no-op if pc is not of type GASM. */
  ierr = PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
961 
962 #undef __FUNCT__
963 #define __FUNCT__ "PCGASMSetSortIndices"
964 /*@
965     PCGASMSetSortIndices - Determines whether subdomain indices are sorted.
966 
967     Logically Collective on PC
968 
969     Input Parameters:
970 +   pc  - the preconditioner context
971 -   doSort - sort the subdomain indices
972 
973     Level: intermediate
974 
975 .keywords: PC, GASM, set, type
976 
977 .seealso: PCGASMSetLocalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMGetSubKSP(),
978           PCGASMCreateSubdomains2D()
979 @*/
PetscErrorCode  PCGASMSetSortIndices(PC pc,PetscBool  doSort)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* doSort must be the same on every rank (logically collective). */
  PetscValidLogicalCollectiveBool(pc,doSort,2);
  /* Dispatch to the composed implementation (PCGASMSetSortIndices_GASM);
     per PetscTryMethod convention this is a no-op if pc is not of type GASM. */
  ierr = PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
990 
991 #undef __FUNCT__
992 #define __FUNCT__ "PCGASMGetSubKSP"
993 /*@C
994    PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on
995    this processor.
996 
997    Collective on PC iff first_local is requested
998 
999    Input Parameter:
1000 .  pc - the preconditioner context
1001 
1002    Output Parameters:
1003 +  n_local - the number of blocks on this processor or PETSC_NULL
1004 .  first_local - the global number of the first block on this processor or PETSC_NULL,
1005                  all processors must request or all must pass PETSC_NULL
1006 -  ksp - the array of KSP contexts
1007 
1008    Note:
1009    After PCGASMGetSubKSP() the array of KSPes is not to be freed
1010 
1011    Currently for some matrix implementations only 1 block per processor
1012    is supported.
1013 
1014    You must call KSPSetUp() before calling PCGASMGetSubKSP().
1015 
1016    Level: advanced
1017 
1018 .keywords: PC, GASM, additive Schwarz, get, sub, KSP, context
1019 
1020 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMSetOverlap(),
1021           PCGASMCreateSubdomains2D(),
1022 @*/
PetscErrorCode  PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* Dispatch to the composed implementation (PCGASMGetSubKSP_GASM); per
     PetscUseMethod convention this errors if pc is not of type GASM. */
  ierr = PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1032 
1033 /* -------------------------------------------------------------------------------------*/
1034 /*MC
1035    PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
1036            its own KSP object.
1037 
1038    Options Database Keys:
+  -pc_gasm_truelocal - Activates PCGASMSetUseTrueLocal()
1040 .  -pc_gasm_blocks <blks> - Sets total blocks
1041 .  -pc_gasm_overlap <ovl> - Sets overlap
1042 -  -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
1043 
1044      IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
1045       will get a different convergence rate due to the default option of -pc_gasm_type restrict. Use
1046       -pc_gasm_type basic to use the standard GASM.
1047 
1048    Notes: Each processor can have one or more blocks, but a block cannot be shared by more
1049      than one processor. Defaults to one block per processor.
1050 
1051      To set options on the solvers for each block append -sub_ to all the KSP, and PC
1052         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
1053 
1054      To set the options on the solvers separate for each block call PCGASMGetSubKSP()
1055          and set the options directly on the resulting KSP object (you can access its PC
1056          with KSPGetPC())
1057 
1058 
1059    Level: beginner
1060 
1061    Concepts: additive Schwarz method
1062 
1063     References:
1064     An additive variant of the Schwarz alternating method for the case of many subregions
1065     M Dryja, OB Widlund - Courant Institute, New York University Technical report
1066 
1067     Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
1068     Barry Smith, Petter Bjorstad, and William Gropp, Cambridge University Press, ISBN 0-521-49589-X.
1069 
1070 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
1071            PCBJACOBI, PCGASMSetUseTrueLocal(), PCGASMGetSubKSP(), PCGASMSetLocalSubdomains(),
1072            PCGASMSetTotalSubdomains(), PCSetModifySubmatrices(), PCGASMSetOverlap(), PCGASMSetType()
1073 
1074 M*/
1075 
1076 EXTERN_C_BEGIN
1077 #undef __FUNCT__
1078 #define __FUNCT__ "PCCreate_GASM"
PetscErrorCode  PCCreate_GASM(PC pc)
{
  PetscErrorCode ierr;
  PC_GASM         *osm;

  PetscFunctionBegin;
  /* Allocate the GASM context and set explicit defaults for every field
     (remaining fields, e.g. gis/gis_local, stay at their zeroed state). */
  ierr = PetscNewLog(pc,PC_GASM,&osm);CHKERRQ(ierr);
  osm->N                 = PETSC_DECIDE;       /* total subdomains: decided at setup */
  osm->n                 = 0;                  /* local subdomains: none yet */
  osm->nmax              = 0;
  osm->overlap           = 1;                  /* default overlap; see PCGASMSetOverlap() */
  osm->ksp               = 0;
  osm->grestriction      = 0;
  osm->gprolongation     = 0;
  osm->gx                = 0;
  osm->gy                = 0;
  osm->x                 = 0;
  osm->y                 = 0;
  osm->is                = 0;
  osm->is_local          = 0;
  osm->pmat              = 0;
  osm->type              = PC_GASM_RESTRICT;   /* default variant (see PCGASM manpage) */
  osm->same_local_solves = PETSC_TRUE;
  osm->sort_indices      = PETSC_TRUE;

  /* Install the GASM implementations of the PC operations. */
  pc->data                   = (void*)osm;
  pc->ops->apply             = PCApply_GASM;
  pc->ops->applytranspose    = PCApplyTranspose_GASM;
  pc->ops->setup             = PCSetUp_GASM;
  pc->ops->reset             = PCReset_GASM;
  pc->ops->destroy           = PCDestroy_GASM;
  pc->ops->setfromoptions    = PCSetFromOptions_GASM;
  pc->ops->setuponblocks     = PCSetUpOnBlocks_GASM;
  pc->ops->view              = PCView_GASM;
  pc->ops->applyrichardson   = 0;

  /* Compose the type-specific methods so the generic PCGASMSetXXX()/
     PCGASMGetSubKSP() wrappers can dispatch to them by name. */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetLocalSubdomains_C","PCGASMSetLocalSubdomains_GASM",
                    PCGASMSetLocalSubdomains_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetTotalSubdomains_C","PCGASMSetTotalSubdomains_GASM",
                    PCGASMSetTotalSubdomains_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetOverlap_C","PCGASMSetOverlap_GASM",
                    PCGASMSetOverlap_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetType_C","PCGASMSetType_GASM",
                    PCGASMSetType_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetSortIndices_C","PCGASMSetSortIndices_GASM",
                    PCGASMSetSortIndices_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMGetSubKSP_C","PCGASMGetSubKSP_GASM",
                    PCGASMGetSubKSP_GASM);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1129 EXTERN_C_END
1130 
1131 
1132 #undef __FUNCT__
1133 #define __FUNCT__ "PCGASMCreateSubdomains"
1134 /*@C
1135    PCGASMCreateSubdomains - Creates the index sets for the overlapping Schwarz
1136    preconditioner for a any problem on a general grid.
1137 
1138    Collective
1139 
1140    Input Parameters:
1141 +  A - The global matrix operator
1142 -  n - the number of local blocks
1143 
1144    Output Parameters:
1145 .  outis - the array of index sets defining the subdomains
1146 
1147    Level: advanced
1148 
1149    Note: this generates nonoverlapping subdomains; the PCGASM will generate the overlap
1150     from these if you use PCGASMSetLocalSubdomains()
1151 
1152     In the Fortran version you must provide the array outis[] already allocated of length n.
1153 
1154 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid
1155 
1156 .seealso: PCGASMSetLocalSubdomains(), PCGASMDestroySubdomains()
1157 @*/
1158 PetscErrorCode  PCGASMCreateSubdomains(Mat A, PetscInt n, IS* outis[])
1159 {
1160   MatPartitioning           mpart;
1161   const char                *prefix;
1162   PetscErrorCode            (*f)(Mat,MatReuse,Mat*);
1163   PetscMPIInt               size;
1164   PetscInt                  i,j,rstart,rend,bs;
1165   PetscBool                 isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
1166   Mat                       Ad = PETSC_NULL, adj;
1167   IS                        ispart,isnumb,*is;
1168   PetscErrorCode            ierr;
1169 
1170   PetscFunctionBegin;
1171   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1172   PetscValidPointer(outis,3);
1173   if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local blocks must be > 0, n = %D",n);
1174 
1175   /* Get prefix, row distribution, and block size */
1176   ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr);
1177   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1178   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
1179   if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);
1180 
1181   /* Get diagonal block from matrix if possible */
1182   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1183   ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",(void (**)(void))&f);CHKERRQ(ierr);
1184   if (f) {
1185     ierr = MatGetDiagonalBlock(A,&Ad);CHKERRQ(ierr);
1186   } else if (size == 1) {
1187     Ad = A;
1188   }
1189   if (Ad) {
1190     ierr = PetscTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
1191     if (!isbaij) {ierr = PetscTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);}
1192   }
1193   if (Ad && n > 1) {
1194     PetscBool  match,done;
1195     /* Try to setup a good matrix partitioning if available */
1196     ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr);
1197     ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
1198     ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
1199     ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);CHKERRQ(ierr);
1200     if (!match) {
1201       ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);CHKERRQ(ierr);
1202     }
1203     if (!match) { /* assume a "good" partitioner is available */
1204       PetscInt na,*ia,*ja;
1205       ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1206       if (done) {
1207         /* Build adjacency matrix by hand. Unfortunately a call to
1208            MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1209            remove the block-aij structure and we cannot expect
1210            MatPartitioning to split vertices as we need */
1211         PetscInt i,j,*row,len,nnz,cnt,*iia=0,*jja=0;
1212         nnz = 0;
1213         for (i=0; i<na; i++) { /* count number of nonzeros */
1214           len = ia[i+1] - ia[i];
1215           row = ja + ia[i];
1216           for (j=0; j<len; j++) {
1217             if (row[j] == i) { /* don't count diagonal */
1218               len--; break;
1219             }
1220           }
1221           nnz += len;
1222         }
1223         ierr = PetscMalloc((na+1)*sizeof(PetscInt),&iia);CHKERRQ(ierr);
1224         ierr = PetscMalloc((nnz)*sizeof(PetscInt),&jja);CHKERRQ(ierr);
1225         nnz    = 0;
1226         iia[0] = 0;
1227         for (i=0; i<na; i++) { /* fill adjacency */
1228           cnt = 0;
1229           len = ia[i+1] - ia[i];
1230           row = ja + ia[i];
1231           for (j=0; j<len; j++) {
1232             if (row[j] != i) { /* if not diagonal */
1233               jja[nnz+cnt++] = row[j];
1234             }
1235           }
1236           nnz += cnt;
1237           iia[i+1] = nnz;
1238         }
1239         /* Partitioning of the adjacency matrix */
1240         ierr = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,PETSC_NULL,&adj);CHKERRQ(ierr);
1241         ierr = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr);
1242         ierr = MatPartitioningSetNParts(mpart,n);CHKERRQ(ierr);
1243         ierr = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr);
1244         ierr = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr);
1245         ierr = MatDestroy(&adj);CHKERRQ(ierr);
1246         foundpart = PETSC_TRUE;
1247       }
1248       ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1249     }
1250     ierr = MatPartitioningDestroy(&mpart);CHKERRQ(ierr);
1251   }
1252 
1253   ierr = PetscMalloc(n*sizeof(IS),&is);CHKERRQ(ierr);
1254   *outis = is;
1255 
1256   if (!foundpart) {
1257 
1258     /* Partitioning by contiguous chunks of rows */
1259 
1260     PetscInt mbs   = (rend-rstart)/bs;
1261     PetscInt start = rstart;
1262     for (i=0; i<n; i++) {
1263       PetscInt count = (mbs/n + ((mbs % n) > i)) * bs;
1264       ierr   = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr);
1265       start += count;
1266     }
1267 
1268   } else {
1269 
1270     /* Partitioning by adjacency of diagonal block  */
1271 
1272     const PetscInt *numbering;
1273     PetscInt       *count,nidx,*indices,*newidx,start=0;
1274     /* Get node count in each partition */
1275     ierr = PetscMalloc(n*sizeof(PetscInt),&count);CHKERRQ(ierr);
1276     ierr = ISPartitioningCount(ispart,n,count);CHKERRQ(ierr);
1277     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1278       for (i=0; i<n; i++) count[i] *= bs;
1279     }
1280     /* Build indices from node numbering */
1281     ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr);
1282     ierr = PetscMalloc(nidx*sizeof(PetscInt),&indices);CHKERRQ(ierr);
1283     for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
1284     ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr);
1285     ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr);
1286     ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr);
1287     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1288       ierr = PetscMalloc(nidx*bs*sizeof(PetscInt),&newidx);CHKERRQ(ierr);
1289       for (i=0; i<nidx; i++)
1290         for (j=0; j<bs; j++)
1291           newidx[i*bs+j] = indices[i]*bs + j;
1292       ierr = PetscFree(indices);CHKERRQ(ierr);
1293       nidx   *= bs;
1294       indices = newidx;
1295     }
1296     /* Shift to get global indices */
1297     for (i=0; i<nidx; i++) indices[i] += rstart;
1298 
1299     /* Build the index sets for each block */
1300     for (i=0; i<n; i++) {
1301       ierr   = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);CHKERRQ(ierr);
1302       ierr   = ISSort(is[i]);CHKERRQ(ierr);
1303       start += count[i];
1304     }
1305 
1306     ierr = PetscFree(count);
1307     ierr = PetscFree(indices);
1308     ierr = ISDestroy(&isnumb);CHKERRQ(ierr);
1309     ierr = ISDestroy(&ispart);CHKERRQ(ierr);
1310   }
1311 
1312   PetscFunctionReturn(0);
1313 }
1314 
1315 #undef __FUNCT__
1316 #define __FUNCT__ "PCGASMDestroySubdomains"
1317 /*@C
1318    PCGASMDestroySubdomains - Destroys the index sets created with
1319    PCGASMCreateSubdomains(). Should be called after setting subdomains
1320    with PCGASMSetLocalSubdomains().
1321 
1322    Collective
1323 
1324    Input Parameters:
1325 +  n - the number of index sets
1326 .  is - the array of index sets
1327 -  is_local - the array of local index sets, can be PETSC_NULL
1328 
1329    Level: advanced
1330 
1331 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid
1332 
1333 .seealso: PCGASMCreateSubdomains(), PCGASMSetLocalSubdomains()
1334 @*/
1335 PetscErrorCode  PCGASMDestroySubdomains(PetscInt n, IS is[], IS is_local[])
1336 {
1337   PetscInt       i;
1338   PetscErrorCode ierr;
1339   PetscFunctionBegin;
1340   if (n <= 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"n must be > 0: n = %D",n);
1341   PetscValidPointer(is,2);
1342   for (i=0; i<n; i++) { ierr = ISDestroy(&is[i]);CHKERRQ(ierr); }
1343   ierr = PetscFree(is);CHKERRQ(ierr);
1344   if (is_local) {
1345     PetscValidPointer(is_local,3);
1346     for (i=0; i<n; i++) { ierr = ISDestroy(&is_local[i]);CHKERRQ(ierr); }
1347     ierr = PetscFree(is_local);CHKERRQ(ierr);
1348   }
1349   PetscFunctionReturn(0);
1350 }
1351 
1352 
/*
   PCGASMLocalSubdomainBounds2D - For one rectangular subdomain
   [xleft,xright) x [ylow,yhigh) of an M x N grid stored row-major, computes
   the bounding box (*xleft_loc,*ylow_loc)--(*xright_loc,*yhigh_loc) of the
   subdomain's intersection with this process's ownership range [first,last),
   and *n, the number of locally-owned subdomain points.  All outputs are
   pointers supplied by the caller.
*/
#define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \
{                                                                                                       \
 PetscInt first_row = first/M, last_row = last/M+1;                                                     \
  /*                                                                                                    \
   Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners       \
   of the bounding box of the intersection of the subdomain with the local ownership range (local       \
   subdomain).                                                                                          \
   Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows       \
   of the intersection.                                                                                 \
  */                                                                                                    \
  /* ylow_loc is the grid row containing the first element of the local subdomain */                    \
  *ylow_loc = PetscMax(first_row,ylow);                                                                    \
  /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
  *xleft_loc = *ylow_loc==first_row?PetscMax(first%M,xleft):xleft;                                                                            \
  /* yhigh_loc is the grid row above the last local subdomain element */                                                                    \
  *yhigh_loc = PetscMin(last_row,yhigh);                                                                                                     \
  /* xright_loc is the offset of the end of the local subdomain within its grid row (might actually be outside the local subdomain) */      \
  *xright_loc = *yhigh_loc==last_row?PetscMin(xright,last%M):xright;                                                                          \
  /* Now compute the size of the local subdomain n. */ \
  *n = 0;                                               \
  if(*ylow_loc < *yhigh_loc) {                           \
    PetscInt width = xright-xleft;                     \
    *n += width*(*yhigh_loc-*ylow_loc-1);                 \
    *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \
    *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \
  }\
}
1380 
1381 
1382 
1383 #undef __FUNCT__
1384 #define __FUNCT__ "PCGASMCreateSubdomains2D"
1385 /*@
1386    PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1387    preconditioner for a two-dimensional problem on a regular grid.
1388 
1389    Collective
1390 
1391    Input Parameters:
1392 +  M, N - the global number of mesh points in the x and y directions
1393 .  Mdomains, Ndomains - the global number of subdomains in the x and y directions
1394 .  dof - degrees of freedom per node
1395 -  overlap - overlap in mesh lines
1396 
1397    Output Parameters:
1398 +  Nsub - the number of local subdomains created
1399 .  is - array of index sets defining overlapping (if overlap > 0) subdomains
1400 -  is_local - array of index sets defining non-overlapping subdomains
1401 
1402    Note:
1403    Presently PCAMSCreateSubdomains2d() is valid only for sequential
1404    preconditioners.  More general related routines are
1405    PCGASMSetTotalSubdomains() and PCGASMSetLocalSubdomains().
1406 
1407    Level: advanced
1408 
1409 .keywords: PC, GASM, additive Schwarz, create, subdomains, 2D, regular grid
1410 
1411 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
1412           PCGASMSetOverlap()
1413 @*/
1414 PetscErrorCode  PCGASMCreateSubdomains2D(PC pc, PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap, PetscInt *nsub,IS **is,IS **is_local)
1415 {
1416   PetscErrorCode ierr;
1417   PetscMPIInt    size, rank;
1418   PetscInt       i, j;
1419   PetscInt       maxheight, maxwidth;
1420   PetscInt       xstart, xleft, xright, xleft_loc, xright_loc;
1421   PetscInt       ystart, ylow,  yhigh,  ylow_loc,  yhigh_loc;
1422   PetscInt       x[2][2], y[2][2], n[2];
1423   PetscInt       first, last;
1424   PetscInt       nidx, *idx;
1425   PetscInt       ii,jj,s,q,d;
1426   PetscInt       k,kk;
1427   PetscMPIInt    color;
1428   MPI_Comm       comm, subcomm;
1429   IS             **iis = 0;
1430 
1431   PetscFunctionBegin;
1432   ierr = PetscObjectGetComm((PetscObject)pc, &comm);CHKERRQ(ierr);
1433   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1434   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1435   ierr = MatGetOwnershipRange(pc->pmat, &first, &last);CHKERRQ(ierr);
1436   if (first%dof || last%dof) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) "
1437 	     "does not respect the number of degrees of freedom per grid point %D", first, last, dof);
1438 
1439   /* Determine the number of domains with nonzero intersections with the local ownership range. */
1440   s = 0;
1441   ystart = 0;
1442   for (j=0; j<Ndomains; ++j) {
1443     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1444     if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N);
1445     /* Vertical domain limits with an overlap. */
1446     ylow = PetscMax(ystart - overlap,0);
1447     yhigh = PetscMin(ystart + maxheight + overlap,N);
1448     xstart = 0;
1449     for (i=0; i<Mdomains; ++i) {
1450       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1451       if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1452       /* Horizontal domain limits with an overlap. */
1453       xleft   = PetscMax(xstart - overlap,0);
1454       xright  = PetscMin(xstart + maxwidth + overlap,M);
1455       /*
1456 	 Determine whether this subdomain intersects this processor's ownership range of pc->pmat.
1457       */
1458       PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1459       if(nidx) {
1460         ++s;
1461       }
1462       xstart += maxwidth;
1463     }/* for(i = 0; i < Mdomains; ++i) */
1464     ystart += maxheight;
1465   }/* for(j = 0; j < Ndomains; ++j) */
1466   /* Now we can allocate the necessary number of ISs. */
1467   *nsub = s;
1468   ierr = PetscMalloc((*nsub)*sizeof(IS*),is);CHKERRQ(ierr);
1469   ierr = PetscMalloc((*nsub)*sizeof(IS*),is_local);CHKERRQ(ierr);
1470   s = 0;
1471   ystart = 0;
1472   for (j=0; j<Ndomains; ++j) {
1473     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1474     if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N);
1475     /* Vertical domain limits with an overlap. */
1476     y[0][0] = PetscMax(ystart - overlap,0);
1477     y[0][1] = PetscMin(ystart + maxheight + overlap,N);
1478     /* Vertical domain limits without an overlap. */
1479     y[1][0] = ystart;
1480     y[1][1] = PetscMin(ystart + maxheight,N);
1481     xstart = 0;
1482     for (i=0; i<Mdomains; ++i) {
1483       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1484       if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1485       /* Horizontal domain limits with an overlap. */
1486       x[0][0]  = PetscMax(xstart - overlap,0);
1487       x[0][1]  = PetscMin(xstart + maxwidth + overlap,M);
1488       /* Horizontal domain limits without an overlap. */
1489       x[1][0] = xstart;
1490       x[1][1] = PetscMin(xstart+maxwidth,M);
1491       /*
1492 	 Determine whether this domain intersects this processor's ownership range of pc->pmat.
1493 	 Do this twice: first for the domains with overlaps, and once without.
1494 	 During the first pass create the subcommunicators, and use them on the second pass as well.
1495       */
1496       for(q = 0; q < 2; ++q) {
1497 	/*
1498 	  domain limits, (xleft, xright) and (ylow, yheigh) are adjusted
1499 	  according to whether the domain with an overlap or without is considered.
1500 	*/
1501 	xleft = x[q][0]; xright = x[q][1];
1502 	ylow  = y[q][0]; yhigh  = y[q][1];
1503         PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1504 	nidx *= dof;
1505         n[q] = nidx;
1506         /*
1507          Based on the counted number of indices in the local domain *with an overlap*,
1508          construct a subcommunicator of all the processors supporting this domain.
1509          Observe that a domain with an overlap might have nontrivial local support,
1510          while the domain without an overlap might not.  Hence, the decision to participate
1511          in the subcommunicator must be based on the domain with an overlap.
1512          */
1513 	if (q == 0) {
1514 	  if(nidx) {
1515 	    color = 1;
1516 	  } else {
1517 	    color = MPI_UNDEFINED;
1518 	  }
1519 	  ierr = MPI_Comm_split(comm, color, rank, &subcomm);CHKERRQ(ierr);
1520 	}
1521         /*
1522          Proceed only if the number of local indices *with an overlap* is nonzero.
1523          */
1524         if (n[0]) {
1525           if(q == 0) {
1526             iis = is;
1527           }
1528           if (q == 1) {
1529             /*
1530              The IS for the no-overlap subdomain shares a communicator with the overlapping domain.
1531              Moreover, if the overlap is zero, the two ISs are identical.
1532              */
1533             if (overlap == 0) {
1534               (*is_local)[s] = (*is)[s];
1535               ierr = PetscObjectReference((PetscObject)(*is)[s]);CHKERRQ(ierr);
1536               continue;
1537             } else {
1538               iis = is_local;
1539               subcomm = ((PetscObject)(*is)[s])->comm;
1540             }
1541           }/* if(q == 1) */
1542           idx = PETSC_NULL;
1543 	  ierr = PetscMalloc(nidx*sizeof(PetscInt),&idx);CHKERRQ(ierr);
1544           if(nidx) {
1545             k    = 0;
1546             for (jj=ylow_loc; jj<yhigh_loc; ++jj) {
1547               PetscInt x0 = (jj==ylow_loc)?xleft_loc:xleft;
1548               PetscInt x1 = (jj==yhigh_loc-1)?xright_loc:xright;
1549               kk = dof*(M*jj + x0);
1550               for (ii=x0; ii<x1; ++ii) {
1551                 for(d = 0; d < dof; ++d) {
1552                   idx[k++] = kk++;
1553                 }
1554               }
1555             }
1556           }
1557 	  ierr = ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*iis)+s);CHKERRQ(ierr);
1558 	}/* if(n[0]) */
1559       }/* for(q = 0; q < 2; ++q) */
1560       if(n[0]) {
1561         ++s;
1562       }
1563       xstart += maxwidth;
1564     }/* for(i = 0; i < Mdomains; ++i) */
1565     ystart += maxheight;
1566   }/* for(j = 0; j < Ndomains; ++j) */
1567   PetscFunctionReturn(0);
1568 }
1569 
1570 #undef __FUNCT__
1571 #define __FUNCT__ "PCGASMGetLocalSubdomains"
1572 /*@C
1573     PCGASMGetLocalSubdomains - Gets the local subdomains (for this processor
1574     only) for the additive Schwarz preconditioner.
1575 
1576     Not Collective
1577 
1578     Input Parameter:
1579 .   pc - the preconditioner context
1580 
1581     Output Parameters:
1582 +   n - the number of subdomains for this processor (default value = 1)
1583 .   is - the index sets that define the subdomains for this processor
1584 -   is_local - the index sets that define the local part of the subdomains for this processor (can be PETSC_NULL)
1585 
1586 
1587     Notes:
1588     The IS numbering is in the parallel, global numbering of the vector.
1589 
1590     Level: advanced
1591 
1592 .keywords: PC, GASM, set, local, subdomains, additive Schwarz
1593 
1594 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
1595           PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubmatrices()
1596 @*/
1597 PetscErrorCode  PCGASMGetLocalSubdomains(PC pc,PetscInt *n,IS *is[],IS *is_local[])
1598 {
1599   PC_GASM         *osm;
1600   PetscErrorCode ierr;
1601   PetscBool      match;
1602 
1603   PetscFunctionBegin;
1604   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1605   PetscValidIntPointer(n,2);
1606   if (is) PetscValidPointer(is,3);
1607   ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1608   if (!match) {
1609     if (n)  *n  = 0;
1610     if (is) *is = PETSC_NULL;
1611   } else {
1612     osm = (PC_GASM*)pc->data;
1613     if (n)  *n  = osm->n;
1614     if (is) *is = osm->is;
1615     if (is_local) *is_local = osm->is_local;
1616   }
1617   PetscFunctionReturn(0);
1618 }
1619 
1620 #undef __FUNCT__
1621 #define __FUNCT__ "PCGASMGetLocalSubmatrices"
1622 /*@C
1623     PCGASMGetLocalSubmatrices - Gets the local submatrices (for this processor
1624     only) for the additive Schwarz preconditioner.
1625 
1626     Not Collective
1627 
1628     Input Parameter:
1629 .   pc - the preconditioner context
1630 
1631     Output Parameters:
1632 +   n - the number of matrices for this processor (default value = 1)
1633 -   mat - the matrices
1634 
1635 
1636     Level: advanced
1637 
1638 .keywords: PC, GASM, set, local, subdomains, additive Schwarz, block Jacobi
1639 
1640 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
1641           PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubdomains()
1642 @*/
1643 PetscErrorCode  PCGASMGetLocalSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1644 {
1645   PC_GASM         *osm;
1646   PetscErrorCode ierr;
1647   PetscBool      match;
1648 
1649   PetscFunctionBegin;
1650   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1651   PetscValidIntPointer(n,2);
1652   if (mat) PetscValidPointer(mat,3);
1653   if (!pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUP() or PCSetUp().");
1654   ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1655   if (!match) {
1656     if (n)   *n   = 0;
1657     if (mat) *mat = PETSC_NULL;
1658   } else {
1659     osm = (PC_GASM*)pc->data;
1660     if (n)   *n   = osm->n;
1661     if (mat) *mat = osm->pmat;
1662   }
1663   PetscFunctionReturn(0);
1664 }
1665