xref: /petsc/src/ksp/pc/impls/gasm/gasm.c (revision f2ed2dc71a2ab9ffda85eae8afa0cbea9ed570de)
1 /*
2   This file defines a "generalized" additive Schwarz preconditioner for any Mat implementation.
3   In this version each processor may intersect multiple subdomains and any subdomain may
4   intersect multiple processors.  Intersections of subdomains with processors are called *local
5   subdomains*.
6 
7        N    - total number of distinct global subdomains          (set explicitly in PCGASMSetTotalSubdomains() or implicitly PCGASMSetSubdomains() and then calculated in PCSetUp_GASM())
8        n    - actual number of local subdomains on this processor (set in PCGASMSetSubdomains() or calculated in PCGASMSetTotalSubdomains())
9        nmax - maximum number of local subdomains per processor    (calculated in PCSetUp_GASM())
10 */
11 #include <petsc/private/pcimpl.h>     /*I "petscpc.h" I*/
12 #include <petscdm.h>
13 
/*
   Private context for the GASM preconditioner.  The subdomain counts N, n, and
   nmax are described in the file header comment above.
*/
typedef struct {
  PetscInt    N,n,nmax;                 /* global, local, and max-local subdomain counts (see the file header comment) */
  PetscInt    overlap;                  /* overlap requested by user */
  PCGASMType  type;                     /* use reduced interpolation, restriction or both */
  PetscBool   type_set;                 /* if user set this value (so won't change it for symmetric problems) */
  PetscBool   same_subdomain_solvers;   /* flag indicating whether all local solvers are same */
  PetscBool   sort_indices;             /* flag to sort subdomain indices */
  PetscBool   user_subdomains;          /* whether the user set explicit subdomain index sets -- keep them on PCReset() */
  PetscBool   dm_subdomains;            /* whether DM is allowed to define subdomains */
  PetscBool   hierarchicalpartitioning; /* whether to repartition the operator hierarchically in PCSetUp_GASM() */
  IS          *ois;                     /* index sets that define the outer (conceptually, overlapping) subdomains */
  IS          *iis;                     /* index sets that define the inner (conceptually, nonoverlapping) subdomains */
  KSP         *ksp;                     /* linear solvers for each subdomain */
  Mat         *pmat;                    /* subdomain block matrices */
  Vec         gx,gy;                    /* Merged work vectors */
  Vec         *x,*y;                    /* Split work vectors; storage aliases pieces of storage of the above merged vectors. */
  VecScatter  gorestriction;            /* merged restriction to disjoint union of outer subdomains */
  VecScatter  girestriction;            /* merged restriction to disjoint union of inner subdomains */
  VecScatter  pctoouter;                /* scatter from the permuted layout (pcx/pcy) to the original layout (hierarchical partitioning) */
  IS          permutationIS;            /* rows owned by this rank under the hierarchical partitioning; used to extract permutationP */
  Mat         permutationP;             /* operator permuted per permutationIS; temporarily installed as pc->pmat during setup */
  Mat         pcmat;                    /* saved original pc->pmat while permutationP is installed */
  Vec         pcx,pcy;                  /* work vectors in the permuted layout */
} PC_GASM;
38 
39 static PetscErrorCode  PCGASMComputeGlobalSubdomainNumbering_Private(PC pc,PetscInt **numbering,PetscInt **permutation)
40 {
41   PC_GASM        *osm = (PC_GASM*)pc->data;
42   PetscInt       i;
43   PetscErrorCode ierr;
44 
45   PetscFunctionBegin;
46   /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
47   ierr = PetscMalloc2(osm->n,numbering,osm->n,permutation);CHKERRQ(ierr);
48   ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->iis,NULL,*numbering);CHKERRQ(ierr);
49   for (i = 0; i < osm->n; ++i) (*permutation)[i] = i;
50   ierr = PetscSortIntWithPermutation(osm->n,*numbering,*permutation);CHKERRQ(ierr);
51   PetscFunctionReturn(0);
52 }
53 
54 static PetscErrorCode  PCGASMSubdomainView_Private(PC pc, PetscInt i, PetscViewer viewer)
55 {
56   PC_GASM        *osm = (PC_GASM*)pc->data;
57   PetscInt       j,nidx;
58   const PetscInt *idx;
59   PetscViewer    sviewer;
60   char           *cidx;
61   PetscErrorCode ierr;
62 
63   PetscFunctionBegin;
64   if (i < 0 || i > osm->n) SETERRQ2(PetscObjectComm((PetscObject)viewer), PETSC_ERR_ARG_WRONG, "Invalid subdomain %D: must nonnegative and less than %D", i, osm->n);
65   /* Inner subdomains. */
66   ierr = ISGetLocalSize(osm->iis[i], &nidx);CHKERRQ(ierr);
67   /*
68    No more than 15 characters per index plus a space.
69    PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
70    in case nidx == 0. That will take care of the space for the trailing '\0' as well.
71    For nidx == 0, the whole string 16 '\0'.
72    */
73 #define len  16*(nidx+1)+1
74   ierr = PetscMalloc1(len, &cidx);CHKERRQ(ierr);
75   ierr = PetscViewerStringOpen(PETSC_COMM_SELF, cidx, len, &sviewer);CHKERRQ(ierr);
76 #undef len
77   ierr = ISGetIndices(osm->iis[i], &idx);CHKERRQ(ierr);
78   for (j = 0; j < nidx; ++j) {
79     ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);CHKERRQ(ierr);
80   }
81   ierr = ISRestoreIndices(osm->iis[i],&idx);CHKERRQ(ierr);
82   ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr);
83   ierr = PetscViewerASCIIPrintf(viewer, "Inner subdomain:\n");CHKERRQ(ierr);
84   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
85   ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
86   ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr);
87   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
88   ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
89   ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr);
90   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
91   ierr = PetscFree(cidx);CHKERRQ(ierr);
92   /* Outer subdomains. */
93   ierr = ISGetLocalSize(osm->ois[i], &nidx);CHKERRQ(ierr);
94   /*
95    No more than 15 characters per index plus a space.
96    PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
97    in case nidx == 0. That will take care of the space for the trailing '\0' as well.
98    For nidx == 0, the whole string 16 '\0'.
99    */
100 #define len  16*(nidx+1)+1
101   ierr = PetscMalloc1(len, &cidx);CHKERRQ(ierr);
102   ierr = PetscViewerStringOpen(PETSC_COMM_SELF, cidx, len, &sviewer);CHKERRQ(ierr);
103 #undef len
104   ierr = ISGetIndices(osm->ois[i], &idx);CHKERRQ(ierr);
105   for (j = 0; j < nidx; ++j) {
106     ierr = PetscViewerStringSPrintf(sviewer,"%D ", idx[j]);CHKERRQ(ierr);
107   }
108   ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr);
109   ierr = ISRestoreIndices(osm->ois[i],&idx);CHKERRQ(ierr);
110   ierr = PetscViewerASCIIPrintf(viewer, "Outer subdomain:\n");CHKERRQ(ierr);
111   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
112   ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
113   ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr);
114   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
115   ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
116   ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr);
117   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
118   ierr = PetscFree(cidx);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
122 static PetscErrorCode  PCGASMPrintSubdomains(PC pc)
123 {
124   PC_GASM        *osm = (PC_GASM*)pc->data;
125   const char     *prefix;
126   char           fname[PETSC_MAX_PATH_LEN+1];
127   PetscInt       l, d, count;
128   PetscBool      doprint,found;
129   PetscViewer    viewer, sviewer = NULL;
130   PetscInt       *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */
131   PetscErrorCode ierr;
132 
133   PetscFunctionBegin;
134   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
135   doprint  = PETSC_FALSE;
136   ierr = PetscOptionsGetBool(NULL,prefix,"-pc_gasm_print_subdomains",&doprint,NULL);CHKERRQ(ierr);
137   if (!doprint) PetscFunctionReturn(0);
138   ierr = PetscOptionsGetString(NULL,prefix,"-pc_gasm_print_subdomains",fname,sizeof(fname),&found);CHKERRQ(ierr);
139   if (!found) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); };
140   ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)pc),fname,&viewer);CHKERRQ(ierr);
141   /*
142    Make sure the viewer has a name. Otherwise this may cause a deadlock or other weird errors when creating a subcomm viewer:
143    the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed collectively on the comm.
144   */
145   ierr = PetscObjectName((PetscObject)viewer);CHKERRQ(ierr);
146   l    = 0;
147   ierr = PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);CHKERRQ(ierr);
148   for (count = 0; count < osm->N; ++count) {
149     /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
150     if (l<osm->n) {
151       d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
152       if (numbering[d] == count) {
153         ierr = PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
154         ierr = PCGASMSubdomainView_Private(pc,d,sviewer);CHKERRQ(ierr);
155         ierr = PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
156         ++l;
157       }
158     }
159     ierr = MPI_Barrier(PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
160   }
161   ierr = PetscFree2(numbering,permutation);CHKERRQ(ierr);
162   ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
163   PetscFunctionReturn(0);
164 }
165 
/*
   Viewer for the GASM preconditioner.  For ASCII viewers: prints the
   restriction/interpolation type, overlap, and subdomain-count summaries, then
   visits the subdomains one at a time in the global numbering order, printing
   each subdomain's solver (and, with -pc_gasm_view_subdomains, its index sets)
   through a subcommunicator viewer.  Collective: the barrier in the loop keeps
   ranks in lock-step so the sub-viewer output appears in global order.
*/
static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  const char     *prefix;
  PetscErrorCode ierr;
  PetscMPIInt    rank, size;
  PetscInt       bsz;
  PetscBool      iascii,view_subdomains=PETSC_FALSE;
  PetscViewer    sviewer;
  PetscInt       count, l;
  /* Fallback banners; overwritten below when the corresponding value is known. */
  char           overlap[256]     = "user-defined overlap";
  char           gsubdomains[256] = "unknown total number of subdomains";
  char           msubdomains[256] = "unknown max number of local subdomains";
  PetscInt       *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank);CHKERRQ(ierr);

  if (osm->overlap >= 0) {
    ierr = PetscSNPrintf(overlap,sizeof(overlap),"requested amount of overlap = %D",osm->overlap);CHKERRQ(ierr);
  }
  if (osm->N != PETSC_DETERMINE) {
    ierr = PetscSNPrintf(gsubdomains, sizeof(gsubdomains), "total number of subdomains = %D",osm->N);CHKERRQ(ierr);
  }
  if (osm->nmax != PETSC_DETERMINE) {
    ierr = PetscSNPrintf(msubdomains,sizeof(msubdomains),"max number of local subdomains = %D",osm->nmax);CHKERRQ(ierr);
  }

  ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(NULL,prefix,"-pc_gasm_view_subdomains",&view_subdomains,NULL);CHKERRQ(ierr);

  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    /*
     Make sure the viewer has a name. Otherwise this may cause a deadlock when creating a subcomm viewer:
     the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed
     collectively on the comm.
     */
    ierr = PetscObjectName((PetscObject)viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  Restriction/interpolation type: %s\n",PCGASMTypes[osm->type]);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  %s\n",overlap);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  %s\n",gsubdomains);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  %s\n",msubdomains);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d|%d] number of locally-supported subdomains = %D\n",rank,size,osm->n);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    /* Cannot take advantage of osm->same_subdomain_solvers without a global numbering of subdomains. */
    ierr = PetscViewerASCIIPrintf(viewer,"  Subdomain solver info is as follows:\n");CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  - - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
    /* Make sure that everybody waits for the banner to be printed. */
    ierr = MPI_Barrier(PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
    /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
    ierr = PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);CHKERRQ(ierr);
    l = 0;  /* number of locally-supported subdomains already printed */
    for (count = 0; count < osm->N; ++count) {
      PetscMPIInt srank, ssize;
      if (l<osm->n) {
        PetscInt d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
        if (numbering[d] == count) {
          ierr = MPI_Comm_size(((PetscObject)osm->ois[d])->comm, &ssize);CHKERRQ(ierr);
          ierr = MPI_Comm_rank(((PetscObject)osm->ois[d])->comm, &srank);CHKERRQ(ierr);
          /* Print this subdomain's solver on the subdomain's own (sub)communicator. */
          ierr = PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
          ierr = ISGetLocalSize(osm->ois[d],&bsz);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"  [%d|%d] (subcomm [%d|%d]) local subdomain number %D, local size = %D\n",rank,size,srank,ssize,d,bsz);CHKERRQ(ierr);
          ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr);
          if (view_subdomains) {
            ierr = PCGASMSubdomainView_Private(pc,d,sviewer);CHKERRQ(ierr);
          }
          if (!pc->setupcalled) {
            ierr = PetscViewerASCIIPrintf(sviewer, "  Solver not set up yet: PCSetUp() not yet called\n");CHKERRQ(ierr);
          } else {
            ierr = KSPView(osm->ksp[d],sviewer);CHKERRQ(ierr);
          }
          ierr = PetscViewerASCIIPrintf(sviewer,"  - - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
          ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr);
          ierr = PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
          ++l;
        }
      }
      ierr = MPI_Barrier(PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
    }
    ierr = PetscFree2(numbering,permutation);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  /* NOTE(review): this Pop has no matching Push at this level (the Push at the top of the
     ASCII branch is popped right after the per-rank count is printed) -- confirm it is
     intentional and benign for non-ASCII viewers. */
  ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
257 
258 PETSC_INTERN PetscErrorCode  PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[]);
259 
260 PetscErrorCode PCGASMSetHierarchicalPartitioning(PC pc)
261 {
262    PC_GASM              *osm = (PC_GASM*)pc->data;
263    MatPartitioning       part;
264    MPI_Comm              comm;
265    PetscMPIInt           size;
266    PetscInt              nlocalsubdomains,fromrows_localsize;
267    IS                    partitioning,fromrows,isn;
268    Vec                   outervec;
269    PetscErrorCode        ierr;
270 
271    PetscFunctionBegin;
272    ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
273    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
274    /* we do not need a hierarchical partitioning when
275     * the total number of subdomains is consistent with
276     * the number of MPI tasks.
277     * For the following cases, we do not need to use HP
278     * */
279    if(osm->N==PETSC_DETERMINE || osm->N>=size || osm->N==1) PetscFunctionReturn(0);
280    if(size%osm->N != 0) SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_ARG_INCOMP,"have to specify the total number of subdomains %D to be a factor of the number of processors %d \n",osm->N,size);
281    nlocalsubdomains = size/osm->N;
282    osm->n           = 1;
283    ierr = MatPartitioningCreate(comm,&part);CHKERRQ(ierr);
284    ierr = MatPartitioningSetAdjacency(part,pc->pmat);CHKERRQ(ierr);
285    ierr = MatPartitioningSetType(part,MATPARTITIONINGHIERARCH);CHKERRQ(ierr);
286    ierr = MatPartitioningHierarchicalSetNcoarseparts(part,osm->N);CHKERRQ(ierr);
287    ierr = MatPartitioningHierarchicalSetNfineparts(part,nlocalsubdomains);CHKERRQ(ierr);
288    ierr = MatPartitioningSetFromOptions(part);CHKERRQ(ierr);
289    /* get new processor owner number of each vertex */
290    ierr = MatPartitioningApply(part,&partitioning);CHKERRQ(ierr);
291    ierr = ISBuildTwoSided(partitioning,NULL,&fromrows);CHKERRQ(ierr);
292    ierr = ISPartitioningToNumbering(partitioning,&isn);CHKERRQ(ierr);
293    ierr = ISDestroy(&isn);CHKERRQ(ierr);
294    ierr = ISGetLocalSize(fromrows,&fromrows_localsize);CHKERRQ(ierr);
295    ierr = MatPartitioningDestroy(&part);CHKERRQ(ierr);
296    ierr = MatCreateVecs(pc->pmat,&outervec,NULL);CHKERRQ(ierr);
297    ierr = VecCreateMPI(comm,fromrows_localsize,PETSC_DETERMINE,&(osm->pcx));CHKERRQ(ierr);
298    ierr = VecDuplicate(osm->pcx,&(osm->pcy));CHKERRQ(ierr);
299    ierr = VecScatterCreate(osm->pcx,NULL,outervec,fromrows,&(osm->pctoouter));CHKERRQ(ierr);
300    ierr = MatCreateSubMatrix(pc->pmat,fromrows,fromrows,MAT_INITIAL_MATRIX,&(osm->permutationP));CHKERRQ(ierr);
301    ierr = PetscObjectReference((PetscObject)fromrows);CHKERRQ(ierr);
302    osm->permutationIS = fromrows;
303    osm->pcmat =  pc->pmat;
304    ierr = PetscObjectReference((PetscObject)osm->permutationP);CHKERRQ(ierr);
305    pc->pmat = osm->permutationP;
306    ierr = VecDestroy(&outervec);CHKERRQ(ierr);
307    ierr = ISDestroy(&fromrows);CHKERRQ(ierr);
308    ierr = ISDestroy(&partitioning);CHKERRQ(ierr);
309    osm->n           = PETSC_DETERMINE;
310    PetscFunctionReturn(0);
311 }
312 
/*
   PCSetUp_GASM - sets up the GASM preconditioner.

   First call: (1) optionally repartitions the operator hierarchically;
   (2) determines the subdomains (explicit user IS's, DM decomposition, or one
   per process); (3) grows the outer subdomains by the requested overlap;
   (4) builds the merged work vectors osm->gx/gy and the restrictions onto the
   disjoint unions of the outer and inner subdomains; (5) creates one KSP per
   local subdomain.  Subsequent calls reuse those structures and only re-extract
   the subdomain matrices.  Collective on the PC's communicator.
*/
static PetscErrorCode PCSetUp_GASM(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i,nInnerIndices,nTotalInnerIndices;
  PetscMPIInt    rank, size;
  MatReuse       scall = MAT_REUSE_MATRIX;
  KSP            ksp;
  PC             subpc;
  const char     *prefix,*pprefix;
  Vec            x,y;
  PetscInt       oni;       /* Number of indices in the i-th local outer subdomain.               */
  const PetscInt *oidxi;    /* Indices from the i-th subdomain local outer subdomain.             */
  PetscInt       on;        /* Number of indices in the disjoint union of local outer subdomains. */
  PetscInt       *oidx;     /* Indices in the disjoint union of local outer subdomains. */
  IS             gois;      /* Disjoint union the global indices of outer subdomains.             */
  IS             goid;      /* Identity IS of the size of the disjoint union of outer subdomains. */
  PetscScalar    *gxarray, *gyarray;
  PetscInt       gostart;   /* Start of locally-owned indices in the vectors -- osm->gx,osm->gy -- over the disjoint union of outer subdomains. */
  PetscInt       num_subdomains    = 0;
  DM             *subdomain_dm     = NULL;
  char           **subdomain_names = NULL;
  PetscInt       *numbering;       /* global numbers of the local outer subdomains */


  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
  if (!pc->setupcalled) {
	/* use a hierarchical partitioning */
    if(osm->hierarchicalpartitioning){
      ierr = PCGASMSetHierarchicalPartitioning(pc);CHKERRQ(ierr);
    }
    /* Determine the inner subdomains, trying sources in order: total count, DM, one per process. */
    if (osm->n == PETSC_DETERMINE) {
      if (osm->N != PETSC_DETERMINE) {
	   /* No local subdomains given, but the desired number of total subdomains is known, so construct them accordingly. */
	   ierr = PCGASMCreateSubdomains(pc->pmat,osm->N,&osm->n,&osm->iis);CHKERRQ(ierr);
      } else if (osm->dm_subdomains && pc->dm) {
	/* try pc->dm next, if allowed */
	PetscInt  d;
	IS       *inner_subdomain_is, *outer_subdomain_is;
	ierr = DMCreateDomainDecomposition(pc->dm, &num_subdomains, &subdomain_names, &inner_subdomain_is, &outer_subdomain_is, &subdomain_dm);CHKERRQ(ierr);
	if (num_subdomains) {
	  ierr = PCGASMSetSubdomains(pc, num_subdomains, inner_subdomain_is, outer_subdomain_is);CHKERRQ(ierr);
	}
	/* PCGASMSetSubdomains() took its own references; release ours. */
	for (d = 0; d < num_subdomains; ++d) {
	  if (inner_subdomain_is) {ierr = ISDestroy(&inner_subdomain_is[d]);CHKERRQ(ierr);}
	  if (outer_subdomain_is) {ierr = ISDestroy(&outer_subdomain_is[d]);CHKERRQ(ierr);}
	}
	ierr = PetscFree(inner_subdomain_is);CHKERRQ(ierr);
	ierr = PetscFree(outer_subdomain_is);CHKERRQ(ierr);
      } else {
	/* still no subdomains; use one per processor */
	osm->nmax = osm->n = 1;
	ierr      = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr);
	osm->N    = size;
	ierr = PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);CHKERRQ(ierr);
      }
    }
    if (!osm->iis) {
      /*
       osm->n was set in PCGASMSetSubdomains(), but the actual subdomains have not been supplied.
       We create the requisite number of local inner subdomains and then expand them into
       out subdomains, if necessary.
       */
      ierr = PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);CHKERRQ(ierr);
    }
    if (!osm->ois) {
      /*
	    Initially make outer subdomains the same as inner subdomains. If nonzero additional overlap
	    has been requested, copy the inner subdomains over so they can be modified.
      */
      ierr = PetscMalloc1(osm->n,&osm->ois);CHKERRQ(ierr);
      for (i=0; i<osm->n; ++i) {
	if (osm->overlap > 0 && osm->N>1) { /* With positive overlap, osm->iis[i] will be modified */
	  ierr = ISDuplicate(osm->iis[i],(osm->ois)+i);CHKERRQ(ierr);
	  ierr = ISCopy(osm->iis[i],osm->ois[i]);CHKERRQ(ierr);
	} else {
	  ierr      = PetscObjectReference((PetscObject)((osm->iis)[i]));CHKERRQ(ierr);
	  osm->ois[i] = osm->iis[i];
	}
      }
      if (osm->overlap>0 && osm->N>1) {
	   /* Extend the "overlapping" regions by a number of steps */
	   ierr = MatIncreaseOverlapSplit(pc->pmat,osm->n,osm->ois,osm->overlap);CHKERRQ(ierr);
      }
    }

    /* Now the subdomains are defined.  Determine their global and max local numbers, if necessary. */
    if (osm->nmax == PETSC_DETERMINE) {
      PetscMPIInt inwork,outwork;
      /* determine global number of subdomains and the max number of local subdomains */
      inwork = osm->n;
      ierr       = MPIU_Allreduce(&inwork,&outwork,1,MPI_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
      osm->nmax  = outwork;
    }
    if (osm->N == PETSC_DETERMINE) {
      /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
      ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,&osm->N,NULL);CHKERRQ(ierr);
    }


    if (osm->sort_indices) {
      for (i=0; i<osm->n; i++) {
        ierr = ISSort(osm->ois[i]);CHKERRQ(ierr);
        ierr = ISSort(osm->iis[i]);CHKERRQ(ierr);
      }
    }
    ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
    ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr);

    /*
       Merge the ISs, create merged vectors and restrictions.
     */
    /* Merge outer subdomain ISs and construct a restriction onto the disjoint union of local outer subdomains. */
    on = 0;
    for (i=0; i<osm->n; i++) {
      ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
      on  += oni;
    }
    ierr = PetscMalloc1(on, &oidx);CHKERRQ(ierr);
    on   = 0;
    /* Merge local indices together */
    for (i=0; i<osm->n; i++) {
      ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
      ierr = ISGetIndices(osm->ois[i],&oidxi);CHKERRQ(ierr);
      ierr = PetscArraycpy(oidx+on,oidxi,oni);CHKERRQ(ierr);
      ierr = ISRestoreIndices(osm->ois[i],&oidxi);CHKERRQ(ierr);
      on  += oni;
    }
    ierr = ISCreateGeneral(((PetscObject)(pc))->comm,on,oidx,PETSC_OWN_POINTER,&gois);CHKERRQ(ierr);
    /* x,y are global work vectors whose local length is the total size of the local inner subdomains. */
    nTotalInnerIndices = 0;
    for(i=0; i<osm->n; i++){
      ierr = ISGetLocalSize(osm->iis[i],&nInnerIndices);CHKERRQ(ierr);
      nTotalInnerIndices += nInnerIndices;
    }
    ierr = VecCreateMPI(((PetscObject)(pc))->comm,nTotalInnerIndices,PETSC_DETERMINE,&x);CHKERRQ(ierr);
    ierr = VecDuplicate(x,&y);CHKERRQ(ierr);

    ierr = VecCreateMPI(PetscObjectComm((PetscObject)pc),on,PETSC_DECIDE,&osm->gx);CHKERRQ(ierr);
    ierr = VecDuplicate(osm->gx,&osm->gy);CHKERRQ(ierr);
    ierr = VecGetOwnershipRange(osm->gx, &gostart, NULL);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),on,gostart,1, &goid);CHKERRQ(ierr);
    /* gois may contain indices this rank does not own, hence the scatter from the general IS gois onto the locally-owned stride goid */
    ierr = VecScatterCreate(x,gois,osm->gx,goid, &(osm->gorestriction));CHKERRQ(ierr);
    ierr = PetscMalloc1(osm->n,&numbering);CHKERRQ(ierr);
    ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,NULL,numbering);CHKERRQ(ierr);
    ierr = VecDestroy(&x);CHKERRQ(ierr);
    ierr = ISDestroy(&gois);CHKERRQ(ierr);

    /* Merge inner subdomain ISs and construct a restriction onto the disjoint union of local inner subdomains. */
    {
      PetscInt        ini;           /* Number of indices the i-th a local inner subdomain. */
      PetscInt        in;            /* Number of indices in the disjoint union of local inner subdomains. */
      PetscInt       *iidx;          /* Global indices in the merged local inner subdomain. */
      PetscInt       *ioidx;         /* Global indices of the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
      IS              giis;          /* IS for the disjoint union of inner subdomains. */
      IS              giois;         /* IS for the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
      PetscScalar    *array;
      const PetscInt *indices;
      PetscInt        k;
      on = 0;
      for (i=0; i<osm->n; i++) {
        ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
        on  += oni;
      }
      ierr = PetscMalloc1(on, &iidx);CHKERRQ(ierr);
      ierr = PetscMalloc1(on, &ioidx);CHKERRQ(ierr);
      ierr = VecGetArray(y,&array);CHKERRQ(ierr);
      /* set communicator id to determine where overlap is */
      in   = 0;
      for (i=0; i<osm->n; i++) {
        ierr   = ISGetLocalSize(osm->iis[i],&ini);CHKERRQ(ierr);
        for (k = 0; k < ini; ++k){
          array[in+k] = numbering[i];
        }
        in += ini;
      }
      ierr = VecRestoreArray(y,&array);CHKERRQ(ierr);
      /* Push the subdomain numbers through the outer restriction; an entry arriving with a
         number different from its subdomain's marks an overlap point owned by another subdomain. */
      ierr = VecScatterBegin(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecGetOwnershipRange(osm->gy,&gostart, NULL);CHKERRQ(ierr);
      ierr = VecGetArray(osm->gy,&array);CHKERRQ(ierr);
      on  = 0;
      in  = 0;
      for (i=0; i<osm->n; i++) {
    	ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
    	ierr = ISGetIndices(osm->ois[i],&indices);CHKERRQ(ierr);
    	for (k=0; k<oni; k++) {
          /*  skip overlapping indices to get inner domain */
          if(PetscRealPart(array[on+k]) != numbering[i]) continue;
          iidx[in]    = indices[k];
          ioidx[in++] = gostart+on+k;
    	}
    	ierr   = ISRestoreIndices(osm->ois[i], &indices);CHKERRQ(ierr);
    	on += oni;
      }
      ierr = VecRestoreArray(osm->gy,&array);CHKERRQ(ierr);
      ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,iidx,PETSC_OWN_POINTER,&giis);CHKERRQ(ierr);
      ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,ioidx,PETSC_OWN_POINTER,&giois);CHKERRQ(ierr);
      ierr = VecScatterCreate(y,giis,osm->gy,giois,&osm->girestriction);CHKERRQ(ierr);
      ierr = VecDestroy(&y);CHKERRQ(ierr);
      ierr = ISDestroy(&giis);CHKERRQ(ierr);
      ierr = ISDestroy(&giois);CHKERRQ(ierr);
    }
    ierr = ISDestroy(&goid);CHKERRQ(ierr);
    ierr = PetscFree(numbering);CHKERRQ(ierr);

    /* Create the subdomain work vectors. */
    ierr = PetscMalloc1(osm->n,&osm->x);CHKERRQ(ierr);
    ierr = PetscMalloc1(osm->n,&osm->y);CHKERRQ(ierr);
    ierr = VecGetArray(osm->gx, &gxarray);CHKERRQ(ierr);
    ierr = VecGetArray(osm->gy, &gyarray);CHKERRQ(ierr);
    for (i=0, on=0; i<osm->n; ++i, on += oni) {
      PetscInt oNi;
      ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
      /* on a sub communicator */
      ierr = ISGetSize(osm->ois[i],&oNi);CHKERRQ(ierr);
      /* osm->x[i]/osm->y[i] alias the i-th outer subdomain's segment of the merged storage */
      ierr = VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gxarray+on,&osm->x[i]);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gyarray+on,&osm->y[i]);CHKERRQ(ierr);
    }
    ierr = VecRestoreArray(osm->gx, &gxarray);CHKERRQ(ierr);
    ierr = VecRestoreArray(osm->gy, &gyarray);CHKERRQ(ierr);
    /* Create the subdomain solvers */
    ierr = PetscMalloc1(osm->n,&osm->ksp);CHKERRQ(ierr);
    for (i=0; i<osm->n; i++) {
      char subprefix[PETSC_MAX_PATH_LEN+1];
      ierr        = KSPCreate(((PetscObject)(osm->ois[i]))->comm,&ksp);CHKERRQ(ierr);
      ierr        = KSPSetErrorIfNotConverged(ksp,pc->erroriffailure);CHKERRQ(ierr);
      ierr        = PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp);CHKERRQ(ierr);
      ierr        = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr);
      ierr        = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
      ierr        = KSPGetPC(ksp,&subpc);CHKERRQ(ierr); /* Why do we need this here? */
      if (subdomain_dm) {
	    ierr = KSPSetDM(ksp,subdomain_dm[i]);CHKERRQ(ierr);
	    ierr = DMDestroy(subdomain_dm+i);CHKERRQ(ierr);
      }
      ierr        = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
      ierr        = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr);
      if (subdomain_names && subdomain_names[i]) {
	     ierr = PetscSNPrintf(subprefix,PETSC_MAX_PATH_LEN,"sub_%s_",subdomain_names[i]);CHKERRQ(ierr);
	     ierr = KSPAppendOptionsPrefix(ksp,subprefix);CHKERRQ(ierr);
	     ierr = PetscFree(subdomain_names[i]);CHKERRQ(ierr);
      }
      ierr        = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr);
      osm->ksp[i] = ksp;
    }
    ierr = PetscFree(subdomain_dm);CHKERRQ(ierr);
    ierr = PetscFree(subdomain_names);CHKERRQ(ierr);
    scall = MAT_INITIAL_MATRIX;

  } else { /* if (pc->setupcalled) */
    /*
       Destroy the submatrices from the previous iteration
    */
    if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
      ierr  = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
      scall = MAT_INITIAL_MATRIX;
    }
    if(osm->permutationIS){
      /* Re-extract the hierarchically-permuted operator and temporarily install it as pc->pmat
         (restored at the end of this function). */
      ierr = MatCreateSubMatrix(pc->pmat,osm->permutationIS,osm->permutationIS,scall,&osm->permutationP);CHKERRQ(ierr);
      ierr = PetscObjectReference((PetscObject)osm->permutationP);CHKERRQ(ierr);
      osm->pcmat = pc->pmat;
      pc->pmat   = osm->permutationP;
    }

  }


  /*
     Extract out the submatrices.
  */
  if (size > 1) {
    ierr = MatCreateSubMatricesMPI(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreateSubMatrices(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);CHKERRQ(ierr);
  }
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr);
    for (i=0; i<osm->n; i++) {
      ierr = PetscLogObjectParent((PetscObject)pc,(PetscObject)osm->pmat[i]);CHKERRQ(ierr);
      ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr);
    }
  }

  /* Return control to the user so that the submatrices can be modified (e.g., to apply
     different boundary conditions for the submatrices than for the global problem) */
  ierr = PCModifySubMatrices(pc,osm->n,osm->ois,osm->ois,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr);

  /*
     Loop over submatrices putting them into local ksps
  */
  for (i=0; i<osm->n; i++) {
    ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i]);CHKERRQ(ierr);
    if (!pc->setupcalled) {
      ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr);
    }
  }
  if(osm->pcmat){
    /* Restore the original (unpermuted) operator as pc->pmat. */
    ierr = MatDestroy(&pc->pmat);CHKERRQ(ierr);
    pc->pmat   = osm->pcmat;
    osm->pcmat = NULL;
  }
  PetscFunctionReturn(0);
}
618 
619 static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc)
620 {
621   PC_GASM        *osm = (PC_GASM*)pc->data;
622   PetscErrorCode ierr;
623   PetscInt       i;
624 
625   PetscFunctionBegin;
626   for (i=0; i<osm->n; i++) {
627     ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr);
628   }
629   PetscFunctionReturn(0);
630 }
631 
632 static PetscErrorCode PCApply_GASM(PC pc,Vec xin,Vec yout)
633 {
634   PC_GASM        *osm = (PC_GASM*)pc->data;
635   PetscErrorCode ierr;
636   PetscInt       i;
637   Vec            x,y;
638   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
639 
640   PetscFunctionBegin;
641   if (osm->pctoouter) {
642     ierr = VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
643     ierr = VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
644     x = osm->pcx;
645     y = osm->pcy;
646   } else {
647     x = xin;
648     y = yout;
649   }
650   /*
651      support for limiting the restriction or interpolation only to the inner
652      subdomain values (leaving the other values 0).
653   */
654   if (!(osm->type & PC_GASM_RESTRICT)) {
655     /* have to zero the work RHS since scatter may leave some slots empty */
656     ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
657     ierr = VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
658   } else {
659     ierr = VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
660   }
661   ierr = VecZeroEntries(osm->gy);CHKERRQ(ierr);
662   if (!(osm->type & PC_GASM_RESTRICT)) {
663     ierr = VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
664   } else {
665     ierr = VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
666   }
667   /* do the subdomain solves */
668   for (i=0; i<osm->n; ++i) {
669     ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
670     ierr = KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);CHKERRQ(ierr);
671   }
672   /* do we need to zero y? */
673   ierr = VecZeroEntries(y);CHKERRQ(ierr);
674   if (!(osm->type & PC_GASM_INTERPOLATE)) {
675     ierr = VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
676     ierr = VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
677   } else {
678     ierr = VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
679     ierr = VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
680   }
681   if (osm->pctoouter) {
682     ierr = VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
683     ierr = VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
684   }
685   PetscFunctionReturn(0);
686 }
687 
688 static PetscErrorCode PCMatApply_GASM(PC pc,Mat Xin,Mat Yout)
689 {
690   PC_GASM        *osm = (PC_GASM*)pc->data;
691   Mat            X,Y,O=NULL,Z,W;
692   Vec            x,y;
693   PetscInt       i,m,M,N;
694   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
695   PetscErrorCode ierr;
696 
697   PetscFunctionBegin;
698   if (osm->n != 1) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented");
699   ierr = MatGetSize(Xin,NULL,&N);CHKERRQ(ierr);
700   if (osm->pctoouter) {
701     ierr = VecGetLocalSize(osm->pcx,&m);CHKERRQ(ierr);
702     ierr = VecGetSize(osm->pcx,&M);CHKERRQ(ierr);
703     ierr = MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&O);CHKERRQ(ierr);
704     for (i = 0; i < N; ++i) {
705       ierr = MatDenseGetColumnVecRead(Xin,i,&x);
706       ierr = MatDenseGetColumnVecWrite(O,i,&y);
707       ierr = VecScatterBegin(osm->pctoouter,x,y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
708       ierr = VecScatterEnd(osm->pctoouter,x,y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
709       ierr = MatDenseRestoreColumnVecWrite(O,i,&y);
710       ierr = MatDenseRestoreColumnVecRead(Xin,i,&x);
711     }
712     X = Y = O;
713   } else {
714     X = Xin;
715     Y = Yout;
716   }
717   /*
718      support for limiting the restriction or interpolation only to the inner
719      subdomain values (leaving the other values 0).
720   */
721   ierr = VecGetLocalSize(osm->x[0],&m);CHKERRQ(ierr);
722   ierr = VecGetSize(osm->x[0],&M);CHKERRQ(ierr);
723   ierr = MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&Z);CHKERRQ(ierr);
724   for (i = 0; i < N; ++i) {
725     ierr = MatDenseGetColumnVecRead(X,i,&x);
726     ierr = MatDenseGetColumnVecWrite(Z,i,&y);
727     if (!(osm->type & PC_GASM_RESTRICT)) {
728       /* have to zero the work RHS since scatter may leave some slots empty */
729       ierr = VecZeroEntries(y);CHKERRQ(ierr);
730       ierr = VecScatterBegin(osm->girestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr);
731       ierr = VecScatterEnd(osm->girestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr);
732     } else {
733       ierr = VecScatterBegin(osm->gorestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr);
734       ierr = VecScatterEnd(osm->gorestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr);
735     }
736     ierr = MatDenseRestoreColumnVecWrite(Z,i,&y);
737     ierr = MatDenseRestoreColumnVecRead(X,i,&x);
738   }
739   ierr = MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&W);CHKERRQ(ierr);
740   ierr = MatSetOption(Z,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
741   ierr = MatAssemblyBegin(Z,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
742   ierr = MatAssemblyEnd(Z,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
743   /* do the subdomain solve */
744   ierr = KSPMatSolve(osm->ksp[0],Z,W);CHKERRQ(ierr);
745   ierr = KSPCheckSolve(osm->ksp[0],pc,NULL);CHKERRQ(ierr);
746   ierr = MatDestroy(&Z);CHKERRQ(ierr);
747   /* do we need to zero y? */
748   ierr = MatZeroEntries(Y);CHKERRQ(ierr);
749   for (i = 0; i < N; ++i) {
750     ierr = MatDenseGetColumnVecWrite(Y,i,&y);
751     ierr = MatDenseGetColumnVecRead(W,i,&x);
752     if (!(osm->type & PC_GASM_INTERPOLATE)) {
753       ierr = VecScatterBegin(osm->girestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr);
754       ierr = VecScatterEnd(osm->girestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr);
755     } else {
756       ierr = VecScatterBegin(osm->gorestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr);
757       ierr = VecScatterEnd(osm->gorestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr);
758     }
759     ierr = MatDenseRestoreColumnVecRead(W,i,&x);
760     if (osm->pctoouter) {
761       ierr = MatDenseGetColumnVecWrite(Yout,i,&x);
762       ierr = VecScatterBegin(osm->pctoouter,y,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
763       ierr = VecScatterEnd(osm->pctoouter,y,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
764       ierr = MatDenseRestoreColumnVecRead(Yout,i,&x);
765     }
766     ierr = MatDenseRestoreColumnVecWrite(Y,i,&y);
767   }
768   ierr = MatDestroy(&W);CHKERRQ(ierr);
769   ierr = MatDestroy(&O);CHKERRQ(ierr);
770   PetscFunctionReturn(0);
771 }
772 
773 static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec xin,Vec yout)
774 {
775   PC_GASM        *osm = (PC_GASM*)pc->data;
776   PetscErrorCode ierr;
777   PetscInt       i;
778   Vec            x,y;
779   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
780 
781   PetscFunctionBegin;
782   if(osm->pctoouter){
783    ierr = VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
784    ierr = VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
785    x = osm->pcx;
786    y = osm->pcy;
787   }else{
788 	x = xin;
789 	y = yout;
790   }
791   /*
792      Support for limiting the restriction or interpolation to only local
793      subdomain values (leaving the other values 0).
794 
795      Note: these are reversed from the PCApply_GASM() because we are applying the
796      transpose of the three terms
797   */
798   if (!(osm->type & PC_GASM_INTERPOLATE)) {
799     /* have to zero the work RHS since scatter may leave some slots empty */
800     ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
801     ierr = VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
802   } else {
803     ierr = VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
804   }
805   ierr = VecZeroEntries(osm->gy);CHKERRQ(ierr);
806   if (!(osm->type & PC_GASM_INTERPOLATE)) {
807     ierr = VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
808   } else {
809     ierr = VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
810   }
811   /* do the local solves */
812   for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
813     ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
814     ierr = KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);CHKERRQ(ierr);
815   }
816   ierr = VecZeroEntries(y);CHKERRQ(ierr);
817   if (!(osm->type & PC_GASM_RESTRICT)) {
818     ierr = VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
819     ierr = VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
820   } else {
821     ierr = VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
822     ierr = VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
823   }
824   if(osm->pctoouter){
825    ierr = VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
826    ierr = VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
827   }
828   PetscFunctionReturn(0);
829 }
830 
831 static PetscErrorCode PCReset_GASM(PC pc)
832 {
833   PC_GASM        *osm = (PC_GASM*)pc->data;
834   PetscErrorCode ierr;
835   PetscInt       i;
836 
837   PetscFunctionBegin;
838   if (osm->ksp) {
839     for (i=0; i<osm->n; i++) {
840       ierr = KSPReset(osm->ksp[i]);CHKERRQ(ierr);
841     }
842   }
843   if (osm->pmat) {
844     if (osm->n > 0) {
845       PetscMPIInt size;
846       ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr);
847       if (size > 1) {
848         /* osm->pmat is created by MatCreateSubMatricesMPI(), cannot use MatDestroySubMatrices() */
849         ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
850       } else {
851         ierr = MatDestroySubMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
852       }
853     }
854   }
855   if (osm->x) {
856     for (i=0; i<osm->n; i++) {
857       ierr = VecDestroy(&osm->x[i]);CHKERRQ(ierr);
858       ierr = VecDestroy(&osm->y[i]);CHKERRQ(ierr);
859     }
860   }
861   ierr = VecDestroy(&osm->gx);CHKERRQ(ierr);
862   ierr = VecDestroy(&osm->gy);CHKERRQ(ierr);
863 
864   ierr = VecScatterDestroy(&osm->gorestriction);CHKERRQ(ierr);
865   ierr = VecScatterDestroy(&osm->girestriction);CHKERRQ(ierr);
866   if (!osm->user_subdomains) {
867     ierr      = PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);CHKERRQ(ierr);
868     osm->N    = PETSC_DETERMINE;
869     osm->nmax = PETSC_DETERMINE;
870   }
871   if(osm->pctoouter){
872 	ierr = VecScatterDestroy(&(osm->pctoouter));CHKERRQ(ierr);
873   }
874   if(osm->permutationIS){
875 	ierr = ISDestroy(&(osm->permutationIS));CHKERRQ(ierr);
876   }
877   if(osm->pcx){
878 	ierr = VecDestroy(&(osm->pcx));CHKERRQ(ierr);
879   }
880   if(osm->pcy){
881 	ierr = VecDestroy(&(osm->pcy));CHKERRQ(ierr);
882   }
883   if(osm->permutationP){
884     ierr = MatDestroy(&(osm->permutationP));CHKERRQ(ierr);
885   }
886   if(osm->pcmat){
887 	ierr = MatDestroy(&osm->pcmat);CHKERRQ(ierr);
888   }
889   PetscFunctionReturn(0);
890 }
891 
892 static PetscErrorCode PCDestroy_GASM(PC pc)
893 {
894   PC_GASM        *osm = (PC_GASM*)pc->data;
895   PetscErrorCode ierr;
896   PetscInt       i;
897 
898   PetscFunctionBegin;
899   ierr = PCReset_GASM(pc);CHKERRQ(ierr);
900   /* PCReset will not destroy subdomains, if user_subdomains is true. */
901   ierr = PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);CHKERRQ(ierr);
902   if (osm->ksp) {
903     for (i=0; i<osm->n; i++) {
904       ierr = KSPDestroy(&osm->ksp[i]);CHKERRQ(ierr);
905     }
906     ierr = PetscFree(osm->ksp);CHKERRQ(ierr);
907   }
908   ierr = PetscFree(osm->x);CHKERRQ(ierr);
909   ierr = PetscFree(osm->y);CHKERRQ(ierr);
910   ierr = PetscFree(pc->data);CHKERRQ(ierr);
911   PetscFunctionReturn(0);
912 }
913 
914 static PetscErrorCode PCSetFromOptions_GASM(PetscOptionItems *PetscOptionsObject,PC pc)
915 {
916   PC_GASM        *osm = (PC_GASM*)pc->data;
917   PetscErrorCode ierr;
918   PetscInt       blocks,ovl;
919   PetscBool      flg;
920   PCGASMType     gasmtype;
921 
922   PetscFunctionBegin;
923   ierr = PetscOptionsHead(PetscOptionsObject,"Generalized additive Schwarz options");CHKERRQ(ierr);
924   ierr = PetscOptionsBool("-pc_gasm_use_dm_subdomains","If subdomains aren't set, use DMCreateDomainDecomposition() to define subdomains.","PCGASMSetUseDMSubdomains",osm->dm_subdomains,&osm->dm_subdomains,&flg);CHKERRQ(ierr);
925   ierr = PetscOptionsInt("-pc_gasm_total_subdomains","Total number of subdomains across communicator","PCGASMSetTotalSubdomains",osm->N,&blocks,&flg);CHKERRQ(ierr);
926   if (flg) {
927     ierr = PCGASMSetTotalSubdomains(pc,blocks);CHKERRQ(ierr);
928   }
929   ierr = PetscOptionsInt("-pc_gasm_overlap","Number of overlapping degrees of freedom","PCGASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr);
930   if (flg) {
931     ierr = PCGASMSetOverlap(pc,ovl);CHKERRQ(ierr);
932     osm->dm_subdomains = PETSC_FALSE;
933   }
934   flg  = PETSC_FALSE;
935   ierr = PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);CHKERRQ(ierr);
936   if (flg) {ierr = PCGASMSetType(pc,gasmtype);CHKERRQ(ierr);}
937   ierr = PetscOptionsBool("-pc_gasm_use_hierachical_partitioning","use hierarchical partitioning",NULL,osm->hierarchicalpartitioning,&osm->hierarchicalpartitioning,&flg);CHKERRQ(ierr);
938   ierr = PetscOptionsTail();CHKERRQ(ierr);
939   PetscFunctionReturn(0);
940 }
941 
942 /*------------------------------------------------------------------------------------*/
943 
944 /*@
945     PCGASMSetTotalSubdomains - sets the total number of subdomains to use across the
946                                communicator.
947     Logically collective on pc
948 
949     Input Parameters:
950 +   pc  - the preconditioner
951 -   N   - total number of subdomains
952 
953 
954     Level: beginner
955 
956 .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap()
957           PCGASMCreateSubdomains2D()
958 @*/
959 PetscErrorCode  PCGASMSetTotalSubdomains(PC pc,PetscInt N)
960 {
961   PC_GASM        *osm = (PC_GASM*)pc->data;
962   PetscMPIInt    size,rank;
963   PetscErrorCode ierr;
964 
965   PetscFunctionBegin;
966   if (N < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Total number of subdomains must be 1 or more, got N = %D",N);
967   if (pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before calling PCSetUp().");
968 
969   ierr = PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);CHKERRQ(ierr);
970   osm->ois = osm->iis = NULL;
971 
972   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr);
973   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
974   osm->N    = N;
975   osm->n    = PETSC_DETERMINE;
976   osm->nmax = PETSC_DETERMINE;
977   osm->dm_subdomains = PETSC_FALSE;
978   PetscFunctionReturn(0);
979 }
980 
981 static PetscErrorCode  PCGASMSetSubdomains_GASM(PC pc,PetscInt n,IS iis[],IS ois[])
982 {
983   PC_GASM         *osm = (PC_GASM*)pc->data;
984   PetscErrorCode  ierr;
985   PetscInt        i;
986 
987   PetscFunctionBegin;
988   if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more subdomains, got n = %D",n);
989   if (pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetSubdomains() should be called before calling PCSetUp().");
990 
991   ierr = PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);CHKERRQ(ierr);
992   osm->iis  = osm->ois = NULL;
993   osm->n    = n;
994   osm->N    = PETSC_DETERMINE;
995   osm->nmax = PETSC_DETERMINE;
996   if (ois) {
997     ierr = PetscMalloc1(n,&osm->ois);CHKERRQ(ierr);
998     for (i=0; i<n; i++) {
999       ierr = PetscObjectReference((PetscObject)ois[i]);CHKERRQ(ierr);
1000       osm->ois[i] = ois[i];
1001     }
1002     /*
1003        Since the user set the outer subdomains, even if nontrivial overlap was requested via PCGASMSetOverlap(),
1004        it will be ignored.  To avoid confusion later on (e.g., when viewing the PC), the overlap size is set to -1.
1005     */
1006     osm->overlap = -1;
1007     /* inner subdomains must be provided  */
1008     if (!iis) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"inner indices have to be provided \n");
1009   }/* end if */
1010   if (iis) {
1011     ierr = PetscMalloc1(n,&osm->iis);CHKERRQ(ierr);
1012     for (i=0; i<n; i++) {
1013       ierr = PetscObjectReference((PetscObject)iis[i]);CHKERRQ(ierr);
1014       osm->iis[i] = iis[i];
1015     }
1016     if (!ois) {
1017       osm->ois = NULL;
1018       /* if user does not provide outer indices, we will create the corresponding outer indices using  osm->overlap =1 in PCSetUp_GASM */
1019     }
1020   }
1021   if (PetscDefined(USE_DEBUG)) {
1022     PetscInt        j,rstart,rend,*covered,lsize;
1023     const PetscInt  *indices;
1024     /* check if the inner indices cover and only cover the local portion of the preconditioning matrix */
1025     ierr = MatGetOwnershipRange(pc->pmat,&rstart,&rend);CHKERRQ(ierr);
1026     ierr = PetscCalloc1(rend-rstart,&covered);CHKERRQ(ierr);
1027     /* check if the current processor owns indices from others */
1028     for (i=0; i<n; i++) {
1029       ierr = ISGetIndices(osm->iis[i],&indices);CHKERRQ(ierr);
1030       ierr = ISGetLocalSize(osm->iis[i],&lsize);CHKERRQ(ierr);
1031       for (j=0; j<lsize; j++) {
1032         if (indices[j]<rstart || indices[j]>=rend) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"inner subdomains can not own an index %d from other processors", indices[j]);
1033         else if (covered[indices[j]-rstart]==1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"inner subdomains can not have an overlapping index %d ",indices[j]);
1034         else covered[indices[j]-rstart] = 1;
1035       }
1036     ierr = ISRestoreIndices(osm->iis[i],&indices);CHKERRQ(ierr);
1037     }
1038     /* check if we miss any indices */
1039     for (i=rstart; i<rend; i++) {
1040       if (!covered[i-rstart]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"local entity %d was not covered by inner subdomains",i);
1041     }
1042     ierr = PetscFree(covered);CHKERRQ(ierr);
1043   }
1044   if (iis)  osm->user_subdomains = PETSC_TRUE;
1045   PetscFunctionReturn(0);
1046 }
1047 
1048 static PetscErrorCode  PCGASMSetOverlap_GASM(PC pc,PetscInt ovl)
1049 {
1050   PC_GASM *osm = (PC_GASM*)pc->data;
1051 
1052   PetscFunctionBegin;
1053   if (ovl < 0) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
1054   if (pc->setupcalled && ovl != osm->overlap) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp().");
1055   if (!pc->setupcalled) osm->overlap = ovl;
1056   PetscFunctionReturn(0);
1057 }
1058 
1059 static PetscErrorCode  PCGASMSetType_GASM(PC pc,PCGASMType type)
1060 {
1061   PC_GASM *osm = (PC_GASM*)pc->data;
1062 
1063   PetscFunctionBegin;
1064   osm->type     = type;
1065   osm->type_set = PETSC_TRUE;
1066   PetscFunctionReturn(0);
1067 }
1068 
1069 static PetscErrorCode  PCGASMSetSortIndices_GASM(PC pc,PetscBool doSort)
1070 {
1071   PC_GASM *osm = (PC_GASM*)pc->data;
1072 
1073   PetscFunctionBegin;
1074   osm->sort_indices = doSort;
1075   PetscFunctionReturn(0);
1076 }
1077 
1078 /*
1079    FIXME: This routine might need to be modified now that multiple ranks per subdomain are allowed.
1080         In particular, it would upset the global subdomain number calculation.
1081 */
1082 static PetscErrorCode  PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp)
1083 {
1084   PC_GASM        *osm = (PC_GASM*)pc->data;
1085   PetscErrorCode ierr;
1086 
1087   PetscFunctionBegin;
1088   if (osm->n < 1) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ORDER,"Need to call PCSetUp() on PC (or KSPSetUp() on the outer KSP object) before calling here");
1089 
1090   if (n) *n = osm->n;
1091   if (first) {
1092     ierr    = MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1093     *first -= osm->n;
1094   }
1095   if (ksp) {
1096     /* Assume that local solves are now different; not necessarily
1097        true, though!  This flag is used only for PCView_GASM() */
1098     *ksp                        = osm->ksp;
1099     osm->same_subdomain_solvers = PETSC_FALSE;
1100   }
1101   PetscFunctionReturn(0);
1102 } /* PCGASMGetSubKSP_GASM() */
1103 
1104 /*@C
1105     PCGASMSetSubdomains - Sets the subdomains for this processor
1106     for the additive Schwarz preconditioner.
1107 
1108     Collective on pc
1109 
1110     Input Parameters:
1111 +   pc  - the preconditioner object
1112 .   n   - the number of subdomains for this processor
1113 .   iis - the index sets that define the inner subdomains (or NULL for PETSc to determine subdomains)
1114 -   ois - the index sets that define the outer subdomains (or NULL to use the same as iis, or to construct by expanding iis by the requested overlap)
1115 
1116     Notes:
1117     The IS indices use the parallel, global numbering of the vector entries.
1118     Inner subdomains are those where the correction is applied.
1119     Outer subdomains are those where the residual necessary to obtain the
1120     corrections is obtained (see PCGASMType for the use of inner/outer subdomains).
1121     Both inner and outer subdomains can extend over several processors.
1122     This processor's portion of a subdomain is known as a local subdomain.
1123 
1124     Inner subdomains can not overlap with each other, do not have any entities from remote processors,
1125     and  have to cover the entire local subdomain owned by the current processor. The index sets on each
1126     process should be ordered such that the ith local subdomain is connected to the ith remote subdomain
1127     on another MPI process.
1128 
1129     By default the GASM preconditioner uses 1 (local) subdomain per processor.
1130 
1131 
1132     Level: advanced
1133 
1134 .seealso: PCGASMSetNumSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
1135           PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
1136 @*/
1137 PetscErrorCode  PCGASMSetSubdomains(PC pc,PetscInt n,IS iis[],IS ois[])
1138 {
1139   PC_GASM *osm = (PC_GASM*)pc->data;
1140   PetscErrorCode ierr;
1141 
1142   PetscFunctionBegin;
1143   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1144   ierr = PetscTryMethod(pc,"PCGASMSetSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,iis,ois));CHKERRQ(ierr);
1145   osm->dm_subdomains = PETSC_FALSE;
1146   PetscFunctionReturn(0);
1147 }
1148 
1149 /*@
1150     PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
1151     additive Schwarz preconditioner.  Either all or no processors in the
1152     pc communicator must call this routine.
1153 
1154     Logically Collective on pc
1155 
1156     Input Parameters:
1157 +   pc  - the preconditioner context
1158 -   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 0)
1159 
1160     Options Database Key:
1161 .   -pc_gasm_overlap <overlap> - Sets overlap
1162 
1163     Notes:
1164     By default the GASM preconditioner uses 1 subdomain per processor.  To use
1165     multiple subdomain per perocessor or "straddling" subdomains that intersect
1166     multiple processors use PCGASMSetSubdomains() (or option -pc_gasm_total_subdomains <n>).
1167 
1168     The overlap defaults to 0, so if one desires that no additional
1169     overlap be computed beyond what may have been set with a call to
1170     PCGASMSetSubdomains(), then ovl must be set to be 0.  In particular, if one does
1171     not explicitly set the subdomains in application code, then all overlap would be computed
1172     internally by PETSc, and using an overlap of 0 would result in an GASM
1173     variant that is equivalent to the block Jacobi preconditioner.
1174 
1175     Note that one can define initial index sets with any overlap via
1176     PCGASMSetSubdomains(); the routine PCGASMSetOverlap() merely allows
1177     PETSc to extend that overlap further, if desired.
1178 
1179     Level: intermediate
1180 
1181 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1182           PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
1183 @*/
1184 PetscErrorCode  PCGASMSetOverlap(PC pc,PetscInt ovl)
1185 {
1186   PetscErrorCode ierr;
1187   PC_GASM *osm = (PC_GASM*)pc->data;
1188 
1189   PetscFunctionBegin;
1190   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1191   PetscValidLogicalCollectiveInt(pc,ovl,2);
1192   ierr = PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));CHKERRQ(ierr);
1193   osm->dm_subdomains = PETSC_FALSE;
1194   PetscFunctionReturn(0);
1195 }
1196 
1197 /*@
1198     PCGASMSetType - Sets the type of restriction and interpolation used
1199     for local problems in the additive Schwarz method.
1200 
1201     Logically Collective on PC
1202 
1203     Input Parameters:
1204 +   pc  - the preconditioner context
1205 -   type - variant of GASM, one of
1206 .vb
1207       PC_GASM_BASIC       - full interpolation and restriction
1208       PC_GASM_RESTRICT    - full restriction, local processor interpolation
1209       PC_GASM_INTERPOLATE - full interpolation, local processor restriction
1210       PC_GASM_NONE        - local processor restriction and interpolation
1211 .ve
1212 
1213     Options Database Key:
1214 .   -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
1215 
1216     Level: intermediate
1217 
1218 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1219           PCGASMCreateSubdomains2D()
1220 @*/
1221 PetscErrorCode  PCGASMSetType(PC pc,PCGASMType type)
1222 {
1223   PetscErrorCode ierr;
1224 
1225   PetscFunctionBegin;
1226   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1227   PetscValidLogicalCollectiveEnum(pc,type,2);
1228   ierr = PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));CHKERRQ(ierr);
1229   PetscFunctionReturn(0);
1230 }
1231 
1232 /*@
1233     PCGASMSetSortIndices - Determines whether subdomain indices are sorted.
1234 
1235     Logically Collective on PC
1236 
1237     Input Parameters:
1238 +   pc  - the preconditioner context
1239 -   doSort - sort the subdomain indices
1240 
1241     Level: intermediate
1242 
1243 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1244           PCGASMCreateSubdomains2D()
1245 @*/
1246 PetscErrorCode  PCGASMSetSortIndices(PC pc,PetscBool doSort)
1247 {
1248   PetscErrorCode ierr;
1249 
1250   PetscFunctionBegin;
1251   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1252   PetscValidLogicalCollectiveBool(pc,doSort,2);
1253   ierr = PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));CHKERRQ(ierr);
1254   PetscFunctionReturn(0);
1255 }
1256 
1257 /*@C
1258    PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on
1259    this processor.
1260 
1261    Collective on PC iff first_local is requested
1262 
1263    Input Parameter:
1264 .  pc - the preconditioner context
1265 
1266    Output Parameters:
1267 +  n_local - the number of blocks on this processor or NULL
1268 .  first_local - the global number of the first block on this processor or NULL,
1269                  all processors must request or all must pass NULL
1270 -  ksp - the array of KSP contexts
1271 
1272    Note:
1273    After PCGASMGetSubKSP() the array of KSPes is not to be freed
1274 
1275    Currently for some matrix implementations only 1 block per processor
1276    is supported.
1277 
1278    You must call KSPSetUp() before calling PCGASMGetSubKSP().
1279 
1280    Level: advanced
1281 
1282 .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap(),
1283           PCGASMCreateSubdomains2D(),
1284 @*/
1285 PetscErrorCode  PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
1286 {
1287   PetscErrorCode ierr;
1288 
1289   PetscFunctionBegin;
1290   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1291   ierr = PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));CHKERRQ(ierr);
1292   PetscFunctionReturn(0);
1293 }
1294 
1295 /* -------------------------------------------------------------------------------------*/
1296 /*MC
1297    PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
1298            its own KSP object.
1299 
1300    Options Database Keys:
1301 +  -pc_gasm_total_subdomains <n>  - Sets total number of local subdomains to be distributed among processors
1302 .  -pc_gasm_view_subdomains       - activates the printing of subdomain indices in PCView(), -ksp_view or -snes_view
1303 .  -pc_gasm_print_subdomains      - activates the printing of subdomain indices in PCSetUp()
1304 .  -pc_gasm_overlap <ovl>         - Sets overlap by which to (automatically) extend local subdomains
1305 -  -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
1306 
1307      IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
1308       will get a different convergence rate due to the default option of -pc_gasm_type restrict. Use
1309       -pc_gasm_type basic to use the standard GASM.
1310 
1311    Notes:
1312     Blocks can be shared by multiple processes.
1313 
1314      To set options on the solvers for each block append -sub_ to all the KSP, and PC
1315         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
1316 
1317      To set the options on the solvers separate for each block call PCGASMGetSubKSP()
1318          and set the options directly on the resulting KSP object (you can access its PC
1319          with KSPGetPC())
1320 
1321 
1322    Level: beginner
1323 
1324     References:
1325 +   1. - M Dryja, OB Widlund, An additive variant of the Schwarz alternating method for the case of many subregions
1326      Courant Institute, New York University Technical report
1327 -   2. - Barry Smith, Petter Bjorstad, and William Gropp, Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
1328     Cambridge University Press.
1329 
1330 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
1331            PCBJACOBI,  PCGASMGetSubKSP(), PCGASMSetSubdomains(),
1332            PCSetModifySubMatrices(), PCGASMSetOverlap(), PCGASMSetType()
1333 
1334 M*/
1335 
1336 PETSC_EXTERN PetscErrorCode PCCreate_GASM(PC pc)
1337 {
1338   PetscErrorCode ierr;
1339   PC_GASM        *osm;
1340 
1341   PetscFunctionBegin;
1342   ierr = PetscNewLog(pc,&osm);CHKERRQ(ierr);
1343 
1344   osm->N                        = PETSC_DETERMINE;
1345   osm->n                        = PETSC_DECIDE;
1346   osm->nmax                     = PETSC_DETERMINE;
1347   osm->overlap                  = 0;
1348   osm->ksp                      = NULL;
1349   osm->gorestriction            = NULL;
1350   osm->girestriction            = NULL;
1351   osm->pctoouter                = NULL;
1352   osm->gx                       = NULL;
1353   osm->gy                       = NULL;
1354   osm->x                        = NULL;
1355   osm->y                        = NULL;
1356   osm->pcx                      = NULL;
1357   osm->pcy                      = NULL;
1358   osm->permutationIS            = NULL;
1359   osm->permutationP             = NULL;
1360   osm->pcmat                    = NULL;
1361   osm->ois                      = NULL;
1362   osm->iis                      = NULL;
1363   osm->pmat                     = NULL;
1364   osm->type                     = PC_GASM_RESTRICT;
1365   osm->same_subdomain_solvers   = PETSC_TRUE;
1366   osm->sort_indices             = PETSC_TRUE;
1367   osm->dm_subdomains            = PETSC_FALSE;
1368   osm->hierarchicalpartitioning = PETSC_FALSE;
1369 
1370   pc->data                 = (void*)osm;
1371   pc->ops->apply           = PCApply_GASM;
1372   pc->ops->matapply        = PCMatApply_GASM;
1373   pc->ops->applytranspose  = PCApplyTranspose_GASM;
1374   pc->ops->setup           = PCSetUp_GASM;
1375   pc->ops->reset           = PCReset_GASM;
1376   pc->ops->destroy         = PCDestroy_GASM;
1377   pc->ops->setfromoptions  = PCSetFromOptions_GASM;
1378   pc->ops->setuponblocks   = PCSetUpOnBlocks_GASM;
1379   pc->ops->view            = PCView_GASM;
1380   pc->ops->applyrichardson = NULL;
1381 
1382   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSubdomains_C",PCGASMSetSubdomains_GASM);CHKERRQ(ierr);
1383   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetOverlap_C",PCGASMSetOverlap_GASM);CHKERRQ(ierr);
1384   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetType_C",PCGASMSetType_GASM);CHKERRQ(ierr);
1385   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSortIndices_C",PCGASMSetSortIndices_GASM);CHKERRQ(ierr);
1386   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMGetSubKSP_C",PCGASMGetSubKSP_GASM);CHKERRQ(ierr);
1387   PetscFunctionReturn(0);
1388 }
1389 
/*
   PCGASMCreateLocalSubdomains - Creates nloc nonoverlapping index sets covering this
   process's ownership range of A.  If a "real" MatPartitioning implementation is
   configured (and nloc > 1), the adjacency graph of the local diagonal block is
   partitioned; otherwise the local rows are split into contiguous chunks.  The caller
   receives (and eventually frees) the array of ISs in *iis.
*/
PetscErrorCode  PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[])
{
  MatPartitioning mpart;
  const char      *prefix;
  PetscInt        i,j,rstart,rend,bs;
  PetscBool       hasop, isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
  Mat             Ad     = NULL, adj;
  IS              ispart,isnumb,*is;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  if (nloc < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local subdomains must > 0, got nloc = %D",nloc);

  /* Get prefix, row distribution, and block size */
  ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  /* The ownership range must consist of whole blocks; the block-size arithmetic below relies on it */
  if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);

  /* Get diagonal block from matrix if possible */
  ierr = MatHasOperation(A,MATOP_GET_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
  if (hasop) {
    ierr = MatGetDiagonalBlock(A,&Ad);CHKERRQ(ierr);
  }
  if (Ad) {
    /* (S)BAIJ diagonal blocks are handled per block row below, so remember whether Ad is one */
    ierr = PetscObjectBaseTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
    if (!isbaij) {ierr = PetscObjectBaseTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);}
  }
  /* Graph partitioning is only attempted with a diagonal block and more than one requested subdomain */
  if (Ad && nloc > 1) {
    PetscBool  match,done;
    /* Try to setup a good matrix partitioning if available */
    ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr);
    ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
    ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
    /* CURRENT and SQUARE are trivial partitioners; only proceed with a "real" one */
    ierr = PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);CHKERRQ(ierr);
    if (!match) {
      ierr = PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);CHKERRQ(ierr);
    }
    if (!match) { /* assume a "good" partitioner is available */
      PetscInt       na;
      const PetscInt *ia,*ja;
      ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
      if (done) {
        /* Build adjacency matrix by hand. Unfortunately a call to
           MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
           remove the block-aij structure and we cannot expect
           MatPartitioning to split vertices as we need */
        PetscInt       i,j,len,nnz,cnt,*iia=NULL,*jja=NULL;
        const PetscInt *row;
        nnz = 0;
        for (i=0; i<na; i++) { /* count number of nonzeros */
          len = ia[i+1] - ia[i];
          row = ja + ia[i];
          for (j=0; j<len; j++) {
            if (row[j] == i) { /* don't count diagonal */
              len--; break;
            }
          }
          nnz += len;
        }
        ierr   = PetscMalloc1(na+1,&iia);CHKERRQ(ierr);
        ierr   = PetscMalloc1(nnz,&jja);CHKERRQ(ierr);
        nnz    = 0;
        iia[0] = 0;
        for (i=0; i<na; i++) { /* fill adjacency */
          cnt = 0;
          len = ia[i+1] - ia[i];
          row = ja + ia[i];
          for (j=0; j<len; j++) {
            if (row[j] != i) jja[nnz+cnt++] = row[j]; /* if not diagonal */
          }
          nnz += cnt;
          iia[i+1] = nnz;
        }
        /* Partitioning of the adjacency matrix */
        /* MatCreateMPIAdj takes ownership of iia and jja, so they are not freed here;
           they are released when adj is destroyed */
        ierr      = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,NULL,&adj);CHKERRQ(ierr);
        ierr      = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr);
        ierr      = MatPartitioningSetNParts(mpart,nloc);CHKERRQ(ierr);
        ierr      = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr);
        ierr      = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr);
        ierr      = MatDestroy(&adj);CHKERRQ(ierr);
        foundpart = PETSC_TRUE;
      }
      /* Must pair with MatGetRowIJ above whether or not the partitioning succeeded */
      ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
    }
    ierr = MatPartitioningDestroy(&mpart);CHKERRQ(ierr);
  }
  ierr = PetscMalloc1(nloc,&is);CHKERRQ(ierr);
  if (!foundpart) {

    /* Partitioning by contiguous chunks of rows */

    PetscInt mbs   = (rend-rstart)/bs;
    PetscInt start = rstart;
    for (i=0; i<nloc; i++) {
      /* each subdomain gets mbs/nloc blocks; the first mbs%nloc subdomains get one extra */
      PetscInt count = (mbs/nloc + ((mbs % nloc) > i)) * bs;
      ierr   = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr);
      start += count;
    }

  } else {

    /* Partitioning by adjacency of diagonal block  */

    const PetscInt *numbering;
    PetscInt       *count,nidx,*indices,*newidx,start=0;
    /* Get node count in each partition */
    ierr = PetscMalloc1(nloc,&count);CHKERRQ(ierr);
    ierr = ISPartitioningCount(ispart,nloc,count);CHKERRQ(ierr);
    if (isbaij && bs > 1) { /* adjust for the block-aij case */
      for (i=0; i<nloc; i++) count[i] *= bs;
    }
    /* Build indices from node numbering */
    ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr);
    ierr = PetscMalloc1(nidx,&indices);CHKERRQ(ierr);
    for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
    ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr);
    /* after the sort, indices[] holds the local nodes grouped by target partition */
    ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr);
    ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr);
    if (isbaij && bs > 1) { /* adjust for the block-aij case */
      /* expand each block-row index into its bs constituent point rows */
      ierr = PetscMalloc1(nidx*bs,&newidx);CHKERRQ(ierr);
      for (i=0; i<nidx; i++) {
        for (j=0; j<bs; j++) newidx[i*bs+j] = indices[i]*bs + j;
      }
      ierr    = PetscFree(indices);CHKERRQ(ierr);
      nidx   *= bs;
      indices = newidx;
    }
    /* Shift to get global indices */
    for (i=0; i<nidx; i++) indices[i] += rstart;

    /* Build the index sets for each block */
    for (i=0; i<nloc; i++) {
      ierr   = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);CHKERRQ(ierr);
      ierr   = ISSort(is[i]);CHKERRQ(ierr);
      start += count[i];
    }

    ierr = PetscFree(count);CHKERRQ(ierr);
    ierr = PetscFree(indices);CHKERRQ(ierr);
    ierr = ISDestroy(&isnumb);CHKERRQ(ierr);
    ierr = ISDestroy(&ispart);CHKERRQ(ierr);
  }
  *iis = is;
  PetscFunctionReturn(0);
}
1536 
/*
   PCGASMCreateStraddlingSubdomains - Creates N subdomains that may straddle process
   boundaries (used when fewer subdomains than processes are requested); simply
   delegates to MatSubdomainsCreateCoalesce(), which returns the local count in *n
   and the index sets in *iis.
*/
PETSC_INTERN PetscErrorCode  PCGASMCreateStraddlingSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
{
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  ierr = MatSubdomainsCreateCoalesce(A,N,n,iis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1545 
1546 /*@C
1547    PCGASMCreateSubdomains - Creates n index sets defining n nonoverlapping subdomains for the additive
1548    Schwarz preconditioner for a any problem based on its matrix.
1549 
1550    Collective
1551 
1552    Input Parameters:
1553 +  A       - The global matrix operator
1554 -  N       - the number of global subdomains requested
1555 
1556    Output Parameters:
1557 +  n   - the number of subdomains created on this processor
1558 -  iis - the array of index sets defining the local inner subdomains (on which the correction is applied)
1559 
1560    Level: advanced
1561 
1562    Note: When N >= A's communicator size, each subdomain is local -- contained within a single processor.
1563          When N < size, the subdomains are 'straddling' (processor boundaries) and are no longer local.
1564          The resulting subdomains can be use in PCGASMSetSubdomains(pc,n,iss,NULL).  The overlapping
1565 	 outer subdomains will be automatically generated from these according to the requested amount of
1566 	 overlap; this is currently supported only with local subdomains.
1567 
1568 
1569 .seealso: PCGASMSetSubdomains(), PCGASMDestroySubdomains()
1570 @*/
1571 PetscErrorCode  PCGASMCreateSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
1572 {
1573   PetscMPIInt     size;
1574   PetscErrorCode  ierr;
1575 
1576   PetscFunctionBegin;
1577   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1578   PetscValidPointer(iis,4);
1579 
1580   if (N < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Number of subdomains must be > 0, N = %D",N);
1581   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1582   if (N >= size) {
1583     *n = N/size + (N%size);
1584     ierr = PCGASMCreateLocalSubdomains(A,*n,iis);CHKERRQ(ierr);
1585   } else {
1586     ierr = PCGASMCreateStraddlingSubdomains(A,N,n,iis);CHKERRQ(ierr);
1587   }
1588   PetscFunctionReturn(0);
1589 }
1590 
1591 /*@C
1592    PCGASMDestroySubdomains - Destroys the index sets created with
1593    PCGASMCreateSubdomains() or PCGASMCreateSubdomains2D. Should be
1594    called after setting subdomains with PCGASMSetSubdomains().
1595 
1596    Collective
1597 
1598    Input Parameters:
1599 +  n   - the number of index sets
1600 .  iis - the array of inner subdomains,
1601 -  ois - the array of outer subdomains, can be NULL
1602 
1603    Level: intermediate
1604 
1605    Notes:
1606     this is merely a convenience subroutine that walks each list,
1607    destroys each IS on the list, and then frees the list. At the end the
1608    list pointers are set to NULL.
1609 
1610 .seealso: PCGASMCreateSubdomains(), PCGASMSetSubdomains()
1611 @*/
1612 PetscErrorCode  PCGASMDestroySubdomains(PetscInt n,IS **iis,IS **ois)
1613 {
1614   PetscInt       i;
1615   PetscErrorCode ierr;
1616 
1617   PetscFunctionBegin;
1618   if (n <= 0) PetscFunctionReturn(0);
1619   if (ois) {
1620     PetscValidPointer(ois,3);
1621     if (*ois) {
1622       PetscValidPointer(*ois,3);
1623       for (i=0; i<n; i++) {
1624         ierr = ISDestroy(&(*ois)[i]);CHKERRQ(ierr);
1625       }
1626       ierr = PetscFree((*ois));CHKERRQ(ierr);
1627     }
1628   }
1629   if (iis) {
1630     PetscValidPointer(iis,2);
1631     if (*iis) {
1632       PetscValidPointer(*iis,2);
1633       for (i=0; i<n; i++) {
1634         ierr = ISDestroy(&(*iis)[i]);CHKERRQ(ierr);
1635       }
1636       ierr = PetscFree((*iis));CHKERRQ(ierr);
1637     }
1638   }
1639   PetscFunctionReturn(0);
1640 }
1641 
/*
   PCGASMLocalSubdomainBounds2D - Given a 2D subdomain with (pre-overlap-extended) bounds
   [xleft,xright) x [ylow,yhigh) on an M x N grid, and this process's ownership range
   [first,last) of grid points (row-major), computes through the pointer arguments:
     *ylow_loc/*yhigh_loc   - grid rows bounding the intersection of the subdomain with
                              the local ownership range,
     *xleft_loc/*xright_loc - column bounds on the first and last of those rows,
     *n                     - the number of local grid points in the intersection.
   Implemented as a statement macro (not a function) so it can write through all five
   output pointers at once; callers pass addresses, e.g. (&xleft_loc).
*/
#define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \
  {                                                                                                       \
    PetscInt first_row = first/M, last_row = last/M+1;                                                     \
    /*                                                                                                    \
     Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners       \
     of the bounding box of the intersection of the subdomain with the local ownership range (local       \
     subdomain).                                                                                          \
     Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows       \
     of the intersection.                                                                                 \
    */                                                                                                    \
    /* ylow_loc is the grid row containing the first element of the local sumbdomain */                   \
    *ylow_loc = PetscMax(first_row,ylow);                                                                    \
    /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
    *xleft_loc = *ylow_loc==first_row ? PetscMax(first%M,xleft) : xleft;                                                                            \
    /* yhigh_loc is the grid row above the last local subdomain element */                                                                    \
    *yhigh_loc = PetscMin(last_row,yhigh);                                                                                                     \
    /* xright is the offset of the end of the  local subdomain within its grid row (might actually be outside the local subdomain) */         \
    *xright_loc = *yhigh_loc==last_row ? PetscMin(xright,last%M) : xright;                                                                          \
    /* Now compute the size of the local subdomain n. */ \
    *n = 0;                                               \
    if (*ylow_loc < *yhigh_loc) {                           \
      PetscInt width = xright-xleft;                     \
      *n += width*(*yhigh_loc-*ylow_loc-1);                 \
      *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \
      *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \
    } \
  }
1669 
1670 /*@
1671    PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1672    preconditioner for a two-dimensional problem on a regular grid.
1673 
1674    Collective
1675 
1676    Input Parameters:
+  pc                 - the preconditioner context
.  M, N               - the global number of grid points in the x and y directions
.  Mdomains, Ndomains - the global number of subdomains in the x and y directions
.  dof                - degrees of freedom per node
-  overlap            - overlap in mesh lines

   Output Parameters:
+  nsub - the number of local subdomains created
1684 .  iis  - array of index sets defining inner (nonoverlapping) subdomains
1685 -  ois  - array of index sets defining outer (overlapping, if overlap > 0) subdomains
1686 
1687 
1688    Level: advanced
1689 
1690 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(), PCGASMSetOverlap()
1691 @*/
PetscErrorCode  PCGASMCreateSubdomains2D(PC pc,PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap,PetscInt *nsub,IS **iis,IS **ois)
{
  PetscErrorCode ierr;
  PetscMPIInt    size, rank;
  PetscInt       i, j;
  PetscInt       maxheight, maxwidth;
  PetscInt       xstart, xleft, xright, xleft_loc, xright_loc;
  PetscInt       ystart, ylow,  yhigh,  ylow_loc,  yhigh_loc;
  PetscInt       x[2][2], y[2][2], n[2];   /* index 0: with overlap (outer), index 1: without (inner) */
  PetscInt       first, last;
  PetscInt       nidx, *idx;
  PetscInt       ii,jj,s,q,d;
  PetscInt       k,kk;
  PetscMPIInt    color;
  MPI_Comm       comm, subcomm;
  IS             **xis = NULL, **is = ois, **is_local = iis;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)pc, &comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(pc->pmat, &first, &last);CHKERRQ(ierr);
  /* The local row range must fall on grid-point (dof) boundaries */
  if (first%dof || last%dof) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) "
                                      "does not respect the number of degrees of freedom per grid point %D", first, last, dof);

  /* Determine the number of domains with nonzero intersections with the local ownership range. */
  /* First pass over the Mdomains x Ndomains subdomain grid: only count, so the IS arrays can be
     sized exactly before the second (identical traversal) pass fills them in. */
  s      = 0;
  ystart = 0;
  for (j=0; j<Ndomains; ++j) {
    maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
    if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N);
    /* Vertical domain limits with an overlap. */
    ylow   = PetscMax(ystart - overlap,0);
    yhigh  = PetscMin(ystart + maxheight + overlap,N);
    xstart = 0;
    for (i=0; i<Mdomains; ++i) {
      maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
      if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
      /* Horizontal domain limits with an overlap. */
      xleft  = PetscMax(xstart - overlap,0);
      xright = PetscMin(xstart + maxwidth + overlap,M);
      /*
         Determine whether this subdomain intersects this processor's ownership range of pc->pmat.
      */
      PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
      if (nidx) ++s;
      xstart += maxwidth;
    } /* for (i = 0; i < Mdomains; ++i) */
    ystart += maxheight;
  } /* for (j = 0; j < Ndomains; ++j) */

  /* Now we can allocate the necessary number of ISs. */
  *nsub  = s;
  ierr   = PetscMalloc1(*nsub,is);CHKERRQ(ierr);
  ierr   = PetscMalloc1(*nsub,is_local);CHKERRQ(ierr);
  /* Second pass: same traversal as above, now creating the outer (is) and inner (is_local) ISs */
  s      = 0;
  ystart = 0;
  for (j=0; j<Ndomains; ++j) {
    maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
    if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N);
    /* Vertical domain limits with an overlap. */
    y[0][0] = PetscMax(ystart - overlap,0);
    y[0][1] = PetscMin(ystart + maxheight + overlap,N);
    /* Vertical domain limits without an overlap. */
    y[1][0] = ystart;
    y[1][1] = PetscMin(ystart + maxheight,N);
    xstart  = 0;
    for (i=0; i<Mdomains; ++i) {
      maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
      if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
      /* Horizontal domain limits with an overlap. */
      x[0][0] = PetscMax(xstart - overlap,0);
      x[0][1] = PetscMin(xstart + maxwidth + overlap,M);
      /* Horizontal domain limits without an overlap. */
      x[1][0] = xstart;
      x[1][1] = PetscMin(xstart+maxwidth,M);
      /*
         Determine whether this domain intersects this processor's ownership range of pc->pmat.
         Do this twice: first for the domains with overlaps, and once without.
         During the first pass create the subcommunicators, and use them on the second pass as well.
      */
      for (q = 0; q < 2; ++q) {
        PetscBool split = PETSC_FALSE;   /* whether subcomm below came from MPI_Comm_split and must be freed here */
        /*
          domain limits, (xleft, xright) and (ylow, yheigh) are adjusted
          according to whether the domain with an overlap or without is considered.
        */
        xleft = x[q][0]; xright = x[q][1];
        ylow  = y[q][0]; yhigh  = y[q][1];
        PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
        nidx *= dof;
        n[q]  = nidx;
        /*
         Based on the counted number of indices in the local domain *with an overlap*,
         construct a subcommunicator of all the processors supporting this domain.
         Observe that a domain with an overlap might have nontrivial local support,
         while the domain without an overlap might not.  Hence, the decision to participate
         in the subcommunicator must be based on the domain with an overlap.
         */
        if (q == 0) {
          if (nidx) color = 1;
          else color = MPI_UNDEFINED;   /* non-participating ranks get MPI_COMM_NULL */
          ierr = MPI_Comm_split(comm, color, rank, &subcomm);CHKERRQ(ierr);
          split = PETSC_TRUE;
        }
        /*
         Proceed only if the number of local indices *with an overlap* is nonzero.
         */
        if (n[0]) {
          if (q == 0) xis = is;
          if (q == 1) {
            /*
             The IS for the no-overlap subdomain shares a communicator with the overlapping domain.
             Moreover, if the overlap is zero, the two ISs are identical.
             */
            if (overlap == 0) {
              (*is_local)[s] = (*is)[s];
              ierr           = PetscObjectReference((PetscObject)(*is)[s]);CHKERRQ(ierr);
              continue;
            } else {
              xis     = is_local;
              subcomm = ((PetscObject)(*is)[s])->comm;
            }
          } /* if (q == 1) */
          idx  = NULL;
          ierr = PetscMalloc1(nidx,&idx);CHKERRQ(ierr);
          if (nidx) {
            /* Enumerate the local grid points of the subdomain row by row, expanding each
               grid point into its dof consecutive matrix indices */
            k = 0;
            for (jj=ylow_loc; jj<yhigh_loc; ++jj) {
              PetscInt x0 = (jj==ylow_loc) ? xleft_loc : xleft;
              PetscInt x1 = (jj==yhigh_loc-1) ? xright_loc : xright;
              kk = dof*(M*jj + x0);
              for (ii=x0; ii<x1; ++ii) {
                for (d = 0; d < dof; ++d) {
                  idx[k++] = kk++;
                }
              }
            }
          }
          ierr = ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*xis)+s);CHKERRQ(ierr);
          if (split) {
            /* The IS retains the communicator it needs, so the handle obtained from
               MPI_Comm_split above can be released here. */
            ierr = MPI_Comm_free(&subcomm);CHKERRQ(ierr);
          }
        }/* if (n[0]) */
      }/* for (q = 0; q < 2; ++q) */
      if (n[0]) ++s;
      xstart += maxwidth;
    } /* for (i = 0; i < Mdomains; ++i) */
    ystart += maxheight;
  } /* for (j = 0; j < Ndomains; ++j) */
  PetscFunctionReturn(0);
}
1844 
1845 /*@C
1846     PCGASMGetSubdomains - Gets the subdomains supported on this processor
1847     for the additive Schwarz preconditioner.
1848 
1849     Not Collective
1850 
1851     Input Parameter:
1852 .   pc - the preconditioner context
1853 
1854     Output Parameters:
1855 +   n   - the number of subdomains for this processor (default value = 1)
1856 .   iis - the index sets that define the inner subdomains (without overlap) supported on this processor (can be NULL)
1857 -   ois - the index sets that define the outer subdomains (with overlap) supported on this processor (can be NULL)
1858 
1859 
1860     Notes:
1861     The user is responsible for destroying the ISs and freeing the returned arrays.
1862     The IS numbering is in the parallel, global numbering of the vector.
1863 
1864     Level: advanced
1865 
1866 .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(), PCGASMCreateSubdomains2D(),
1867           PCGASMSetSubdomains(), PCGASMGetSubmatrices()
1868 @*/
1869 PetscErrorCode  PCGASMGetSubdomains(PC pc,PetscInt *n,IS *iis[],IS *ois[])
1870 {
1871   PC_GASM        *osm;
1872   PetscErrorCode ierr;
1873   PetscBool      match;
1874   PetscInt       i;
1875 
1876   PetscFunctionBegin;
1877   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1878   ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1879   if (!match) SETERRQ2(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Incorrect object type: expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
1880   osm = (PC_GASM*)pc->data;
1881   if (n) *n = osm->n;
1882   if (iis) {
1883     ierr = PetscMalloc1(osm->n, iis);CHKERRQ(ierr);
1884   }
1885   if (ois) {
1886     ierr = PetscMalloc1(osm->n, ois);CHKERRQ(ierr);
1887   }
1888   if (iis || ois) {
1889     for (i = 0; i < osm->n; ++i) {
1890       if (iis) (*iis)[i] = osm->iis[i];
1891       if (ois) (*ois)[i] = osm->ois[i];
1892     }
1893   }
1894   PetscFunctionReturn(0);
1895 }
1896 
1897 /*@C
1898     PCGASMGetSubmatrices - Gets the local submatrices (for this processor
1899     only) for the additive Schwarz preconditioner.
1900 
1901     Not Collective
1902 
1903     Input Parameter:
1904 .   pc - the preconditioner context
1905 
1906     Output Parameters:
1907 +   n   - the number of matrices for this processor (default value = 1)
1908 -   mat - the matrices
1909 
1910     Notes:
1911     matrices returned by this routine have the same communicators as the index sets (IS)
1912            used to define subdomains in PCGASMSetSubdomains()
1913     Level: advanced
1914 
1915 .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(),
1916           PCGASMCreateSubdomains2D(), PCGASMSetSubdomains(), PCGASMGetSubdomains()
1917 @*/
1918 PetscErrorCode  PCGASMGetSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1919 {
1920   PC_GASM        *osm;
1921   PetscErrorCode ierr;
1922   PetscBool      match;
1923 
1924   PetscFunctionBegin;
1925   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1926   PetscValidIntPointer(n,2);
1927   if (mat) PetscValidPointer(mat,3);
1928   if (!pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUp() or PCSetUp().");
1929   ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1930   if (!match) SETERRQ2(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
1931   osm = (PC_GASM*)pc->data;
1932   if (n) *n = osm->n;
1933   if (mat) *mat = osm->pmat;
1934   PetscFunctionReturn(0);
1935 }
1936 
1937 /*@
1938     PCGASMSetUseDMSubdomains - Indicates whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
1939     Logically Collective
1940 
1941     Input Parameter:
1942 +   pc  - the preconditioner
1943 -   flg - boolean indicating whether to use subdomains defined by the DM
1944 
1945     Options Database Key:
1946 .   -pc_gasm_dm_subdomains -pc_gasm_overlap -pc_gasm_total_subdomains
1947 
1948     Level: intermediate
1949 
1950     Notes:
1951     PCGASMSetSubdomains(), PCGASMSetTotalSubdomains() or PCGASMSetOverlap() take precedence over PCGASMSetUseDMSubdomains(),
1952     so setting PCGASMSetSubdomains() with nontrivial subdomain ISs or any of PCGASMSetTotalSubdomains() and PCGASMSetOverlap()
1953     automatically turns the latter off.
1954 
1955 .seealso: PCGASMGetUseDMSubdomains(), PCGASMSetSubdomains(), PCGASMSetOverlap()
1956           PCGASMCreateSubdomains2D()
1957 @*/
PetscErrorCode  PCGASMSetUseDMSubdomains(PC pc,PetscBool flg)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveBool(pc,flg,2);
  if (pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Not for a setup PC.");
  /* Silently ignored (no error) when pc is not a PCGASM */
  ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
  if (match) {
    /* Only honor the flag if the user has not already prescribed subdomains, a
       subdomain count, or an overlap explicitly. */
    /* NOTE(review): osm->overlap is initialized to 0 in PCCreate_GASM(), so
       'osm->overlap < 0' appears to be always false, making this function a
       no-op -- verify whether the intended test is on a "not yet set" sentinel
       (e.g. 'osm->overlap == 0' or an initialization of overlap to -1). */
    if (!osm->user_subdomains && osm->N == PETSC_DETERMINE && osm->overlap < 0) {
      osm->dm_subdomains = flg;
    }
  }
  PetscFunctionReturn(0);
}
1976 
1977 /*@
1978     PCGASMGetUseDMSubdomains - Returns flag indicating whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
1979     Not Collective
1980 
1981     Input Parameter:
1982 .   pc  - the preconditioner
1983 
1984     Output Parameter:
1985 .   flg - boolean indicating whether to use subdomains defined by the DM
1986 
1987     Level: intermediate
1988 
1989 .seealso: PCGASMSetUseDMSubdomains(), PCGASMSetOverlap()
1990           PCGASMCreateSubdomains2D()
1991 @*/
1992 PetscErrorCode  PCGASMGetUseDMSubdomains(PC pc,PetscBool* flg)
1993 {
1994   PC_GASM        *osm = (PC_GASM*)pc->data;
1995   PetscErrorCode ierr;
1996   PetscBool      match;
1997 
1998   PetscFunctionBegin;
1999   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
2000   PetscValidBoolPointer(flg,2);
2001   ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
2002   if (match) {
2003     if (flg) *flg = osm->dm_subdomains;
2004   }
2005   PetscFunctionReturn(0);
2006 }
2007