/*
  This file defines a "generalized" additive Schwarz preconditioner for any Mat implementation.
  In this version each processor may intersect multiple subdomains and any subdomain may
  intersect multiple processors.  Intersections of subdomains with processors are called *local
  subdomains*.

       N    - total number of distinct global subdomains          (set explicitly in PCGASMSetTotalSubdomains() or implicitly in PCGASMSetSubdomains() and then calculated in PCSetUp_GASM())
       n    - actual number of local subdomains on this processor (set in PCGASMSetSubdomains() or calculated in PCGASMSetTotalSubdomains())
       nmax - maximum number of local subdomains per processor    (calculated in PCSetUp_GASM())
*/
#include <petsc/private/pcimpl.h>     /*I "petscpc.h" I*/
#include <petscdm.h>

typedef struct {
  PetscInt    N,n,nmax;
  PetscInt    overlap;                  /* overlap requested by user */
  PCGASMType  type;                     /* use reduced interpolation, restriction or both */
  PetscBool   type_set;                 /* if user set this value (so won't change it for symmetric problems) */
  PetscBool   same_subdomain_solvers;   /* flag indicating whether all local solvers are same */
  PetscBool   sort_indices;             /* flag to sort subdomain indices */
  PetscBool   user_subdomains;          /* whether the user set explicit subdomain index sets -- keep them on PCReset() */
  PetscBool   dm_subdomains;            /* whether DM is allowed to define subdomains */
  PetscBool   hierarchicalpartitioning;
  IS          *ois;                     /* index sets that define the outer (conceptually, overlapping) subdomains */
  IS          *iis;                     /* index sets that define the inner (conceptually, nonoverlapping) subdomains */
  KSP         *ksp;                     /* linear solvers for each subdomain */
  Mat         *pmat;                    /* subdomain block matrices */
  Vec         gx,gy;                    /* merged work vectors */
  Vec         *x,*y;                    /* split work vectors; storage aliases pieces of storage of the above merged vectors */
  VecScatter  gorestriction;            /* merged restriction to disjoint union of outer subdomains */
  VecScatter  girestriction;            /* merged restriction to disjoint union of inner subdomains */
  VecScatter  pctoouter;
  IS          permutationIS;
  Mat         permutationP;
  Mat         pcmat;
  Vec         pcx,pcy;
} PC_GASM;

static PetscErrorCode  PCGASMComputeGlobalSubdomainNumbering_Private(PC pc,PetscInt **numbering,PetscInt **permutation)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscInt       i;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
  ierr = PetscMalloc2(osm->n,numbering,osm->n,permutation);CHKERRQ(ierr);
  ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->iis,NULL,*numbering);CHKERRQ(ierr);
  for (i = 0; i < osm->n; ++i) (*permutation)[i] = i;
  ierr = PetscSortIntWithPermutation(osm->n,*numbering,*permutation);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode  PCGASMSubdomainView_Private(PC pc, PetscInt i, PetscViewer viewer)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscInt       j,nidx;
  const PetscInt *idx;
  PetscViewer    sviewer;
  char           *cidx;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i < 0 || i >= osm->n) SETERRQ2(PetscObjectComm((PetscObject)viewer), PETSC_ERR_ARG_WRONG, "Invalid subdomain %D: must be nonnegative and less than %D", i, osm->n);
  /* Inner subdomains. */
  ierr = ISGetLocalSize(osm->iis[i], &nidx);CHKERRQ(ierr);
  /*
   No more than 15 characters per index plus a space.
   PetscViewerStringSPrintf() requires a string of size at least 2, so use (nidx+1) instead of nidx,
   in case nidx == 0. That will take care of the space for the trailing '\0' as well.
   For nidx == 0, the whole string consists of 16 '\0' characters.
   */
  ierr = PetscMalloc1(16*(nidx+1)+1, &cidx);CHKERRQ(ierr);
  ierr = PetscViewerStringOpen(PETSC_COMM_SELF, cidx, 16*(nidx+1)+1, &sviewer);CHKERRQ(ierr);
  ierr = ISGetIndices(osm->iis[i], &idx);CHKERRQ(ierr);
  for (j = 0; j < nidx; ++j) {
    ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);CHKERRQ(ierr);
  }
  ierr = ISRestoreIndices(osm->iis[i],&idx);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer, "Inner subdomain:\n");CHKERRQ(ierr);
  ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
  ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr);
  ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr);
  ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  ierr = PetscFree(cidx);CHKERRQ(ierr);
  /* Outer subdomains. */
  ierr = ISGetLocalSize(osm->ois[i], &nidx);CHKERRQ(ierr);
  /*
   No more than 15 characters per index plus a space.
   PetscViewerStringSPrintf() requires a string of size at least 2, so use (nidx+1) instead of nidx,
   in case nidx == 0. That will take care of the space for the trailing '\0' as well.
   For nidx == 0, the whole string consists of 16 '\0' characters.
   */
  ierr = PetscMalloc1(16*(nidx+1)+1, &cidx);CHKERRQ(ierr);
  ierr = PetscViewerStringOpen(PETSC_COMM_SELF, cidx, 16*(nidx+1)+1, &sviewer);CHKERRQ(ierr);
  ierr = ISGetIndices(osm->ois[i], &idx);CHKERRQ(ierr);
  for (j = 0; j < nidx; ++j) {
    ierr = PetscViewerStringSPrintf(sviewer,"%D ", idx[j]);CHKERRQ(ierr);
  }
  ierr = ISRestoreIndices(osm->ois[i],&idx);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer, "Outer subdomain:\n");CHKERRQ(ierr);
  ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
  ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr);
  ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr);
  ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  ierr = PetscFree(cidx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode  PCGASMPrintSubdomains(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  const char     *prefix;
  char           fname[PETSC_MAX_PATH_LEN+1];
  PetscInt       l, d, count;
  PetscBool      doprint,found;
  PetscViewer    viewer, sviewer = NULL;
  PetscInt       *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
  doprint = PETSC_FALSE;
  ierr = PetscOptionsGetBool(NULL,prefix,"-pc_gasm_print_subdomains",&doprint,NULL);CHKERRQ(ierr);
  if (!doprint) PetscFunctionReturn(0);
  ierr = PetscOptionsGetString(NULL,prefix,"-pc_gasm_print_subdomains",fname,PETSC_MAX_PATH_LEN,&found);CHKERRQ(ierr);
  if (!found) {ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr);}
  ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)pc),fname,&viewer);CHKERRQ(ierr);
  /*
   Make sure the viewer has a name. Otherwise this may cause a deadlock or other weird errors when creating a subcomm viewer:
   the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed collectively on the comm.
  */
  ierr = PetscObjectName((PetscObject)viewer);CHKERRQ(ierr);
  l    = 0;
  ierr = PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);CHKERRQ(ierr);
  for (count = 0; count < osm->N; ++count) {
    /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
    if (l<osm->n) {
      d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
      if (numbering[d] == count) {
        ierr = PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
        ierr = PCGASMSubdomainView_Private(pc,d,sviewer);CHKERRQ(ierr);
        ierr = PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
        ++l;
      }
    }
    ierr = MPI_Barrier(PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
  }
  ierr = PetscFree2(numbering,permutation);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  const char     *prefix;
  PetscErrorCode ierr;
  PetscMPIInt    rank, size;
  PetscInt       bsz;
  PetscBool      iascii,view_subdomains=PETSC_FALSE;
  PetscViewer    sviewer;
  PetscInt       count, l;
  char           overlap[256]     = "user-defined overlap";
  char           gsubdomains[256] = "unknown total number of subdomains";
  char           msubdomains[256] = "unknown max number of local subdomains";
  PetscInt       *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank);CHKERRQ(ierr);

  if (osm->overlap >= 0) {
    ierr = PetscSNPrintf(overlap,sizeof(overlap),"requested amount of overlap = %D",osm->overlap);CHKERRQ(ierr);
  }
  if (osm->N != PETSC_DETERMINE) {
    ierr = PetscSNPrintf(gsubdomains, sizeof(gsubdomains), "total number of subdomains = %D",osm->N);CHKERRQ(ierr);
  }
  if (osm->nmax != PETSC_DETERMINE) {
    ierr = PetscSNPrintf(msubdomains,sizeof(msubdomains),"max number of local subdomains = %D",osm->nmax);CHKERRQ(ierr);
  }

  ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(NULL,prefix,"-pc_gasm_view_subdomains",&view_subdomains,NULL);CHKERRQ(ierr);

  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    /*
     Make sure the viewer has a name. Otherwise this may cause a deadlock when creating a subcomm viewer:
     the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed
     collectively on the comm.
     */
    ierr = PetscObjectName((PetscObject)viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  Restriction/interpolation type: %s\n",PCGASMTypes[osm->type]);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  %s\n",overlap);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  %s\n",gsubdomains);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  %s\n",msubdomains);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d|%d] number of locally-supported subdomains = %D\n",rank,size,osm->n);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    /* Cannot take advantage of osm->same_subdomain_solvers without a global numbering of subdomains. */
    ierr = PetscViewerASCIIPrintf(viewer,"  Subdomain solver info is as follows:\n");CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  - - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
    /* Make sure that everybody waits for the banner to be printed. */
    ierr = MPI_Barrier(PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
    /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
    ierr = PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);CHKERRQ(ierr);
    l = 0;
    for (count = 0; count < osm->N; ++count) {
      PetscMPIInt srank, ssize;
      if (l<osm->n) {
        PetscInt d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
        if (numbering[d] == count) {
          ierr = MPI_Comm_size(((PetscObject)osm->ois[d])->comm, &ssize);CHKERRQ(ierr);
          ierr = MPI_Comm_rank(((PetscObject)osm->ois[d])->comm, &srank);CHKERRQ(ierr);
          ierr = PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
          ierr = ISGetLocalSize(osm->ois[d],&bsz);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"  [%d|%d] (subcomm [%d|%d]) local subdomain number %D, local size = %D\n",rank,size,srank,ssize,d,bsz);CHKERRQ(ierr);
          ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr);
          if (view_subdomains) {
            ierr = PCGASMSubdomainView_Private(pc,d,sviewer);CHKERRQ(ierr);
          }
          if (!pc->setupcalled) {
            ierr = PetscViewerASCIIPrintf(sviewer, "  Solver not set up yet: PCSetUp() not yet called\n");CHKERRQ(ierr);
          } else {
            ierr = KSPView(osm->ksp[d],sviewer);CHKERRQ(ierr);
          }
          ierr = PetscViewerASCIIPrintf(sviewer,"  - - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
          ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr);
          ierr = PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
          ++l;
        }
      }
      ierr = MPI_Barrier(PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
    }
    ierr = PetscFree2(numbering,permutation);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode  PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[]);

PetscErrorCode PCGASMSetHierarchicalPartitioning(PC pc)
{
  PC_GASM         *osm = (PC_GASM*)pc->data;
  MatPartitioning part;
  MPI_Comm        comm;
  PetscMPIInt     size;
  PetscInt        nlocalsubdomains,fromrows_localsize;
  IS              partitioning,fromrows,isn;
  Vec             outervec;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /*
   A hierarchical partitioning is not needed when the total number of subdomains
   is consistent with the number of MPI processes; skip it in the following cases.
   */
  if (osm->N==PETSC_DETERMINE || osm->N>=size || osm->N==1) PetscFunctionReturn(0);
  if (size%osm->N != 0) SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_ARG_INCOMP,"The total number of subdomains %D must be a factor of the number of processors %d",osm->N,size);
  nlocalsubdomains = size/osm->N;
  osm->n           = 1;
  ierr = MatPartitioningCreate(comm,&part);CHKERRQ(ierr);
  ierr = MatPartitioningSetAdjacency(part,pc->pmat);CHKERRQ(ierr);
  ierr = MatPartitioningSetType(part,MATPARTITIONINGHIERARCH);CHKERRQ(ierr);
  ierr = MatPartitioningHierarchicalSetNcoarseparts(part,osm->N);CHKERRQ(ierr);
  ierr = MatPartitioningHierarchicalSetNfineparts(part,nlocalsubdomains);CHKERRQ(ierr);
  ierr = MatPartitioningSetFromOptions(part);CHKERRQ(ierr);
  /* get the new owning process of each vertex */
  ierr = MatPartitioningApply(part,&partitioning);CHKERRQ(ierr);
  ierr = ISBuildTwoSided(partitioning,NULL,&fromrows);CHKERRQ(ierr);
  ierr = ISPartitioningToNumbering(partitioning,&isn);CHKERRQ(ierr);
  ierr = ISDestroy(&isn);CHKERRQ(ierr);
  ierr = ISGetLocalSize(fromrows,&fromrows_localsize);CHKERRQ(ierr);
  ierr = MatPartitioningDestroy(&part);CHKERRQ(ierr);
  ierr = MatCreateVecs(pc->pmat,&outervec,NULL);CHKERRQ(ierr);
  ierr = VecCreateMPI(comm,fromrows_localsize,PETSC_DETERMINE,&(osm->pcx));CHKERRQ(ierr);
  ierr = VecDuplicate(osm->pcx,&(osm->pcy));CHKERRQ(ierr);
  ierr = VecScatterCreateWithData(osm->pcx,NULL,outervec,fromrows,&(osm->pctoouter));CHKERRQ(ierr);
  ierr = MatCreateSubMatrix(pc->pmat,fromrows,fromrows,MAT_INITIAL_MATRIX,&(osm->permutationP));CHKERRQ(ierr);
  ierr = PetscObjectReference((PetscObject)fromrows);CHKERRQ(ierr);
  osm->permutationIS = fromrows;
  osm->pcmat         = pc->pmat;
  ierr = PetscObjectReference((PetscObject)osm->permutationP);CHKERRQ(ierr);
  pc->pmat = osm->permutationP;
  ierr = VecDestroy(&outervec);CHKERRQ(ierr);
  ierr = ISDestroy(&fromrows);CHKERRQ(ierr);
  ierr = ISDestroy(&partitioning);CHKERRQ(ierr);
  osm->n = PETSC_DETERMINE;
  PetscFunctionReturn(0);
}
static PetscErrorCode PCSetUp_GASM(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscBool      symset,flg;
  PetscInt       i,nInnerIndices,nTotalInnerIndices;
  PetscMPIInt    rank, size;
  MatReuse       scall = MAT_REUSE_MATRIX;
  KSP            ksp;
  PC             subpc;
  const char     *prefix,*pprefix;
  Vec            x,y;
  PetscInt       oni;       /* Number of indices in the i-th local outer subdomain.               */
  const PetscInt *oidxi;    /* Indices of the i-th local outer subdomain.                          */
  PetscInt       on;        /* Number of indices in the disjoint union of local outer subdomains. */
  PetscInt       *oidx;     /* Indices in the disjoint union of local outer subdomains.            */
  IS             gois;      /* Disjoint union of the global indices of outer subdomains.           */
  IS             goid;      /* Identity IS of the size of the disjoint union of outer subdomains. */
  PetscScalar    *gxarray, *gyarray;
  PetscInt       gostart;   /* Start of locally-owned indices in the vectors -- osm->gx,osm->gy -- over the disjoint union of outer subdomains. */
  PetscInt       num_subdomains    = 0;
  DM             *subdomain_dm     = NULL;
  char           **subdomain_names = NULL;
  PetscInt       *numbering;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
  if (!pc->setupcalled) {
    /* use a hierarchical partitioning */
    if (osm->hierarchicalpartitioning) {
      ierr = PCGASMSetHierarchicalPartitioning(pc);CHKERRQ(ierr);
    }
    if (!osm->type_set) {
      ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
      if (symset && flg) osm->type = PC_GASM_BASIC;
    }

    if (osm->n == PETSC_DETERMINE) {
      if (osm->N != PETSC_DETERMINE) {
        /* No local subdomains given, but the desired number of total subdomains is known, so construct them accordingly. */
        ierr = PCGASMCreateSubdomains(pc->pmat,osm->N,&osm->n,&osm->iis);CHKERRQ(ierr);
      } else if (osm->dm_subdomains && pc->dm) {
        /* try pc->dm next, if allowed */
        PetscInt  d;
        IS       *inner_subdomain_is, *outer_subdomain_is;
        ierr = DMCreateDomainDecomposition(pc->dm, &num_subdomains, &subdomain_names, &inner_subdomain_is, &outer_subdomain_is, &subdomain_dm);CHKERRQ(ierr);
        if (num_subdomains) {
          ierr = PCGASMSetSubdomains(pc, num_subdomains, inner_subdomain_is, outer_subdomain_is);CHKERRQ(ierr);
        }
        for (d = 0; d < num_subdomains; ++d) {
          if (inner_subdomain_is) {ierr = ISDestroy(&inner_subdomain_is[d]);CHKERRQ(ierr);}
          if (outer_subdomain_is) {ierr = ISDestroy(&outer_subdomain_is[d]);CHKERRQ(ierr);}
        }
        ierr = PetscFree(inner_subdomain_is);CHKERRQ(ierr);
        ierr = PetscFree(outer_subdomain_is);CHKERRQ(ierr);
      } else {
        /* still no subdomains; use one per processor */
        osm->nmax = osm->n = 1;
        ierr      = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr);
        osm->N    = size;
        ierr      = PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);CHKERRQ(ierr);
      }
    }
    if (!osm->iis) {
      /*
       osm->n was set in PCGASMSetSubdomains(), but the actual subdomains have not been supplied.
       We create the requisite number of local inner subdomains and then expand them into
       outer subdomains, if necessary.
       */
      ierr = PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);CHKERRQ(ierr);
    }
    if (!osm->ois) {
      /*
       Initially make outer subdomains the same as inner subdomains. If nonzero additional overlap
       has been requested, copy the inner subdomains over so they can be modified.
       */
      ierr = PetscMalloc1(osm->n,&osm->ois);CHKERRQ(ierr);
      for (i=0; i<osm->n; ++i) {
        if (osm->overlap > 0 && osm->N>1) { /* With positive overlap, osm->iis[i] will be modified */
          ierr = ISDuplicate(osm->iis[i],(osm->ois)+i);CHKERRQ(ierr);
          ierr = ISCopy(osm->iis[i],osm->ois[i]);CHKERRQ(ierr);
        } else {
          ierr        = PetscObjectReference((PetscObject)((osm->iis)[i]));CHKERRQ(ierr);
          osm->ois[i] = osm->iis[i];
        }
      }
      if (osm->overlap>0 && osm->N>1) {
        /* Extend the "overlapping" regions by a number of steps */
        ierr = MatIncreaseOverlapSplit(pc->pmat,osm->n,osm->ois,osm->overlap);CHKERRQ(ierr);
      }
    }

    /* Now the subdomains are defined.  Determine their global and max local numbers, if necessary. */
    if (osm->nmax == PETSC_DETERMINE) {
      PetscMPIInt inwork,outwork;
      /* determine global number of subdomains and the max number of local subdomains */
      inwork    = osm->n;
      ierr      = MPIU_Allreduce(&inwork,&outwork,1,MPI_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
      osm->nmax = outwork;
    }
    if (osm->N == PETSC_DETERMINE) {
      /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
      ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,&osm->N,NULL);CHKERRQ(ierr);
    }

    if (osm->sort_indices) {
      for (i=0; i<osm->n; i++) {
        ierr = ISSort(osm->ois[i]);CHKERRQ(ierr);
        ierr = ISSort(osm->iis[i]);CHKERRQ(ierr);
      }
    }
    ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
    ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr);

    /*
       Merge the ISs, create merged vectors and restrictions.
    */
    /* Merge outer subdomain ISs and construct a restriction onto the disjoint union of local outer subdomains. */
    on = 0;
    for (i=0; i<osm->n; i++) {
      ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
      on  += oni;
    }
    ierr = PetscMalloc1(on, &oidx);CHKERRQ(ierr);
    on   = 0;
    /* Merge local indices together */
    for (i=0; i<osm->n; i++) {
      ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
      ierr = ISGetIndices(osm->ois[i],&oidxi);CHKERRQ(ierr);
      ierr = PetscMemcpy(oidx+on,oidxi,sizeof(PetscInt)*oni);CHKERRQ(ierr);
      ierr = ISRestoreIndices(osm->ois[i],&oidxi);CHKERRQ(ierr);
      on  += oni;
    }
    ierr = ISCreateGeneral(((PetscObject)(pc))->comm,on,oidx,PETSC_OWN_POINTER,&gois);CHKERRQ(ierr);
    nTotalInnerIndices = 0;
    for (i=0; i<osm->n; i++) {
      ierr = ISGetLocalSize(osm->iis[i],&nInnerIndices);CHKERRQ(ierr);
      nTotalInnerIndices += nInnerIndices;
    }
    ierr = VecCreateMPI(((PetscObject)(pc))->comm,nTotalInnerIndices,PETSC_DETERMINE,&x);CHKERRQ(ierr);
    ierr = VecDuplicate(x,&y);CHKERRQ(ierr);

    ierr = VecCreateMPI(PetscObjectComm((PetscObject)pc),on,PETSC_DECIDE,&osm->gx);CHKERRQ(ierr);
    ierr = VecDuplicate(osm->gx,&osm->gy);CHKERRQ(ierr);
    ierr = VecGetOwnershipRange(osm->gx, &gostart, NULL);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),on,gostart,1, &goid);CHKERRQ(ierr);
    /* gois may contain indices that are not locally owned */
    ierr = VecScatterCreateWithData(x,gois,osm->gx,goid, &(osm->gorestriction));CHKERRQ(ierr);
    ierr = PetscMalloc1(osm->n,&numbering);CHKERRQ(ierr);
    ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,NULL,numbering);CHKERRQ(ierr);
    ierr = VecDestroy(&x);CHKERRQ(ierr);
    ierr = ISDestroy(&gois);CHKERRQ(ierr);

    /* Merge inner subdomain ISs and construct a restriction onto the disjoint union of local inner subdomains. */
    {
      PetscInt        ini;           /* Number of indices in the i-th local inner subdomain. */
      PetscInt        in;            /* Number of indices in the disjoint union of local inner subdomains. */
      PetscInt       *iidx;          /* Global indices in the merged local inner subdomain. */
      PetscInt       *ioidx;         /* Global indices of the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
      IS              giis;          /* IS for the disjoint union of inner subdomains. */
      IS              giois;         /* IS for the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
      PetscScalar    *array;
      const PetscInt *indices;
      PetscInt        k;
      on = 0;
      for (i=0; i<osm->n; i++) {
        ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
        on  += oni;
      }
      ierr = PetscMalloc1(on, &iidx);CHKERRQ(ierr);
      ierr = PetscMalloc1(on, &ioidx);CHKERRQ(ierr);
      ierr = VecGetArray(y,&array);CHKERRQ(ierr);
      /* mark each inner-subdomain entry with its subdomain number to determine where the overlap is */
      in   = 0;
      for (i=0; i<osm->n; i++) {
        ierr = ISGetLocalSize(osm->iis[i],&ini);CHKERRQ(ierr);
        for (k = 0; k < ini; ++k) {
          array[in+k] = numbering[i];
        }
        in += ini;
      }
      ierr = VecRestoreArray(y,&array);CHKERRQ(ierr);
      ierr = VecScatterBegin(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecGetOwnershipRange(osm->gy,&gostart, NULL);CHKERRQ(ierr);
      ierr = VecGetArray(osm->gy,&array);CHKERRQ(ierr);
      on   = 0;
      in   = 0;
      for (i=0; i<osm->n; i++) {
        ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
        ierr = ISGetIndices(osm->ois[i],&indices);CHKERRQ(ierr);
        for (k=0; k<oni; k++) {
          /* skip overlapping indices to get the inner domain */
          if (PetscRealPart(array[on+k]) != numbering[i]) continue;
          iidx[in]    = indices[k];
          ioidx[in++] = gostart+on+k;
        }
        ierr = ISRestoreIndices(osm->ois[i], &indices);CHKERRQ(ierr);
        on  += oni;
      }
      ierr = VecRestoreArray(osm->gy,&array);CHKERRQ(ierr);
      ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,iidx,PETSC_OWN_POINTER,&giis);CHKERRQ(ierr);
      ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,ioidx,PETSC_OWN_POINTER,&giois);CHKERRQ(ierr);
      ierr = VecScatterCreateWithData(y,giis,osm->gy,giois,&osm->girestriction);CHKERRQ(ierr);
      ierr = VecDestroy(&y);CHKERRQ(ierr);
      ierr = ISDestroy(&giis);CHKERRQ(ierr);
      ierr = ISDestroy(&giois);CHKERRQ(ierr);
    }
    ierr = ISDestroy(&goid);CHKERRQ(ierr);
    ierr = PetscFree(numbering);CHKERRQ(ierr);

    /* Create the subdomain work vectors. */
    ierr = PetscMalloc1(osm->n,&osm->x);CHKERRQ(ierr);
    ierr = PetscMalloc1(osm->n,&osm->y);CHKERRQ(ierr);
    ierr = VecGetArray(osm->gx, &gxarray);CHKERRQ(ierr);
    ierr = VecGetArray(osm->gy, &gyarray);CHKERRQ(ierr);
    for (i=0, on=0; i<osm->n; ++i, on += oni) {
      PetscInt oNi;
      ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
      /* on a sub communicator */
      ierr = ISGetSize(osm->ois[i],&oNi);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gxarray+on,&osm->x[i]);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gyarray+on,&osm->y[i]);CHKERRQ(ierr);
    }
    ierr = VecRestoreArray(osm->gx, &gxarray);CHKERRQ(ierr);
    ierr = VecRestoreArray(osm->gy, &gyarray);CHKERRQ(ierr);
    /* Create the subdomain solvers */
    ierr = PetscMalloc1(osm->n,&osm->ksp);CHKERRQ(ierr);
    for (i=0; i<osm->n; i++) {
      char subprefix[PETSC_MAX_PATH_LEN+1];
      ierr = KSPCreate(((PetscObject)(osm->ois[i]))->comm,&ksp);CHKERRQ(ierr);
      ierr = KSPSetErrorIfNotConverged(ksp,pc->erroriffailure);CHKERRQ(ierr);
      ierr = PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp);CHKERRQ(ierr);
      ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr);
      ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
      ierr = KSPGetPC(ksp,&subpc);CHKERRQ(ierr); /* Why do we need this here? */
      if (subdomain_dm) {
        ierr = KSPSetDM(ksp,subdomain_dm[i]);CHKERRQ(ierr);
        ierr = DMDestroy(subdomain_dm+i);CHKERRQ(ierr);
      }
      ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
      ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr);
      if (subdomain_names && subdomain_names[i]) {
        ierr = PetscSNPrintf(subprefix,PETSC_MAX_PATH_LEN,"sub_%s_",subdomain_names[i]);CHKERRQ(ierr);
        ierr = KSPAppendOptionsPrefix(ksp,subprefix);CHKERRQ(ierr);
        ierr = PetscFree(subdomain_names[i]);CHKERRQ(ierr);
      }
      ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr);
      osm->ksp[i] = ksp;
    }
    ierr = PetscFree(subdomain_dm);CHKERRQ(ierr);
    ierr = PetscFree(subdomain_names);CHKERRQ(ierr);
    scall = MAT_INITIAL_MATRIX;

  } else { /* if (pc->setupcalled) */
    /*
       Destroy the submatrices from the previous iteration
    */
    if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
      ierr  = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
      scall = MAT_INITIAL_MATRIX;
    }
    if (osm->permutationIS) {
      ierr = MatCreateSubMatrix(pc->pmat,osm->permutationIS,osm->permutationIS,scall,&osm->permutationP);CHKERRQ(ierr);
      ierr = PetscObjectReference((PetscObject)osm->permutationP);CHKERRQ(ierr);
      osm->pcmat = pc->pmat;
      pc->pmat   = osm->permutationP;
    }
  }

  /*
     Extract out the submatrices.
  */
  if (size > 1) {
    ierr = MatCreateSubMatricesMPI(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreateSubMatrices(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);CHKERRQ(ierr);
  }
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr);
    for (i=0; i<osm->n; i++) {
      ierr = PetscLogObjectParent((PetscObject)pc,(PetscObject)osm->pmat[i]);CHKERRQ(ierr);
      ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr);
    }
  }

  /* Return control to the user so that the submatrices can be modified (e.g., to apply
     different boundary conditions for the submatrices than for the global problem) */
  ierr = PCModifySubMatrices(pc,osm->n,osm->ois,osm->ois,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr);

  /*
     Loop over submatrices putting them into local ksps
  */
  for (i=0; i<osm->n; i++) {
    ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i]);CHKERRQ(ierr);
    if (!pc->setupcalled) {
      ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr);
    }
  }
  if (osm->pcmat) {
    ierr = MatDestroy(&pc->pmat);CHKERRQ(ierr);
    pc->pmat   = osm->pcmat;
    osm->pcmat = NULL;
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  for (i=0; i<osm->n; i++) {
    ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode PCApply_GASM(PC pc,Vec xin,Vec yout)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;
  Vec            x,y;
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

  PetscFunctionBegin;
  if (osm->pctoouter) {
    ierr = VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    x = osm->pcx;
    y = osm->pcy;
  } else {
    x = xin;
    y = yout;
  }
  /*
     Support for limiting the restriction or interpolation only to the inner
     subdomain values (leaving the other values 0).
  */
  if (!(osm->type & PC_GASM_RESTRICT)) {
    /* have to zero the work RHS since scatter may leave some slots empty */
    ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
    ierr = VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  } else {
    ierr = VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  ierr = VecZeroEntries(osm->gy);CHKERRQ(ierr);
  if (!(osm->type & PC_GASM_RESTRICT)) {
    ierr = VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  } else {
    ierr = VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  /* do the subdomain solves */
  for (i=0; i<osm->n; ++i) {
    ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
    ierr = KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);CHKERRQ(ierr);
  }
  /* Do we need to zero y? */
  ierr = VecZeroEntries(y);CHKERRQ(ierr);
  if (!(osm->type & PC_GASM_INTERPOLATE)) {
    ierr = VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  } else {
    ierr = VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  if (osm->pctoouter) {
    ierr = VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec xin,Vec yout)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;
  Vec            x,y;
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

  PetscFunctionBegin;
  if (osm->pctoouter) {
    ierr = VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    x = osm->pcx;
    y = osm->pcy;
  } else {
    x = xin;
    y = yout;
  }
  /*
     Support for limiting the restriction or interpolation to only local
     subdomain values (leaving the other values 0).

     Note: these are reversed from the PCApply_GASM() because we are applying the
     transpose of the three terms
  */
  if (!(osm->type & PC_GASM_INTERPOLATE)) {
    /* have to zero the work RHS since scatter may leave some slots empty */
    ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
    ierr = VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  } else {
    ierr = VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  ierr = VecZeroEntries(osm->gy);CHKERRQ(ierr);
  if (!(osm->type & PC_GASM_INTERPOLATE)) {
    ierr = VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  } else {
    ierr = VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  /* do the local solves */
  for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
    ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
    ierr = KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);CHKERRQ(ierr);
  }
  ierr = VecZeroEntries(y);CHKERRQ(ierr);
  if (!(osm->type & PC_GASM_RESTRICT)) {
    ierr = VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  } else {
    ierr = VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  if (osm->pctoouter) {
    ierr = VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode PCReset_GASM(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  if (osm->ksp) {
    for (i=0; i<osm->n; i++) {
      ierr = KSPReset(osm->ksp[i]);CHKERRQ(ierr);
    }
  }
  if (osm->pmat) {
    if (osm->n > 0) {
      PetscMPIInt size;
      ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr);
      if (size > 1) {
        /* osm->pmat is created by MatCreateSubMatricesMPI(), cannot use MatDestroySubMatrices() */
        ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
      } else {
        ierr = MatDestroySubMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
      }
    }
  }
  if (osm->x) {
    for (i=0; i<osm->n; i++) {
      ierr = VecDestroy(&osm->x[i]);CHKERRQ(ierr);
      ierr = VecDestroy(&osm->y[i]);CHKERRQ(ierr);
    }
  }
  ierr = VecDestroy(&osm->gx);CHKERRQ(ierr);
  ierr = VecDestroy(&osm->gy);CHKERRQ(ierr);

  ierr = VecScatterDestroy(&osm->gorestriction);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&osm->girestriction);CHKERRQ(ierr);
  if (!osm->user_subdomains) {
    ierr      = PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);CHKERRQ(ierr);
    osm->N    = PETSC_DETERMINE;
    osm->nmax = PETSC_DETERMINE;
  }
  if (osm->pctoouter) {
    ierr = VecScatterDestroy(&(osm->pctoouter));CHKERRQ(ierr);
  }
  if (osm->permutationIS) {
    ierr = ISDestroy(&(osm->permutationIS));CHKERRQ(ierr);
  }
  if (osm->pcx) {
    ierr = VecDestroy(&(osm->pcx));CHKERRQ(ierr);
  }
  if (osm->pcy) {
    ierr = VecDestroy(&(osm->pcy));CHKERRQ(ierr);
  }
  if (osm->permutationP) {
    ierr = MatDestroy(&(osm->permutationP));CHKERRQ(ierr);
  }
  if (osm->pcmat) {
    ierr = MatDestroy(&osm->pcmat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode PCDestroy_GASM(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  ierr = PCReset_GASM(pc);CHKERRQ(ierr);
  /* PCReset will not destroy subdomains, if user_subdomains is true. */
  ierr = PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);CHKERRQ(ierr);
  if (osm->ksp) {
    for (i=0; i<osm->n; i++) {
      ierr = KSPDestroy(&osm->ksp[i]);CHKERRQ(ierr);
    }
    ierr = PetscFree(osm->ksp);CHKERRQ(ierr);
  }
  ierr = PetscFree(osm->x);CHKERRQ(ierr);
  ierr = PetscFree(osm->y);CHKERRQ(ierr);
  ierr = PetscFree(pc->data);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode PCSetFromOptions_GASM(PetscOptionItems *PetscOptionsObject,PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       blocks,ovl;
  PetscBool      symset,flg;
  PCGASMType     gasmtype;

  PetscFunctionBegin;
  /* set the type to symmetric if matrix is symmetric */
  if (!osm->type_set && pc->pmat) {
    ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
    if (symset && flg) osm->type = PC_GASM_BASIC;
  }
  ierr = PetscOptionsHead(PetscOptionsObject,"Generalized additive Schwarz options");CHKERRQ(ierr);
  ierr = PetscOptionsBool("-pc_gasm_use_dm_subdomains","If subdomains aren't set, use DMCreateDomainDecomposition() to define subdomains.","PCGASMSetUseDMSubdomains",osm->dm_subdomains,&osm->dm_subdomains,&flg);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-pc_gasm_total_subdomains","Total number of subdomains across communicator","PCGASMSetTotalSubdomains",osm->N,&blocks,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCGASMSetTotalSubdomains(pc,blocks);CHKERRQ(ierr);
  }
  ierr = PetscOptionsInt("-pc_gasm_overlap","Number of overlapping degrees of freedom","PCGASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCGASMSetOverlap(pc,ovl);CHKERRQ(ierr);
    osm->dm_subdomains = PETSC_FALSE;
  }
  flg  = PETSC_FALSE;
  ierr = PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);CHKERRQ(ierr);
  if (flg) {ierr = PCGASMSetType(pc,gasmtype);CHKERRQ(ierr);}
  ierr = PetscOptionsBool("-pc_gasm_use_hierachical_partitioning","use hierarchical partitioning",NULL,osm->hierarchicalpartitioning,&osm->hierarchicalpartitioning,&flg);CHKERRQ(ierr);
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*------------------------------------------------------------------------------------*/

/*@
    PCGASMSetTotalSubdomains - sets the total number of subdomains to use across the
                               communicator.

    Logically collective on pc

    Input Parameters:
+   pc  - the preconditioner
-   N   - total number of subdomains

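    Note:
    A minimal usage sketch (assuming pc was obtained, e.g., with KSPGetPC(); the subdomain count 16 is illustrative):
.vb
    PCSetType(pc,PCGASM);
    PCGASMSetTotalSubdomains(pc,16); /* split the communicator's dofs into 16 subdomains; they are built later, in PCSetUp() */
.ve
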
    Level: beginner

.keywords: PC, GASM, DM, set, subdomains, additive Schwarz

.seealso: PCGASMSetSubdomains(), PCGASMSetOverlap(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode  PCGASMSetTotalSubdomains(PC pc,PetscInt N)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscMPIInt    size,rank;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (N < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Total number of subdomains must be 1 or more, got N = %D",N);
  if (pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before calling PCSetUp().");

  ierr = PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);CHKERRQ(ierr);
  osm->ois = osm->iis = NULL;

  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
  osm->N    = N;
  osm->n    = PETSC_DETERMINE;
  osm->nmax = PETSC_DETERMINE;
  osm->dm_subdomains = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode  PCGASMSetSubdomains_GASM(PC pc,PetscInt n,IS iis[],IS ois[])
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more subdomains, got n = %D",n);
  if (pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetSubdomains() should be called before calling PCSetUp().");

  ierr = PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);CHKERRQ(ierr);
  osm->iis  = osm->ois = NULL;
  osm->n    = n;
  osm->N    = PETSC_DETERMINE;
  osm->nmax = PETSC_DETERMINE;
  if (ois) {
    ierr = PetscMalloc1(n,&osm->ois);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      ierr = PetscObjectReference((PetscObject)ois[i]);CHKERRQ(ierr);
      osm->ois[i] = ois[i];
    }
    /*
       Since the user set the outer subdomains, even if nontrivial overlap was requested via PCGASMSetOverlap(),
       it will be ignored.  To avoid confusion later on (e.g., when viewing the PC), the overlap size is set to -1.
    */
    osm->overlap = -1;
    /* inner subdomains must be provided */
    if (!iis) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"inner index sets have to be provided");
  }
  if (iis) {
    ierr = PetscMalloc1(n,&osm->iis);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      ierr = PetscObjectReference((PetscObject)iis[i]);CHKERRQ(ierr);
      osm->iis[i] = iis[i];
    }
    if (!ois) {
      osm->ois = NULL;
      /* if the user does not provide outer index sets, the corresponding outer subdomains are created in PCSetUp_GASM() by expanding the inner subdomains by osm->overlap */
    }
  }
#if defined(PETSC_USE_DEBUG)
  {
    PetscInt        j,rstart,rend,*covered,lsize;
    const PetscInt  *indices;
    /* check that the inner index sets cover exactly the local portion of the preconditioning matrix */
    ierr = MatGetOwnershipRange(pc->pmat,&rstart,&rend);CHKERRQ(ierr);
    ierr = PetscCalloc1(rend-rstart,&covered);CHKERRQ(ierr);
    /* check if the current processor owns indices from others */
    for (i=0; i<n; i++) {
      ierr = ISGetIndices(osm->iis[i],&indices);CHKERRQ(ierr);
      ierr = ISGetLocalSize(osm->iis[i],&lsize);CHKERRQ(ierr);
      for (j=0; j<lsize; j++) {
        if (indices[j]<rstart || indices[j]>=rend) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"inner subdomains cannot own an index %D from other processors", indices[j]);
        else if (covered[indices[j]-rstart]==1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"inner subdomains cannot have an overlapping index %D",indices[j]);
        else covered[indices[j]-rstart] = 1;
      }
      ierr = ISRestoreIndices(osm->iis[i],&indices);CHKERRQ(ierr);
    }
    /* check if we miss any indices */
    for (i=rstart; i<rend; i++) {
      if (!covered[i-rstart]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"local entity %D was not covered by inner subdomains",i);
    }
    ierr = PetscFree(covered);CHKERRQ(ierr);
  }
#endif
  if (iis) osm->user_subdomains = PETSC_TRUE;
  PetscFunctionReturn(0);
}

static PetscErrorCode  PCGASMSetOverlap_GASM(PC pc,PetscInt ovl)
{
  PC_GASM *osm = (PC_GASM*)pc->data;

  PetscFunctionBegin;
  if (ovl < 0) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
  if (pc->setupcalled && ovl != osm->overlap) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp().");
  if (!pc->setupcalled) osm->overlap = ovl;
  PetscFunctionReturn(0);
}

static PetscErrorCode  PCGASMSetType_GASM(PC pc,PCGASMType type)
{
  PC_GASM *osm = (PC_GASM*)pc->data;

  PetscFunctionBegin;
  osm->type     = type;
  osm->type_set = PETSC_TRUE;
  PetscFunctionReturn(0);
}

static PetscErrorCode  PCGASMSetSortIndices_GASM(PC pc,PetscBool doSort)
{
  PC_GASM *osm = (PC_GASM*)pc->data;

  PetscFunctionBegin;
  osm->sort_indices = doSort;
  PetscFunctionReturn(0);
}

/*
   FIXME: This routine might need to be modified now that multiple ranks per subdomain are allowed.
          In particular, it would upset the global subdomain number calculation.
*/
static PetscErrorCode  PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (osm->n < 1) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ORDER,"Need to call PCSetUp() on PC (or KSPSetUp() on the outer KSP object) before calling here");

  if (n) *n = osm->n;
  if (first) {
    ierr    = MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
    *first -= osm->n;
  }
  if (ksp) {
    /* Assume that local solves are now different; not necessarily
       true, though!  This flag is used only for PCView_GASM() */
    *ksp                        = osm->ksp;
    osm->same_subdomain_solvers = PETSC_FALSE;
  }
  PetscFunctionReturn(0);
} /* PCGASMGetSubKSP_GASM() */

/*@C
    PCGASMSetSubdomains - Sets the subdomains for this processor
    for the additive Schwarz preconditioner.

    Collective on pc

    Input Parameters:
+   pc  - the preconditioner object
.   n   - the number of subdomains for this processor
.   iis - the index sets that define the inner subdomains (or NULL for PETSc to determine subdomains)
-   ois - the index sets that define the outer subdomains (or NULL to use the same as iis, or to construct by expanding iis by the requested overlap)

    Notes:
    The IS indices use the parallel, global numbering of the vector entries.
    Inner subdomains are those where the correction is applied.
    Outer subdomains are those where the residual necessary to obtain the
    corrections is obtained (see PCGASMType for the use of inner/outer subdomains).
    Both inner and outer subdomains can extend over several processors.
    This processor's portion of a subdomain is known as a local subdomain.

    Inner subdomains cannot overlap with each other, do not have any entities from remote processors,
    and have to cover the entire local subdomain owned by the current processor. The index sets on each
    process should be ordered such that the ith local subdomain is connected to the ith remote subdomain
    on another MPI process.

    By default the GASM preconditioner uses 1 (local) subdomain per processor.

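    A minimal sketch of one inner subdomain per process, letting PETSc build the outer subdomains
    (A denotes the preconditioning matrix; all names are illustrative):
.vb
    IS       is;
    PetscInt rstart,rend;
    MatGetOwnershipRange(A,&rstart,&rend);                    /* locally owned rows            */
    ISCreateStride(PETSC_COMM_SELF,rend-rstart,rstart,1,&is); /* global, parallel numbering    */
    PCGASMSetSubdomains(pc,1,&is,NULL);                       /* NULL ois: expanded by overlap */
    ISDestroy(&is);
.ve
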
    Level: advanced

.keywords: PC, GASM, set, subdomains, additive Schwarz

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
@*/
PetscErrorCode  PCGASMSetSubdomains(PC pc,PetscInt n,IS iis[],IS ois[])
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGASMSetSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,iis,ois));CHKERRQ(ierr);
  osm->dm_subdomains = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*@
    PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
    additive Schwarz preconditioner.  Either all or no processors in the
    pc communicator must call this routine.

    Logically Collective on pc

    Input Parameters:
+   pc  - the preconditioner context
-   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 0)

    Options Database Key:
.   -pc_gasm_overlap <overlap> - Sets overlap

    Notes:
    By default the GASM preconditioner uses 1 subdomain per processor.  To use
    multiple subdomains per processor or "straddling" subdomains that intersect
    multiple processors use PCGASMSetSubdomains() (or option -pc_gasm_total_subdomains <n>).

    The overlap defaults to 0, so if one desires that no additional
    overlap be computed beyond what may have been set with a call to
    PCGASMSetSubdomains(), then ovl must be set to be 0.  In particular, if one does
    not explicitly set the subdomains in application code, then all overlap would be computed
    internally by PETSc, and using an overlap of 0 would result in a GASM
    variant that is equivalent to the block Jacobi preconditioner.

    Note that one can define initial index sets with any overlap via
    PCGASMSetSubdomains(); the routine PCGASMSetOverlap() merely allows
    PETSc to extend that overlap further, if desired.

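    A minimal sketch (the overlap value 2 is illustrative):
.vb
    PCGASMSetOverlap(pc,2);   /* equivalent to -pc_gasm_overlap 2 */
.ve
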
    Level: intermediate

.keywords: PC, GASM, set, overlap

.seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
@*/
PetscErrorCode  PCGASMSetOverlap(PC pc,PetscInt ovl)
{
  PetscErrorCode ierr;
  PC_GASM        *osm = (PC_GASM*)pc->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveInt(pc,ovl,2);
  ierr = PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));CHKERRQ(ierr);
  osm->dm_subdomains = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*@
    PCGASMSetType - Sets the type of restriction and interpolation used
    for local problems in the additive Schwarz method.

    Logically Collective on PC

    Input Parameters:
+   pc  - the preconditioner context
-   type - variant of GASM, one of
.vb
      PC_GASM_BASIC       - full interpolation and restriction
      PC_GASM_RESTRICT    - full restriction, local processor interpolation
      PC_GASM_INTERPOLATE - full interpolation, local processor restriction
      PC_GASM_NONE        - local processor restriction and interpolation
.ve

    Options Database Key:
.   -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type

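    A minimal sketch (PC_GASM_RESTRICT is the default and is chosen here only for illustration):
.vb
    PCGASMSetType(pc,PC_GASM_RESTRICT);   /* equivalent to -pc_gasm_type restrict */
.ve
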
1154     Level: intermediate
1155 
1156 .keywords: PC, GASM, set, type
1157 
1158 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1159           PCGASMCreateSubdomains2D()
1160 @*/
1161 PetscErrorCode  PCGASMSetType(PC pc,PCGASMType type)
1162 {
1163   PetscErrorCode ierr;
1164 
1165   PetscFunctionBegin;
1166   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1167   PetscValidLogicalCollectiveEnum(pc,type,2);
1168   ierr = PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));CHKERRQ(ierr);
1169   PetscFunctionReturn(0);
1170 }
1171 
1172 /*@
1173     PCGASMSetSortIndices - Determines whether subdomain indices are sorted.
1174 
1175     Logically Collective on PC
1176 
1177     Input Parameters:
1178 +   pc  - the preconditioner context
1179 -   doSort - sort the subdomain indices
1180 
1181     Level: intermediate
1182 
1183 .keywords: PC, GASM, set, sort, indices
1184 
1185 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1186           PCGASMCreateSubdomains2D()
1187 @*/
1188 PetscErrorCode  PCGASMSetSortIndices(PC pc,PetscBool doSort)
1189 {
1190   PetscErrorCode ierr;
1191 
1192   PetscFunctionBegin;
1193   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1194   PetscValidLogicalCollectiveBool(pc,doSort,2);
1195   ierr = PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));CHKERRQ(ierr);
1196   PetscFunctionReturn(0);
1197 }
1198 
1199 /*@C
1200    PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on
1201    this processor.
1202 
1203    Collective on PC iff first_local is requested
1204 
1205    Input Parameter:
1206 .  pc - the preconditioner context
1207 
1208    Output Parameters:
1209 +  n_local - the number of blocks on this processor or NULL
1210 .  first_local - the global number of the first block on this processor or NULL,
1211                  all processors must request or all must pass NULL
1212 -  ksp - the array of KSP contexts
1213 
1214    Note:
1215    The array of KSP contexts returned by PCGASMGetSubKSP() must not be freed by the caller.
1216 
1217    Currently for some matrix implementations only 1 block per processor
1218    is supported.
1219 
1220    You must call KSPSetUp() before calling PCGASMGetSubKSP().
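
   A sketch of typical use (assuming ksp is a KSP whose preconditioner pc has type PCGASM):
.vb
      KSP      *subksp;
      PetscInt i,nlocal;
      KSPSetUp(ksp);
      PCGASMGetSubKSP(pc,&nlocal,NULL,&subksp);
      for (i=0; i<nlocal; i++) {
        KSPSetTolerances(subksp[i],1.e-8,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT);
      }
.ve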
1221 
1222    Level: advanced
1223 
1224 .keywords: PC, GASM, additive Schwarz, get, sub, KSP, context
1225 
1226 .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap(),
1227           PCGASMCreateSubdomains2D(),
1228 @*/
1229 PetscErrorCode  PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
1230 {
1231   PetscErrorCode ierr;
1232 
1233   PetscFunctionBegin;
1234   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1235   ierr = PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));CHKERRQ(ierr);
1236   PetscFunctionReturn(0);
1237 }
1238 
1239 /* -------------------------------------------------------------------------------------*/
1240 /*MC
1241    PCGASM - Use the (restricted) additive Schwarz method, in which each block is solved (approximately)
1242            with its own KSP object.
1243 
1244    Options Database Keys:
1245 +  -pc_gasm_total_subdomains <n>  - Sets the total number of subdomains to be distributed among the processors
1246 .  -pc_gasm_view_subdomains       - activates the printing of subdomain indices in PCView(), -ksp_view or -snes_view
1247 .  -pc_gasm_print_subdomains      - activates the printing of subdomain indices in PCSetUp()
1248 .  -pc_gasm_overlap <ovl>         - Sets overlap by which to (automatically) extend local subdomains
1249 -  -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
1250 
1251      IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors, you
1252       will get a different convergence rate due to the default option of -pc_gasm_type restrict. Use
1253       -pc_gasm_type basic to get the standard GASM.
1254 
1255    Notes:
1256     Blocks can be shared by multiple processes.
1257 
1258      To set options on the solvers for each block, append -sub_ to all the KSP and PC
1259         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
1260 
1261      To set options on the solvers separately for each block, call PCGASMGetSubKSP()
1262          and set the options directly on the resulting KSP object (you can access its PC
1263          with KSPGetPC()).
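
     For example, a run using ILU subdomain solves might look like (hypothetical executable ./app):
.vb
        mpiexec -n 4 ./app -pc_type gasm -pc_gasm_overlap 1 -sub_ksp_type preonly -sub_pc_type ilu
.ve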
1264 
1265 
1266    Level: beginner
1267 
1268    Concepts: additive Schwarz method
1269 
1270     References:
1271 +   1. - M Dryja, OB Widlund, An additive variant of the Schwarz alternating method for the case of many subregions
1272      Courant Institute, New York University Technical report
1273 -   2. - Barry Smith, Petter Bjorstad, and William Gropp, Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
1274     Cambridge University Press.
1275 
1276 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
1277            PCBJACOBI,  PCGASMGetSubKSP(), PCGASMSetSubdomains(),
1278            PCSetModifySubmatrices(), PCGASMSetOverlap(), PCGASMSetType()
1279 
1280 M*/
1281 
1282 PETSC_EXTERN PetscErrorCode PCCreate_GASM(PC pc)
1283 {
1284   PetscErrorCode ierr;
1285   PC_GASM        *osm;
1286 
1287   PetscFunctionBegin;
1288   ierr = PetscNewLog(pc,&osm);CHKERRQ(ierr);
1289 
1290   osm->N                        = PETSC_DETERMINE;
1291   osm->n                        = PETSC_DECIDE;
1292   osm->nmax                     = PETSC_DETERMINE;
1293   osm->overlap                  = 0;
1294   osm->ksp                      = 0;
1295   osm->gorestriction            = 0;
1296   osm->girestriction            = 0;
1297   osm->pctoouter                = 0;
1298   osm->gx                       = 0;
1299   osm->gy                       = 0;
1300   osm->x                        = 0;
1301   osm->y                        = 0;
1302   osm->pcx                      = 0;
1303   osm->pcy                      = 0;
1304   osm->permutationIS            = 0;
1305   osm->permutationP             = 0;
1306   osm->pcmat                    = 0;
1307   osm->ois                      = 0;
1308   osm->iis                      = 0;
1309   osm->pmat                     = 0;
1310   osm->type                     = PC_GASM_RESTRICT;
1311   osm->same_subdomain_solvers   = PETSC_TRUE;
1312   osm->sort_indices             = PETSC_TRUE;
1313   osm->dm_subdomains            = PETSC_FALSE;
1314   osm->hierarchicalpartitioning = PETSC_FALSE;
1315 
1316   pc->data                 = (void*)osm;
1317   pc->ops->apply           = PCApply_GASM;
1318   pc->ops->applytranspose  = PCApplyTranspose_GASM;
1319   pc->ops->setup           = PCSetUp_GASM;
1320   pc->ops->reset           = PCReset_GASM;
1321   pc->ops->destroy         = PCDestroy_GASM;
1322   pc->ops->setfromoptions  = PCSetFromOptions_GASM;
1323   pc->ops->setuponblocks   = PCSetUpOnBlocks_GASM;
1324   pc->ops->view            = PCView_GASM;
1325   pc->ops->applyrichardson = 0;
1326 
1327   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSubdomains_C",PCGASMSetSubdomains_GASM);CHKERRQ(ierr);
1328   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetOverlap_C",PCGASMSetOverlap_GASM);CHKERRQ(ierr);
1329   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetType_C",PCGASMSetType_GASM);CHKERRQ(ierr);
1330   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSortIndices_C",PCGASMSetSortIndices_GASM);CHKERRQ(ierr);
1331   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMGetSubKSP_C",PCGASMGetSubKSP_GASM);CHKERRQ(ierr);
1332   PetscFunctionReturn(0);
1333 }
1334 
1335 
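/*
   PCGASMCreateLocalSubdomains - Creates nloc nonoverlapping index sets covering this process's rows of A.
   The rows are split with a MatPartitioning of the diagonal block when a genuine partitioner is available,
   and by contiguous chunks of rows otherwise.
*/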
1336 PetscErrorCode  PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[])
1337 {
1338   MatPartitioning mpart;
1339   const char      *prefix;
1340   PetscInt        i,j,rstart,rend,bs;
1341   PetscBool       hasop, isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
1342   Mat             Ad     = NULL, adj;
1343   IS              ispart,isnumb,*is;
1344   PetscErrorCode  ierr;
1345 
1346   PetscFunctionBegin;
1347   if (nloc < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local subdomains must be > 0, got nloc = %D",nloc);
1348 
1349   /* Get prefix, row distribution, and block size */
1350   ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr);
1351   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1352   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
1353   if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);
1354 
1355   /* Get diagonal block from matrix if possible */
1356   ierr = MatHasOperation(A,MATOP_GET_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1357   if (hasop) {
1358     ierr = MatGetDiagonalBlock(A,&Ad);CHKERRQ(ierr);
1359   }
1360   if (Ad) {
1361     ierr = PetscObjectTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
1362     if (!isbaij) {ierr = PetscObjectTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);}
1363   }
1364   if (Ad && nloc > 1) {
1365     PetscBool  match,done;
1366     /* Try to setup a good matrix partitioning if available */
1367     ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr);
1368     ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
1369     ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
1370     ierr = PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);CHKERRQ(ierr);
1371     if (!match) {
1372       ierr = PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);CHKERRQ(ierr);
1373     }
1374     if (!match) { /* assume a "good" partitioner is available */
1375       PetscInt       na;
1376       const PetscInt *ia,*ja;
1377       ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1378       if (done) {
1379         /* Build adjacency matrix by hand. Unfortunately a call to
1380            MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1381            remove the block-aij structure and we cannot expect
1382            MatPartitioning to split vertices as we need */
1383         PetscInt       i,j,len,nnz,cnt,*iia=0,*jja=0;
1384         const PetscInt *row;
1385         nnz = 0;
1386         for (i=0; i<na; i++) { /* count number of nonzeros */
1387           len = ia[i+1] - ia[i];
1388           row = ja + ia[i];
1389           for (j=0; j<len; j++) {
1390             if (row[j] == i) { /* don't count diagonal */
1391               len--; break;
1392             }
1393           }
1394           nnz += len;
1395         }
1396         ierr   = PetscMalloc1(na+1,&iia);CHKERRQ(ierr);
1397         ierr   = PetscMalloc1(nnz,&jja);CHKERRQ(ierr);
1398         nnz    = 0;
1399         iia[0] = 0;
1400         for (i=0; i<na; i++) { /* fill adjacency */
1401           cnt = 0;
1402           len = ia[i+1] - ia[i];
1403           row = ja + ia[i];
1404           for (j=0; j<len; j++) {
1405             if (row[j] != i) jja[nnz+cnt++] = row[j]; /* if not diagonal */
1406           }
1407           nnz += cnt;
1408           iia[i+1] = nnz;
1409         }
1410         /* Partitioning of the adjacency matrix */
1411         ierr      = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,NULL,&adj);CHKERRQ(ierr);
1412         ierr      = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr);
1413         ierr      = MatPartitioningSetNParts(mpart,nloc);CHKERRQ(ierr);
1414         ierr      = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr);
1415         ierr      = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr);
1416         ierr      = MatDestroy(&adj);CHKERRQ(ierr);
1417         foundpart = PETSC_TRUE;
1418       }
1419       ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1420     }
1421     ierr = MatPartitioningDestroy(&mpart);CHKERRQ(ierr);
1422   }
1423   ierr = PetscMalloc1(nloc,&is);CHKERRQ(ierr);
1424   if (!foundpart) {
1425 
1426     /* Partitioning by contiguous chunks of rows */
1427 
1428     PetscInt mbs   = (rend-rstart)/bs;
1429     PetscInt start = rstart;
1430     for (i=0; i<nloc; i++) {
1431       PetscInt count = (mbs/nloc + ((mbs % nloc) > i)) * bs;
1432       ierr   = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr);
1433       start += count;
1434     }
1435 
1436   } else {
1437 
1438     /* Partitioning by adjacency of diagonal block  */
1439 
1440     const PetscInt *numbering;
1441     PetscInt       *count,nidx,*indices,*newidx,start=0;
1442     /* Get node count in each partition */
1443     ierr = PetscMalloc1(nloc,&count);CHKERRQ(ierr);
1444     ierr = ISPartitioningCount(ispart,nloc,count);CHKERRQ(ierr);
1445     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1446       for (i=0; i<nloc; i++) count[i] *= bs;
1447     }
1448     /* Build indices from node numbering */
1449     ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr);
1450     ierr = PetscMalloc1(nidx,&indices);CHKERRQ(ierr);
1451     for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
1452     ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr);
1453     ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr);
1454     ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr);
1455     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1456       ierr = PetscMalloc1(nidx*bs,&newidx);CHKERRQ(ierr);
1457       for (i=0; i<nidx; i++) {
1458         for (j=0; j<bs; j++) newidx[i*bs+j] = indices[i]*bs + j;
1459       }
1460       ierr    = PetscFree(indices);CHKERRQ(ierr);
1461       nidx   *= bs;
1462       indices = newidx;
1463     }
1464     /* Shift to get global indices */
1465     for (i=0; i<nidx; i++) indices[i] += rstart;
1466 
1467     /* Build the index sets for each block */
1468     for (i=0; i<nloc; i++) {
1469       ierr   = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);CHKERRQ(ierr);
1470       ierr   = ISSort(is[i]);CHKERRQ(ierr);
1471       start += count[i];
1472     }
1473 
1474     ierr = PetscFree(count);CHKERRQ(ierr);
1475     ierr = PetscFree(indices);CHKERRQ(ierr);
1476     ierr = ISDestroy(&isnumb);CHKERRQ(ierr);
1477     ierr = ISDestroy(&ispart);CHKERRQ(ierr);
1478   }
1479   *iis = is;
1480   PetscFunctionReturn(0);
1481 }
1482 
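/* Create N subdomains, each of which may straddle several processes, by coalescing
   the ownership ranges of groups of processes; see MatSubdomainsCreateCoalesce(). */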
1483 PETSC_INTERN PetscErrorCode  PCGASMCreateStraddlingSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
1484 {
1485   PetscErrorCode  ierr;
1486 
1487   PetscFunctionBegin;
1488   ierr = MatSubdomainsCreateCoalesce(A,N,n,iis);CHKERRQ(ierr);
1489   PetscFunctionReturn(0);
1490 }
1491 
1492 
1493 
1494 /*@C
1495    PCGASMCreateSubdomains - Creates index sets defining N nonoverlapping subdomains for the additive
1496    Schwarz preconditioner for any problem based on its matrix.
1497 
1498    Collective
1499 
1500    Input Parameters:
1501 +  A       - The global matrix operator
1502 -  N       - the number of global subdomains requested
1503 
1504    Output Parameters:
1505 +  n   - the number of subdomains created on this processor
1506 -  iis - the array of index sets defining the local inner subdomains (on which the correction is applied)
1507 
1508    Level: advanced
1509 
1510    Note: When N >= A's communicator size, each subdomain is local -- contained within a single processor.
1511          When N < size, the subdomains 'straddle' processor boundaries and are no longer local.
1512          The resulting subdomains can be used in PCGASMSetSubdomains(pc,n,iis,NULL).  The overlapping
1513          outer subdomains will be automatically generated from these according to the requested amount of
1514          overlap; this is currently supported only with local subdomains.
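
   A minimal sketch of typical use (assuming A is the system matrix and pc a PCGASM preconditioner):
.vb
      PetscInt n;
      IS       *iis;
      PCGASMCreateSubdomains(A,N,&n,&iis);
      PCGASMSetSubdomains(pc,n,iis,NULL);
      PCGASMDestroySubdomains(n,&iis,NULL);
.ve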
1515 
1516 
1517 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid
1518 
1519 .seealso: PCGASMSetSubdomains(), PCGASMDestroySubdomains()
1520 @*/
1521 PetscErrorCode  PCGASMCreateSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
1522 {
1523   PetscMPIInt     size,rank;
1524   PetscErrorCode  ierr;
1525 
1526   PetscFunctionBegin;
1527   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1528   PetscValidPointer(iis,4);
1529 
1530   if (N < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Number of subdomains must be > 0, N = %D",N);
1531   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
1532   if (N >= size) {
1533     *n = N/size + ((N%size) > rank); /* spread the remainder N%size over the first N%size ranks */
1534     ierr = PCGASMCreateLocalSubdomains(A,*n,iis);CHKERRQ(ierr);
1535   } else {
1536     ierr = PCGASMCreateStraddlingSubdomains(A,N,n,iis);CHKERRQ(ierr);
1537   }
1538   PetscFunctionReturn(0);
1539 }
1540 
1541 /*@C
1542    PCGASMDestroySubdomains - Destroys the index sets created with
1543    PCGASMCreateSubdomains() or PCGASMCreateSubdomains2D(). Should be
1544    called after setting subdomains with PCGASMSetSubdomains().
1545 
1546    Collective
1547 
1548    Input Parameters:
1549 +  n   - the number of index sets
1550 .  iis - the array of inner subdomains,
1551 -  ois - the array of outer subdomains, can be NULL
1552 
1553    Level: intermediate
1554 
1555    Notes:
1556     This is merely a convenience subroutine that walks each list,
1557    destroys each IS on the list, and then frees the list. At the end the
1558    list pointers are set to NULL.
1559 
1560 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid
1561 
1562 .seealso: PCGASMCreateSubdomains(), PCGASMSetSubdomains()
1563 @*/
1564 PetscErrorCode  PCGASMDestroySubdomains(PetscInt n,IS **iis,IS **ois)
1565 {
1566   PetscInt       i;
1567   PetscErrorCode ierr;
1568 
1569   PetscFunctionBegin;
1570   if (n <= 0) PetscFunctionReturn(0);
1571   if (ois) {
1572     PetscValidPointer(ois,3);
1573     if (*ois) {
1574       PetscValidPointer(*ois,3);
1575       for (i=0; i<n; i++) {
1576         ierr = ISDestroy(&(*ois)[i]);CHKERRQ(ierr);
1577       }
1578       ierr = PetscFree((*ois));CHKERRQ(ierr);
1579     }
1580   }
1581   if (iis) {
1582     PetscValidPointer(iis,2);
1583     if (*iis) {
1584       PetscValidPointer(*iis,2);
1585       for (i=0; i<n; i++) {
1586         ierr = ISDestroy(&(*iis)[i]);CHKERRQ(ierr);
1587       }
1588       ierr = PetscFree((*iis));CHKERRQ(ierr);
1589     }
1590   }
1591   PetscFunctionReturn(0);
1592 }
1593 
1594 
1595 #define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \
1596   {                                                                                                       \
1597     PetscInt first_row = first/M, last_row = last/M+1;                                                     \
1598     /*                                                                                                    \
1599      Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners       \
1600      of the bounding box of the intersection of the subdomain with the local ownership range (local       \
1601      subdomain).                                                                                          \
1602      Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows       \
1603      of the intersection.                                                                                 \
1604     */                                                                                                    \
1605     /* ylow_loc is the grid row containing the first element of the local subdomain */                    \
1606     *ylow_loc = PetscMax(first_row,ylow);                                                                 \
1607     /* xleft_loc is the offset of the first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
1608     *xleft_loc = *ylow_loc==first_row ? PetscMax(first%M,xleft) : xleft;                                  \
1609     /* yhigh_loc is the grid row above the last local subdomain element */                                \
1610     *yhigh_loc = PetscMin(last_row,yhigh);                                                                \
1611     /* xright_loc is the offset of the end of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
1612     *xright_loc = *yhigh_loc==last_row ? PetscMin(xright,last%M) : xright;                                \
1613     /* Now compute the size of the local subdomain n. */ \
1614     *n = 0;                                               \
1615     if (*ylow_loc < *yhigh_loc) {                           \
1616       PetscInt width = xright-xleft;                     \
1617       *n += width*(*yhigh_loc-*ylow_loc-1);                 \
1618       *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \
1619       *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \
1620     } \
1621   }
1622 
1623 
1624 
1625 /*@
1626    PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1627    preconditioner for a two-dimensional problem on a regular grid.
1628 
1629    Collective
1630 
1631    Input Parameters:
1632 +  M, N               - the global number of grid points in the x and y directions
1633 .  Mdomains, Ndomains - the global number of subdomains in the x and y directions
1634 .  dof                - degrees of freedom per node
1635 -  overlap            - overlap in mesh lines
1636 
1637    Output Parameters:
1638 +  Nsub - the number of local subdomains created
1639 .  iis  - array of index sets defining inner (nonoverlapping) subdomains
1640 -  ois  - array of index sets defining outer (overlapping, if overlap > 0) subdomains
1641 
1642 
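   A minimal sketch of typical use (2 x 2 subdomains, one unknown per grid point, overlap of one mesh line):
.vb
      PetscInt nsub;
      IS       *iis,*ois;
      PCGASMCreateSubdomains2D(pc,M,N,2,2,1,1,&nsub,&iis,&ois);
      PCGASMSetSubdomains(pc,nsub,iis,ois);
.ve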
1643    Level: advanced
1644 
1645 .keywords: PC, GASM, additive Schwarz, create, subdomains, 2D, regular grid
1646 
1647 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(), PCGASMSetOverlap()
1648 @*/
1649 PetscErrorCode  PCGASMCreateSubdomains2D(PC pc,PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap,PetscInt *nsub,IS **iis,IS **ois)
1650 {
1651   PetscErrorCode ierr;
1652   PetscMPIInt    size, rank;
1653   PetscInt       i, j;
1654   PetscInt       maxheight, maxwidth;
1655   PetscInt       xstart, xleft, xright, xleft_loc, xright_loc;
1656   PetscInt       ystart, ylow,  yhigh,  ylow_loc,  yhigh_loc;
1657   PetscInt       x[2][2], y[2][2], n[2];
1658   PetscInt       first, last;
1659   PetscInt       nidx, *idx;
1660   PetscInt       ii,jj,s,q,d;
1661   PetscInt       k,kk;
1662   PetscMPIInt    color;
1663   MPI_Comm       comm, subcomm;
1664   IS             **xis = 0, **is = ois, **is_local = iis;
1665 
1666   PetscFunctionBegin;
1667   ierr = PetscObjectGetComm((PetscObject)pc, &comm);CHKERRQ(ierr);
1668   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1669   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1670   ierr = MatGetOwnershipRange(pc->pmat, &first, &last);CHKERRQ(ierr);
1671   if (first%dof || last%dof) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) "
1672                                       "does not respect the number of degrees of freedom per grid point %D", first, last, dof);
1673 
1674   /* Determine the number of domains with nonzero intersections with the local ownership range. */
1675   s      = 0;
1676   ystart = 0;
1677   for (j=0; j<Ndomains; ++j) {
1678     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1679     if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical direction for mesh height %D", Ndomains, N);
1680     /* Vertical domain limits with an overlap. */
1681     ylow   = PetscMax(ystart - overlap,0);
1682     yhigh  = PetscMin(ystart + maxheight + overlap,N);
1683     xstart = 0;
1684     for (i=0; i<Mdomains; ++i) {
1685       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1686       if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1687       /* Horizontal domain limits with an overlap. */
1688       xleft  = PetscMax(xstart - overlap,0);
1689       xright = PetscMin(xstart + maxwidth + overlap,M);
1690       /*
1691          Determine whether this subdomain intersects this processor's ownership range of pc->pmat.
1692       */
1693       PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1694       if (nidx) ++s;
1695       xstart += maxwidth;
1696     } /* for (i = 0; i < Mdomains; ++i) */
1697     ystart += maxheight;
1698   } /* for (j = 0; j < Ndomains; ++j) */
1699 
1700   /* Now we can allocate the necessary number of ISs. */
1701   *nsub  = s;
1702   ierr   = PetscMalloc1(*nsub,is);CHKERRQ(ierr);
1703   ierr   = PetscMalloc1(*nsub,is_local);CHKERRQ(ierr);
1704   s      = 0;
1705   ystart = 0;
1706   for (j=0; j<Ndomains; ++j) {
1707     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1708     if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical direction for mesh height %D", Ndomains, N);
1709     /* Vertical domain limits with an overlap. */
1710     y[0][0] = PetscMax(ystart - overlap,0);
1711     y[0][1] = PetscMin(ystart + maxheight + overlap,N);
1712     /* Vertical domain limits without an overlap. */
1713     y[1][0] = ystart;
1714     y[1][1] = PetscMin(ystart + maxheight,N);
1715     xstart  = 0;
1716     for (i=0; i<Mdomains; ++i) {
1717       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1718       if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1719       /* Horizontal domain limits with an overlap. */
1720       x[0][0] = PetscMax(xstart - overlap,0);
1721       x[0][1] = PetscMin(xstart + maxwidth + overlap,M);
1722       /* Horizontal domain limits without an overlap. */
1723       x[1][0] = xstart;
1724       x[1][1] = PetscMin(xstart+maxwidth,M);
1725       /*
1726          Determine whether this domain intersects this processor's ownership range of pc->pmat.
1727          Do this twice: first for the domains with overlaps, and once without.
1728          During the first pass create the subcommunicators, and use them on the second pass as well.
1729       */
1730       for (q = 0; q < 2; ++q) {
1731         PetscBool split = PETSC_FALSE;
1732         /*
1733           The domain limits, (xleft, xright) and (ylow, yhigh), are adjusted
1734           according to whether the domain with or without an overlap is considered.
1735         */
1736         xleft = x[q][0]; xright = x[q][1];
1737         ylow  = y[q][0]; yhigh  = y[q][1];
1738         PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1739         nidx *= dof;
1740         n[q]  = nidx;
1741         /*
1742          Based on the counted number of indices in the local domain *with an overlap*,
1743          construct a subcommunicator of all the processors supporting this domain.
1744          Observe that a domain with an overlap might have nontrivial local support,
1745          while the domain without an overlap might not.  Hence, the decision to participate
1746          in the subcommunicator must be based on the domain with an overlap.
1747          */
1748         if (q == 0) {
1749           if (nidx) color = 1;
1750           else color = MPI_UNDEFINED;
1751           ierr = MPI_Comm_split(comm, color, rank, &subcomm);CHKERRQ(ierr);
1752           split = PETSC_TRUE;
1753         }
1754         /*
1755          Proceed only if the number of local indices *with an overlap* is nonzero.
1756          */
1757         if (n[0]) {
1758           if (q == 0) xis = is;
1759           if (q == 1) {
1760             /*
1761              The IS for the no-overlap subdomain shares a communicator with the overlapping domain.
1762              Moreover, if the overlap is zero, the two ISs are identical.
1763              */
1764             if (overlap == 0) {
1765               (*is_local)[s] = (*is)[s];
1766               ierr           = PetscObjectReference((PetscObject)(*is)[s]);CHKERRQ(ierr);
1767               continue;
1768             } else {
1769               xis     = is_local;
1770               subcomm = ((PetscObject)(*is)[s])->comm;
1771             }
1772           } /* if (q == 1) */
1773           idx  = NULL;
1774           ierr = PetscMalloc1(nidx,&idx);CHKERRQ(ierr);
1775           if (nidx) {
1776             k = 0;
1777             for (jj=ylow_loc; jj<yhigh_loc; ++jj) {
1778               PetscInt x0 = (jj==ylow_loc) ? xleft_loc : xleft;
1779               PetscInt x1 = (jj==yhigh_loc-1) ? xright_loc : xright;
1780               kk = dof*(M*jj + x0);
1781               for (ii=x0; ii<x1; ++ii) {
1782                 for (d = 0; d < dof; ++d) {
1783                   idx[k++] = kk++;
1784                 }
1785               }
1786             }
1787           }
1788           ierr = ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*xis)+s);CHKERRQ(ierr);
1789           if (split) {
1790             ierr = MPI_Comm_free(&subcomm);CHKERRQ(ierr);
1791           }
1792         }/* if (n[0]) */
1793       }/* for (q = 0; q < 2; ++q) */
1794       if (n[0]) ++s;
1795       xstart += maxwidth;
1796     } /* for (i = 0; i < Mdomains; ++i) */
1797     ystart += maxheight;
1798   } /* for (j = 0; j < Ndomains; ++j) */
1799   PetscFunctionReturn(0);
1800 }
1801 
1802 /*@C
1803     PCGASMGetSubdomains - Gets the subdomains supported on this processor
1804     for the additive Schwarz preconditioner.
1805 
1806     Not Collective
1807 
1808     Input Parameter:
1809 .   pc - the preconditioner context
1810 
1811     Output Parameters:
1812 +   n   - the number of subdomains for this processor (default value = 1)
1813 .   iis - the index sets that define the inner subdomains (without overlap) supported on this processor (can be NULL)
1814 -   ois - the index sets that define the outer subdomains (with overlap) supported on this processor (can be NULL)
1815 
1816 
1817     Notes:
1818     The user is responsible for destroying the ISs and freeing the returned arrays.
1819     The IS numbering is in the parallel, global numbering of the vector.
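
    A minimal sketch (after the preconditioner has been set up):
.vb
      PetscInt n;
      IS       *iis,*ois;
      PCGASMGetSubdomains(pc,&n,&iis,&ois);
      /* ... examine the index sets ... */
      PetscFree(iis);  /* free the arrays; see the note above concerning the ISs themselves */
      PetscFree(ois);
.ve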
1820 
1821     Level: advanced
1822 
1823 .keywords: PC, GASM, get, subdomains, additive Schwarz
1824 
1825 .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(), PCGASMCreateSubdomains2D(),
1826           PCGASMSetSubdomains(), PCGASMGetSubmatrices()
1827 @*/
1828 PetscErrorCode  PCGASMGetSubdomains(PC pc,PetscInt *n,IS *iis[],IS *ois[])
1829 {
1830   PC_GASM        *osm;
1831   PetscErrorCode ierr;
1832   PetscBool      match;
1833   PetscInt       i;
1834 
1835   PetscFunctionBegin;
1836   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1837   ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1838   if (!match) SETERRQ2(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Incorrect object type: expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
1839   osm = (PC_GASM*)pc->data;
1840   if (n) *n = osm->n;
1841   if (iis) {
1842     ierr = PetscMalloc1(osm->n, iis);CHKERRQ(ierr);
1843   }
1844   if (ois) {
1845     ierr = PetscMalloc1(osm->n, ois);CHKERRQ(ierr);
1846   }
1847   if (iis || ois) {
1848     for (i = 0; i < osm->n; ++i) {
1849       if (iis) (*iis)[i] = osm->iis[i];
1850       if (ois) (*ois)[i] = osm->ois[i];
1851     }
1852   }
1853   PetscFunctionReturn(0);
1854 }
1855 
1856 /*@C
1857     PCGASMGetSubmatrices - Gets the local submatrices (for this processor
1858     only) for the additive Schwarz preconditioner.
1859 
1860     Not Collective
1861 
1862     Input Parameter:
1863 .   pc - the preconditioner context
1864 
1865     Output Parameters:
1866 +   n   - the number of matrices for this processor (default value = 1)
1867 -   mat - the matrices
1868 
1869     Notes:
1870     Matrices returned by this routine have the same communicators as the index sets (IS)
1871     used to define the subdomains in PCGASMSetSubdomains().
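
    A minimal sketch (assuming ksp has been set up and its PC has type PCGASM):
.vb
      PetscInt n;
      Mat      *mats;
      PCGASMGetSubmatrices(pc,&n,&mats);  /* mats points at the PC's internal array; do not free or destroy */
.ve
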
1872     Level: advanced
1873 
1874 .keywords: PC, GASM, set, local, subdomains, additive Schwarz, block Jacobi
1875 
1876 .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(),
1877           PCGASMCreateSubdomains2D(), PCGASMSetSubdomains(), PCGASMGetSubdomains()
1878 @*/
1879 PetscErrorCode  PCGASMGetSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1880 {
1881   PC_GASM        *osm;
1882   PetscErrorCode ierr;
1883   PetscBool      match;
1884 
1885   PetscFunctionBegin;
1886   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1887   PetscValidIntPointer(n,2);
1888   if (mat) PetscValidPointer(mat,3);
1889   if (!pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUp() or PCSetUp().");
1890   ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1891   if (!match) SETERRQ2(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
1892   osm = (PC_GASM*)pc->data;
1893   if (n) *n = osm->n;
1894   if (mat) *mat = osm->pmat;
1895   PetscFunctionReturn(0);
1896 }
1897 
1898 /*@
1899     PCGASMSetUseDMSubdomains - Indicates whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.

1900     Logically Collective on PC
1901 
1902     Input Parameters:
1903 +   pc  - the preconditioner
1904 -   flg - boolean indicating whether to use subdomains defined by the DM
1905 
1906     Options Database Key:
1907 .   -pc_gasm_dm_subdomains <flg> - use subdomains defined by the DM (overridden by -pc_gasm_overlap and -pc_gasm_total_subdomains)
1908 
1909     Level: intermediate
1910 
1911     Notes:
1912     PCGASMSetSubdomains(), PCGASMSetTotalSubdomains(), and PCGASMSetOverlap() take precedence over PCGASMSetUseDMSubdomains():
1913     calling PCGASMSetSubdomains() with nontrivial subdomain ISs, or calling PCGASMSetTotalSubdomains() or PCGASMSetOverlap(),
1914     automatically turns off the use of DM-defined subdomains.
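
    A minimal sketch (must be called before the preconditioner is set up):
.vb
      PCSetType(pc,PCGASM);
      PCGASMSetUseDMSubdomains(pc,PETSC_TRUE);
.ve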
1915 
1916 .keywords: PC, GASM, DM, set, subdomains, additive Schwarz
1917 
1918 .seealso: PCGASMGetUseDMSubdomains(), PCGASMSetSubdomains(), PCGASMSetOverlap()
1919           PCGASMCreateSubdomains2D()
1920 @*/
1921 PetscErrorCode  PCGASMSetUseDMSubdomains(PC pc,PetscBool flg)
1922 {
1923   PC_GASM        *osm = (PC_GASM*)pc->data;
1924   PetscErrorCode ierr;
1925   PetscBool      match;
1926 
1927   PetscFunctionBegin;
1928   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1929   PetscValidLogicalCollectiveBool(pc,flg,2);
1930   if (pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Cannot be called after the PC has been set up.");
1931   ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1932   if (match) {
1933     if (!osm->user_subdomains && osm->N == PETSC_DETERMINE && osm->overlap < 0) {
1934       osm->dm_subdomains = flg;
1935     }
1936   }
1937   PetscFunctionReturn(0);
1938 }
1939 
1940 /*@
1941     PCGASMGetUseDMSubdomains - Returns a flag indicating whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.

1942     Not Collective
1943 
1944     Input Parameter:
1945 .   pc  - the preconditioner
1946 
1947     Output Parameter:
1948 .   flg - boolean indicating whether to use subdomains defined by the DM
1949 
1950     Level: intermediate
1951 
1952 .keywords: PC, GASM, DM, set, subdomains, additive Schwarz
1953 
1954 .seealso: PCGASMSetUseDMSubdomains(), PCGASMSetOverlap()
1955           PCGASMCreateSubdomains2D()
1956 @*/
1957 PetscErrorCode  PCGASMGetUseDMSubdomains(PC pc,PetscBool* flg)
1958 {
1959   PC_GASM        *osm = (PC_GASM*)pc->data;
1960   PetscErrorCode ierr;
1961   PetscBool      match;
1962 
1963   PetscFunctionBegin;
1964   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1965   PetscValidPointer(flg,2);
1966   ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1967   if (match) {
1968     if (flg) *flg = osm->dm_subdomains;
1969   }
1970   PetscFunctionReturn(0);
1971 }
1972