xref: /petsc/src/ksp/pc/impls/gasm/gasm.c (revision 7687e1ecc0da34deafbca176f4acbcceb4e2b94c)
1 /*
2   This file defines an "generalized" additive Schwarz preconditioner for any Mat implementation.
3   In this version each processor may intersect multiple subdomains and any subdomain may
4   intersect multiple processors.  Intersections of subdomains with processors are called *local
5   subdomains*.
6 
7        N    - total number of distinct global subdomains          (set explicitly in PCGASMSetTotalSubdomains() or implicitly PCGASMSetSubdomains() and then calculated in PCSetUp_GASM())
8        n    - actual number of local subdomains on this processor (set in PCGASMSetSubdomains() or calculated in PCGASMSetTotalSubdomains())
9        nmax - maximum number of local subdomains per processor    (calculated in PCSetUp_GASM())
10 */
11 #include <petsc/private/pcimpl.h>     /*I "petscpc.h" I*/
12 #include <petscdm.h>
13 
typedef struct {
  PetscInt    N,n,nmax;                 /* N: total number of global subdomains; n: number of local subdomains on this rank; nmax: max of n over all ranks (see file header comment) */
  PetscInt    overlap;                  /* overlap requested by user */
  PCGASMType  type;                     /* use reduced interpolation, restriction or both */
  PetscBool   type_set;                 /* if user set this value (so won't change it for symmetric problems) */
  PetscBool   same_subdomain_solvers;   /* flag indicating whether all local solvers are same */
  PetscBool   sort_indices;             /* flag to sort subdomain indices */
  PetscBool   user_subdomains;          /* whether the user set explicit subdomain index sets -- keep them on PCReset() */
  PetscBool   dm_subdomains;            /* whether DM is allowed to define subdomains */
  PetscBool   hierarchicalpartitioning; /* whether PCSetUp_GASM() performs a hierarchical repartitioning first (see PCGASMSetHierarchicalPartitioning()) */
  IS          *ois;                     /* index sets that define the outer (conceptually, overlapping) subdomains */
  IS          *iis;                     /* index sets that define the inner (conceptually, nonoverlapping) subdomains */
  KSP         *ksp;                     /* linear solvers for each subdomain */
  Mat         *pmat;                    /* subdomain block matrices */
  Vec         gx,gy;                    /* Merged work vectors */
  Vec         *x,*y;                    /* Split work vectors; storage aliases pieces of storage of the above merged vectors. */
  VecScatter  gorestriction;            /* merged restriction to disjoint union of outer subdomains */
  VecScatter  girestriction;            /* merged restriction to disjoint union of inner subdomains */
  VecScatter  pctoouter;                /* scatter between the permuted work vectors (pcx,pcy) and vectors in the original matrix layout; hierarchical partitioning only */
  IS          permutationIS;            /* rows ("fromrows") defining the hierarchical-partitioning permutation */
  Mat         permutationP;             /* permuted operator extracted via MatCreateSubMatrix(); temporarily substituted for pc->pmat */
  Mat         pcmat;                    /* saved original pc->pmat while permutationP is installed in its place */
  Vec         pcx,pcy;                  /* work vectors in the permuted layout; hierarchical partitioning only */
} PC_GASM;
38 
39 static PetscErrorCode  PCGASMComputeGlobalSubdomainNumbering_Private(PC pc,PetscInt **numbering,PetscInt **permutation)
40 {
41   PC_GASM        *osm = (PC_GASM*)pc->data;
42   PetscInt       i;
43   PetscErrorCode ierr;
44 
45   PetscFunctionBegin;
46   /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
47   ierr = PetscMalloc2(osm->n,numbering,osm->n,permutation);CHKERRQ(ierr);
48   ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->iis,NULL,*numbering);CHKERRQ(ierr);
49   for (i = 0; i < osm->n; ++i) (*permutation)[i] = i;
50   ierr = PetscSortIntWithPermutation(osm->n,*numbering,*permutation);CHKERRQ(ierr);
51   PetscFunctionReturn(0);
52 }
53 
54 static PetscErrorCode  PCGASMSubdomainView_Private(PC pc, PetscInt i, PetscViewer viewer)
55 {
56   PC_GASM        *osm = (PC_GASM*)pc->data;
57   PetscInt       j,nidx;
58   const PetscInt *idx;
59   PetscViewer    sviewer;
60   char           *cidx;
61   PetscErrorCode ierr;
62 
63   PetscFunctionBegin;
64   if (i < 0 || i > osm->n) SETERRQ2(PetscObjectComm((PetscObject)viewer), PETSC_ERR_ARG_WRONG, "Invalid subdomain %D: must nonnegative and less than %D", i, osm->n);
65   /* Inner subdomains. */
66   ierr = ISGetLocalSize(osm->iis[i], &nidx);CHKERRQ(ierr);
67   /*
68    No more than 15 characters per index plus a space.
69    PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
70    in case nidx == 0. That will take care of the space for the trailing '\0' as well.
71    For nidx == 0, the whole string 16 '\0'.
72    */
73 #define len  16*(nidx+1)+1
74   ierr = PetscMalloc1(len, &cidx);CHKERRQ(ierr);
75   ierr = PetscViewerStringOpen(PETSC_COMM_SELF, cidx, len, &sviewer);CHKERRQ(ierr);
76 #undef len
77   ierr = ISGetIndices(osm->iis[i], &idx);CHKERRQ(ierr);
78   for (j = 0; j < nidx; ++j) {
79     ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);CHKERRQ(ierr);
80   }
81   ierr = ISRestoreIndices(osm->iis[i],&idx);CHKERRQ(ierr);
82   ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr);
83   ierr = PetscViewerASCIIPrintf(viewer, "Inner subdomain:\n");CHKERRQ(ierr);
84   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
85   ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
86   ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr);
87   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
88   ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
89   ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr);
90   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
91   ierr = PetscFree(cidx);CHKERRQ(ierr);
92   /* Outer subdomains. */
93   ierr = ISGetLocalSize(osm->ois[i], &nidx);CHKERRQ(ierr);
94   /*
95    No more than 15 characters per index plus a space.
96    PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
97    in case nidx == 0. That will take care of the space for the trailing '\0' as well.
98    For nidx == 0, the whole string 16 '\0'.
99    */
100 #define len  16*(nidx+1)+1
101   ierr = PetscMalloc1(len, &cidx);CHKERRQ(ierr);
102   ierr = PetscViewerStringOpen(PETSC_COMM_SELF, cidx, len, &sviewer);CHKERRQ(ierr);
103 #undef len
104   ierr = ISGetIndices(osm->ois[i], &idx);CHKERRQ(ierr);
105   for (j = 0; j < nidx; ++j) {
106     ierr = PetscViewerStringSPrintf(sviewer,"%D ", idx[j]);CHKERRQ(ierr);
107   }
108   ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr);
109   ierr = ISRestoreIndices(osm->ois[i],&idx);CHKERRQ(ierr);
110   ierr = PetscViewerASCIIPrintf(viewer, "Outer subdomain:\n");CHKERRQ(ierr);
111   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
112   ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
113   ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr);
114   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
115   ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
116   ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr);
117   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
118   ierr = PetscFree(cidx);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
/*
 Optionally prints all subdomains (inner and outer index sets), one at a time in
 global-number order, when the option -pc_gasm_print_subdomains is set. The same
 option may carry a file name; output goes to stdout when it does not.
 Collective on the PC's communicator.
 */
static PetscErrorCode  PCGASMPrintSubdomains(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  const char     *prefix;
  char           fname[PETSC_MAX_PATH_LEN+1];
  PetscInt       l, d, count;
  PetscBool      doprint,found;
  PetscViewer    viewer, sviewer = NULL;
  PetscInt       *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
  doprint  = PETSC_FALSE;
  ierr = PetscOptionsGetBool(NULL,prefix,"-pc_gasm_print_subdomains",&doprint,NULL);CHKERRQ(ierr);
  if (!doprint) PetscFunctionReturn(0);
  /* Re-read the same option as a string: it doubles as an optional output file name. */
  ierr = PetscOptionsGetString(NULL,prefix,"-pc_gasm_print_subdomains",fname,sizeof(fname),&found);CHKERRQ(ierr);
  if (!found) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); };
  ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)pc),fname,&viewer);CHKERRQ(ierr);
  /*
   Make sure the viewer has a name. Otherwise this may cause a deadlock or other weird errors when creating a subcomm viewer:
   the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed collectively on the comm.
  */
  ierr = PetscObjectName((PetscObject)viewer);CHKERRQ(ierr);
  l    = 0;
  ierr = PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);CHKERRQ(ierr);
  for (count = 0; count < osm->N; ++count) {
    /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
    if (l<osm->n) {
      d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
      if (numbering[d] == count) {
        /* This rank participates in global subdomain 'count': view it on the subdomain's subcommunicator. */
        ierr = PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
        ierr = PCGASMSubdomainView_Private(pc,d,sviewer);CHKERRQ(ierr);
        ierr = PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
        ++l;
      }
    }
    /* Keep all ranks in lockstep so subdomains are printed strictly in global order. */
    ierr = MPI_Barrier(PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr);
  }
  ierr = PetscFree2(numbering,permutation);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
165 
/*
 Viewer for PCGASM: prints overlap/subdomain summary, then walks the subdomains
 in global-number order, printing per-subdomain info (and optionally the index
 sets with -pc_gasm_view_subdomains) and the subdomain KSP on each subdomain's
 subcommunicator. Only ASCII viewers are supported; others are a no-op.
 Collective on the PC's communicator.
 */
static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  const char     *prefix;
  PetscErrorCode ierr;
  PetscMPIInt    rank, size;
  PetscInt       bsz;
  PetscBool      iascii,view_subdomains=PETSC_FALSE;
  PetscViewer    sviewer;
  PetscInt       count, l;
  char           overlap[256]     = "user-defined overlap";
  char           gsubdomains[256] = "unknown total number of subdomains";
  char           msubdomains[256] = "unknown max number of local subdomains";
  PetscInt       *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank);CHKERRMPI(ierr);

  /* The default strings above are kept when the corresponding quantity is still undetermined. */
  if (osm->overlap >= 0) {
    ierr = PetscSNPrintf(overlap,sizeof(overlap),"requested amount of overlap = %D",osm->overlap);CHKERRQ(ierr);
  }
  if (osm->N != PETSC_DETERMINE) {
    ierr = PetscSNPrintf(gsubdomains, sizeof(gsubdomains), "total number of subdomains = %D",osm->N);CHKERRQ(ierr);
  }
  if (osm->nmax != PETSC_DETERMINE) {
    ierr = PetscSNPrintf(msubdomains,sizeof(msubdomains),"max number of local subdomains = %D",osm->nmax);CHKERRQ(ierr);
  }

  ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(NULL,prefix,"-pc_gasm_view_subdomains",&view_subdomains,NULL);CHKERRQ(ierr);

  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    /*
     Make sure the viewer has a name. Otherwise this may cause a deadlock when creating a subcomm viewer:
     the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed
     collectively on the comm.
     */
    ierr = PetscObjectName((PetscObject)viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  Restriction/interpolation type: %s\n",PCGASMTypes[osm->type]);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  %s\n",overlap);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  %s\n",gsubdomains);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  %s\n",msubdomains);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d|%d] number of locally-supported subdomains = %D\n",rank,size,osm->n);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    /* Cannot take advantage of osm->same_subdomain_solvers without a global numbering of subdomains. */
    ierr = PetscViewerASCIIPrintf(viewer,"  Subdomain solver info is as follows:\n");CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  - - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
    /* Make sure that everybody waits for the banner to be printed. */
    ierr = MPI_Barrier(PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr);
    /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
    ierr = PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);CHKERRQ(ierr);
    l = 0;
    for (count = 0; count < osm->N; ++count) {
      PetscMPIInt srank, ssize;
      if (l<osm->n) {
        PetscInt d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
        if (numbering[d] == count) {
          /* This rank participates in global subdomain 'count': print its info on the subdomain's subcommunicator. */
          ierr = MPI_Comm_size(((PetscObject)osm->ois[d])->comm, &ssize);CHKERRMPI(ierr);
          ierr = MPI_Comm_rank(((PetscObject)osm->ois[d])->comm, &srank);CHKERRMPI(ierr);
          ierr = PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
          ierr = ISGetLocalSize(osm->ois[d],&bsz);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"  [%d|%d] (subcomm [%d|%d]) local subdomain number %D, local size = %D\n",rank,size,srank,ssize,d,bsz);CHKERRQ(ierr);
          ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr);
          if (view_subdomains) {
            ierr = PCGASMSubdomainView_Private(pc,d,sviewer);CHKERRQ(ierr);
          }
          if (!pc->setupcalled) {
            ierr = PetscViewerASCIIPrintf(sviewer, "  Solver not set up yet: PCSetUp() not yet called\n");CHKERRQ(ierr);
          } else {
            ierr = KSPView(osm->ksp[d],sviewer);CHKERRQ(ierr);
          }
          ierr = PetscViewerASCIIPrintf(sviewer,"  - - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
          ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr);
          ierr = PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
          ++l;
        }
      }
      /* Keep all ranks in lockstep so subdomains are printed strictly in global order. */
      ierr = MPI_Barrier(PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr);
    }
    ierr = PetscFree2(numbering,permutation);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    /* this line is needed to match the extra PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
    ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
258 
259 PETSC_INTERN PetscErrorCode  PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[]);
260 
261 PetscErrorCode PCGASMSetHierarchicalPartitioning(PC pc)
262 {
263    PC_GASM              *osm = (PC_GASM*)pc->data;
264    MatPartitioning       part;
265    MPI_Comm              comm;
266    PetscMPIInt           size;
267    PetscInt              nlocalsubdomains,fromrows_localsize;
268    IS                    partitioning,fromrows,isn;
269    Vec                   outervec;
270    PetscErrorCode        ierr;
271 
272    PetscFunctionBegin;
273    ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
274    ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
275    /* we do not need a hierarchical partitioning when
276     * the total number of subdomains is consistent with
277     * the number of MPI tasks.
278     * For the following cases, we do not need to use HP
279     * */
280    if (osm->N==PETSC_DETERMINE || osm->N>=size || osm->N==1) PetscFunctionReturn(0);
281    if (size%osm->N != 0) SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_ARG_INCOMP,"have to specify the total number of subdomains %D to be a factor of the number of processors %d \n",osm->N,size);
282    nlocalsubdomains = size/osm->N;
283    osm->n           = 1;
284    ierr = MatPartitioningCreate(comm,&part);CHKERRQ(ierr);
285    ierr = MatPartitioningSetAdjacency(part,pc->pmat);CHKERRQ(ierr);
286    ierr = MatPartitioningSetType(part,MATPARTITIONINGHIERARCH);CHKERRQ(ierr);
287    ierr = MatPartitioningHierarchicalSetNcoarseparts(part,osm->N);CHKERRQ(ierr);
288    ierr = MatPartitioningHierarchicalSetNfineparts(part,nlocalsubdomains);CHKERRQ(ierr);
289    ierr = MatPartitioningSetFromOptions(part);CHKERRQ(ierr);
290    /* get new processor owner number of each vertex */
291    ierr = MatPartitioningApply(part,&partitioning);CHKERRQ(ierr);
292    ierr = ISBuildTwoSided(partitioning,NULL,&fromrows);CHKERRQ(ierr);
293    ierr = ISPartitioningToNumbering(partitioning,&isn);CHKERRQ(ierr);
294    ierr = ISDestroy(&isn);CHKERRQ(ierr);
295    ierr = ISGetLocalSize(fromrows,&fromrows_localsize);CHKERRQ(ierr);
296    ierr = MatPartitioningDestroy(&part);CHKERRQ(ierr);
297    ierr = MatCreateVecs(pc->pmat,&outervec,NULL);CHKERRQ(ierr);
298    ierr = VecCreateMPI(comm,fromrows_localsize,PETSC_DETERMINE,&(osm->pcx));CHKERRQ(ierr);
299    ierr = VecDuplicate(osm->pcx,&(osm->pcy));CHKERRQ(ierr);
300    ierr = VecScatterCreate(osm->pcx,NULL,outervec,fromrows,&(osm->pctoouter));CHKERRQ(ierr);
301    ierr = MatCreateSubMatrix(pc->pmat,fromrows,fromrows,MAT_INITIAL_MATRIX,&(osm->permutationP));CHKERRQ(ierr);
302    ierr = PetscObjectReference((PetscObject)fromrows);CHKERRQ(ierr);
303    osm->permutationIS = fromrows;
304    osm->pcmat =  pc->pmat;
305    ierr = PetscObjectReference((PetscObject)osm->permutationP);CHKERRQ(ierr);
306    pc->pmat = osm->permutationP;
307    ierr = VecDestroy(&outervec);CHKERRQ(ierr);
308    ierr = ISDestroy(&fromrows);CHKERRQ(ierr);
309    ierr = ISDestroy(&partitioning);CHKERRQ(ierr);
310    osm->n           = PETSC_DETERMINE;
311    PetscFunctionReturn(0);
312 }
313 
314 static PetscErrorCode PCSetUp_GASM(PC pc)
315 {
316   PC_GASM        *osm = (PC_GASM*)pc->data;
317   PetscErrorCode ierr;
318   PetscInt       i,nInnerIndices,nTotalInnerIndices;
319   PetscMPIInt    rank, size;
320   MatReuse       scall = MAT_REUSE_MATRIX;
321   KSP            ksp;
322   PC             subpc;
323   const char     *prefix,*pprefix;
324   Vec            x,y;
325   PetscInt       oni;       /* Number of indices in the i-th local outer subdomain.               */
326   const PetscInt *oidxi;    /* Indices from the i-th subdomain local outer subdomain.             */
327   PetscInt       on;        /* Number of indices in the disjoint union of local outer subdomains. */
328   PetscInt       *oidx;     /* Indices in the disjoint union of local outer subdomains. */
329   IS             gois;      /* Disjoint union the global indices of outer subdomains.             */
330   IS             goid;      /* Identity IS of the size of the disjoint union of outer subdomains. */
331   PetscScalar    *gxarray, *gyarray;
332   PetscInt       gostart;   /* Start of locally-owned indices in the vectors -- osm->gx,osm->gy -- over the disjoint union of outer subdomains. */
333   PetscInt       num_subdomains    = 0;
334   DM             *subdomain_dm     = NULL;
335   char           **subdomain_names = NULL;
336   PetscInt       *numbering;
337 
338 
339   PetscFunctionBegin;
340   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRMPI(ierr);
341   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRMPI(ierr);
342   if (!pc->setupcalled) {
343         /* use a hierarchical partitioning */
344     if (osm->hierarchicalpartitioning){
345       ierr = PCGASMSetHierarchicalPartitioning(pc);CHKERRQ(ierr);
346     }
347     if (osm->n == PETSC_DETERMINE) {
348       if (osm->N != PETSC_DETERMINE) {
349            /* No local subdomains given, but the desired number of total subdomains is known, so construct them accordingly. */
350            ierr = PCGASMCreateSubdomains(pc->pmat,osm->N,&osm->n,&osm->iis);CHKERRQ(ierr);
351       } else if (osm->dm_subdomains && pc->dm) {
352         /* try pc->dm next, if allowed */
353         PetscInt  d;
354         IS       *inner_subdomain_is, *outer_subdomain_is;
355         ierr = DMCreateDomainDecomposition(pc->dm, &num_subdomains, &subdomain_names, &inner_subdomain_is, &outer_subdomain_is, &subdomain_dm);CHKERRQ(ierr);
356         if (num_subdomains) {
357           ierr = PCGASMSetSubdomains(pc, num_subdomains, inner_subdomain_is, outer_subdomain_is);CHKERRQ(ierr);
358         }
359         for (d = 0; d < num_subdomains; ++d) {
360           if (inner_subdomain_is) {ierr = ISDestroy(&inner_subdomain_is[d]);CHKERRQ(ierr);}
361           if (outer_subdomain_is) {ierr = ISDestroy(&outer_subdomain_is[d]);CHKERRQ(ierr);}
362         }
363         ierr = PetscFree(inner_subdomain_is);CHKERRQ(ierr);
364         ierr = PetscFree(outer_subdomain_is);CHKERRQ(ierr);
365       } else {
366         /* still no subdomains; use one per processor */
367         osm->nmax = osm->n = 1;
368         ierr      = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr);
369         osm->N    = size;
370         ierr = PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);CHKERRQ(ierr);
371       }
372     }
373     if (!osm->iis) {
374       /*
375        osm->n was set in PCGASMSetSubdomains(), but the actual subdomains have not been supplied.
376        We create the requisite number of local inner subdomains and then expand them into
377        out subdomains, if necessary.
378        */
379       ierr = PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);CHKERRQ(ierr);
380     }
381     if (!osm->ois) {
382       /*
383             Initially make outer subdomains the same as inner subdomains. If nonzero additional overlap
384             has been requested, copy the inner subdomains over so they can be modified.
385       */
386       ierr = PetscMalloc1(osm->n,&osm->ois);CHKERRQ(ierr);
387       for (i=0; i<osm->n; ++i) {
388         if (osm->overlap > 0 && osm->N>1) { /* With positive overlap, osm->iis[i] will be modified */
389           ierr = ISDuplicate(osm->iis[i],(osm->ois)+i);CHKERRQ(ierr);
390           ierr = ISCopy(osm->iis[i],osm->ois[i]);CHKERRQ(ierr);
391         } else {
392           ierr      = PetscObjectReference((PetscObject)((osm->iis)[i]));CHKERRQ(ierr);
393           osm->ois[i] = osm->iis[i];
394         }
395       }
396       if (osm->overlap>0 && osm->N>1) {
397         /* Extend the "overlapping" regions by a number of steps */
398         ierr = MatIncreaseOverlapSplit(pc->pmat,osm->n,osm->ois,osm->overlap);CHKERRQ(ierr);
399       }
400     }
401 
402     /* Now the subdomains are defined.  Determine their global and max local numbers, if necessary. */
403     if (osm->nmax == PETSC_DETERMINE) {
404       PetscMPIInt inwork,outwork;
405       /* determine global number of subdomains and the max number of local subdomains */
406       inwork     = osm->n;
407       ierr       = MPIU_Allreduce(&inwork,&outwork,1,MPI_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
408       osm->nmax  = outwork;
409     }
410     if (osm->N == PETSC_DETERMINE) {
411       /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
412       ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,&osm->N,NULL);CHKERRQ(ierr);
413     }
414 
415     if (osm->sort_indices) {
416       for (i=0; i<osm->n; i++) {
417         ierr = ISSort(osm->ois[i]);CHKERRQ(ierr);
418         ierr = ISSort(osm->iis[i]);CHKERRQ(ierr);
419       }
420     }
421     ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
422     ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr);
423 
424     /*
425        Merge the ISs, create merged vectors and restrictions.
426      */
427     /* Merge outer subdomain ISs and construct a restriction onto the disjoint union of local outer subdomains. */
428     on = 0;
429     for (i=0; i<osm->n; i++) {
430       ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
431       on  += oni;
432     }
433     ierr = PetscMalloc1(on, &oidx);CHKERRQ(ierr);
434     on   = 0;
435     /* Merge local indices together */
436     for (i=0; i<osm->n; i++) {
437       ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
438       ierr = ISGetIndices(osm->ois[i],&oidxi);CHKERRQ(ierr);
439       ierr = PetscArraycpy(oidx+on,oidxi,oni);CHKERRQ(ierr);
440       ierr = ISRestoreIndices(osm->ois[i],&oidxi);CHKERRQ(ierr);
441       on  += oni;
442     }
443     ierr = ISCreateGeneral(((PetscObject)(pc))->comm,on,oidx,PETSC_OWN_POINTER,&gois);CHKERRQ(ierr);
444     nTotalInnerIndices = 0;
445     for (i=0; i<osm->n; i++){
446       ierr = ISGetLocalSize(osm->iis[i],&nInnerIndices);CHKERRQ(ierr);
447       nTotalInnerIndices += nInnerIndices;
448     }
449     ierr = VecCreateMPI(((PetscObject)(pc))->comm,nTotalInnerIndices,PETSC_DETERMINE,&x);CHKERRQ(ierr);
450     ierr = VecDuplicate(x,&y);CHKERRQ(ierr);
451 
452     ierr = VecCreateMPI(PetscObjectComm((PetscObject)pc),on,PETSC_DECIDE,&osm->gx);CHKERRQ(ierr);
453     ierr = VecDuplicate(osm->gx,&osm->gy);CHKERRQ(ierr);
454     ierr = VecGetOwnershipRange(osm->gx, &gostart, NULL);CHKERRQ(ierr);
455     ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),on,gostart,1, &goid);CHKERRQ(ierr);
456     /* gois might indices not on local */
457     ierr = VecScatterCreate(x,gois,osm->gx,goid, &(osm->gorestriction));CHKERRQ(ierr);
458     ierr = PetscMalloc1(osm->n,&numbering);CHKERRQ(ierr);
459     ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,NULL,numbering);CHKERRQ(ierr);
460     ierr = VecDestroy(&x);CHKERRQ(ierr);
461     ierr = ISDestroy(&gois);CHKERRQ(ierr);
462 
463     /* Merge inner subdomain ISs and construct a restriction onto the disjoint union of local inner subdomains. */
464     {
465       PetscInt        ini;           /* Number of indices the i-th a local inner subdomain. */
466       PetscInt        in;            /* Number of indices in the disjoint union of local inner subdomains. */
467       PetscInt       *iidx;          /* Global indices in the merged local inner subdomain. */
468       PetscInt       *ioidx;         /* Global indices of the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
469       IS              giis;          /* IS for the disjoint union of inner subdomains. */
470       IS              giois;         /* IS for the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
471       PetscScalar    *array;
472       const PetscInt *indices;
473       PetscInt        k;
474       on = 0;
475       for (i=0; i<osm->n; i++) {
476         ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
477         on  += oni;
478       }
479       ierr = PetscMalloc1(on, &iidx);CHKERRQ(ierr);
480       ierr = PetscMalloc1(on, &ioidx);CHKERRQ(ierr);
481       ierr = VecGetArray(y,&array);CHKERRQ(ierr);
482       /* set communicator id to determine where overlap is */
483       in   = 0;
484       for (i=0; i<osm->n; i++) {
485         ierr   = ISGetLocalSize(osm->iis[i],&ini);CHKERRQ(ierr);
486         for (k = 0; k < ini; ++k){
487           array[in+k] = numbering[i];
488         }
489         in += ini;
490       }
491       ierr = VecRestoreArray(y,&array);CHKERRQ(ierr);
492       ierr = VecScatterBegin(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
493       ierr = VecScatterEnd(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
494       ierr = VecGetOwnershipRange(osm->gy,&gostart, NULL);CHKERRQ(ierr);
495       ierr = VecGetArray(osm->gy,&array);CHKERRQ(ierr);
496       on  = 0;
497       in  = 0;
498       for (i=0; i<osm->n; i++) {
499         ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
500         ierr = ISGetIndices(osm->ois[i],&indices);CHKERRQ(ierr);
501         for (k=0; k<oni; k++) {
502           /*  skip overlapping indices to get inner domain */
503           if (PetscRealPart(array[on+k]) != numbering[i]) continue;
504           iidx[in]    = indices[k];
505           ioidx[in++] = gostart+on+k;
506         }
507         ierr   = ISRestoreIndices(osm->ois[i], &indices);CHKERRQ(ierr);
508         on += oni;
509       }
510       ierr = VecRestoreArray(osm->gy,&array);CHKERRQ(ierr);
511       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,iidx,PETSC_OWN_POINTER,&giis);CHKERRQ(ierr);
512       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,ioidx,PETSC_OWN_POINTER,&giois);CHKERRQ(ierr);
513       ierr = VecScatterCreate(y,giis,osm->gy,giois,&osm->girestriction);CHKERRQ(ierr);
514       ierr = VecDestroy(&y);CHKERRQ(ierr);
515       ierr = ISDestroy(&giis);CHKERRQ(ierr);
516       ierr = ISDestroy(&giois);CHKERRQ(ierr);
517     }
518     ierr = ISDestroy(&goid);CHKERRQ(ierr);
519     ierr = PetscFree(numbering);CHKERRQ(ierr);
520 
521     /* Create the subdomain work vectors. */
522     ierr = PetscMalloc1(osm->n,&osm->x);CHKERRQ(ierr);
523     ierr = PetscMalloc1(osm->n,&osm->y);CHKERRQ(ierr);
524     ierr = VecGetArray(osm->gx, &gxarray);CHKERRQ(ierr);
525     ierr = VecGetArray(osm->gy, &gyarray);CHKERRQ(ierr);
526     for (i=0, on=0; i<osm->n; ++i, on += oni) {
527       PetscInt oNi;
528       ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
529       /* on a sub communicator */
530       ierr = ISGetSize(osm->ois[i],&oNi);CHKERRQ(ierr);
531       ierr = VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gxarray+on,&osm->x[i]);CHKERRQ(ierr);
532       ierr = VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gyarray+on,&osm->y[i]);CHKERRQ(ierr);
533     }
534     ierr = VecRestoreArray(osm->gx, &gxarray);CHKERRQ(ierr);
535     ierr = VecRestoreArray(osm->gy, &gyarray);CHKERRQ(ierr);
536     /* Create the subdomain solvers */
537     ierr = PetscMalloc1(osm->n,&osm->ksp);CHKERRQ(ierr);
538     for (i=0; i<osm->n; i++) {
539       char subprefix[PETSC_MAX_PATH_LEN+1];
540       ierr        = KSPCreate(((PetscObject)(osm->ois[i]))->comm,&ksp);CHKERRQ(ierr);
541       ierr        = KSPSetErrorIfNotConverged(ksp,pc->erroriffailure);CHKERRQ(ierr);
542       ierr        = PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp);CHKERRQ(ierr);
543       ierr        = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr);
544       ierr        = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
545       ierr        = KSPGetPC(ksp,&subpc);CHKERRQ(ierr); /* Why do we need this here? */
546       if (subdomain_dm) {
547         ierr = KSPSetDM(ksp,subdomain_dm[i]);CHKERRQ(ierr);
548         ierr = DMDestroy(subdomain_dm+i);CHKERRQ(ierr);
549       }
550       ierr        = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
551       ierr        = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr);
552       if (subdomain_names && subdomain_names[i]) {
553         ierr = PetscSNPrintf(subprefix,PETSC_MAX_PATH_LEN,"sub_%s_",subdomain_names[i]);CHKERRQ(ierr);
554         ierr = KSPAppendOptionsPrefix(ksp,subprefix);CHKERRQ(ierr);
555         ierr = PetscFree(subdomain_names[i]);CHKERRQ(ierr);
556       }
557       ierr        = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr);
558       osm->ksp[i] = ksp;
559     }
560     ierr = PetscFree(subdomain_dm);CHKERRQ(ierr);
561     ierr = PetscFree(subdomain_names);CHKERRQ(ierr);
562     scall = MAT_INITIAL_MATRIX;
563   } else { /* if (pc->setupcalled) */
564     /*
565        Destroy the submatrices from the previous iteration
566     */
567     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
568       ierr  = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
569       scall = MAT_INITIAL_MATRIX;
570     }
571     if (osm->permutationIS){
572       ierr = MatCreateSubMatrix(pc->pmat,osm->permutationIS,osm->permutationIS,scall,&osm->permutationP);CHKERRQ(ierr);
573       ierr = PetscObjectReference((PetscObject)osm->permutationP);CHKERRQ(ierr);
574       osm->pcmat = pc->pmat;
575       pc->pmat   = osm->permutationP;
576     }
577   }
578 
579   /*
580      Extract the submatrices.
581   */
582   if (size > 1) {
583     ierr = MatCreateSubMatricesMPI(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);CHKERRQ(ierr);
584   } else {
585     ierr = MatCreateSubMatrices(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);CHKERRQ(ierr);
586   }
587   if (scall == MAT_INITIAL_MATRIX) {
588     ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr);
589     for (i=0; i<osm->n; i++) {
590       ierr = PetscLogObjectParent((PetscObject)pc,(PetscObject)osm->pmat[i]);CHKERRQ(ierr);
591       ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr);
592     }
593   }
594 
595   /* Return control to the user so that the submatrices can be modified (e.g., to apply
596      different boundary conditions for the submatrices than for the global problem) */
597   ierr = PCModifySubMatrices(pc,osm->n,osm->ois,osm->ois,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr);
598 
599   /*
600      Loop over submatrices putting them into local ksps
601   */
602   for (i=0; i<osm->n; i++) {
603     ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i]);CHKERRQ(ierr);
604     ierr = KSPGetOptionsPrefix(osm->ksp[i],&prefix);CHKERRQ(ierr);
605     ierr = MatSetOptionsPrefix(osm->pmat[i],prefix);CHKERRQ(ierr);
606     if (!pc->setupcalled) {
607       ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr);
608     }
609   }
610   if (osm->pcmat){
611     ierr = MatDestroy(&pc->pmat);CHKERRQ(ierr);
612     pc->pmat   = osm->pcmat;
613     osm->pcmat = NULL;
614   }
615   PetscFunctionReturn(0);
616 }
617 
618 static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc)
619 {
620   PC_GASM        *osm = (PC_GASM*)pc->data;
621   PetscErrorCode ierr;
622   PetscInt       i;
623 
624   PetscFunctionBegin;
625   for (i=0; i<osm->n; i++) {
626     ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr);
627   }
628   PetscFunctionReturn(0);
629 }
630 
/*
   PCApply_GASM - Applies the GASM preconditioner: restrict the input to the
   (overlapping) subdomains, solve on each subdomain with its own KSP, and
   add the corrections back into the global vector.
*/
static PetscErrorCode PCApply_GASM(PC pc,Vec xin,Vec yout)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;
  Vec            x,y;                                          /* working input/output; either the caller's vectors or the permuted copies */
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

  PetscFunctionBegin;
  /* If a permutation of the outer problem is in effect, first map the input into the permuted space. */
  if (osm->pctoouter) {
    ierr = VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    x = osm->pcx;
    y = osm->pcy;
  } else {
    x = xin;
    y = yout;
  }
  /*
     support for limiting the restriction or interpolation only to the inner
     subdomain values (leaving the other values 0).
  */
  if (!(osm->type & PC_GASM_RESTRICT)) {
    /* have to zero the work RHS since scatter may leave some slots empty */
    ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
    ierr = VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  } else {
    ierr = VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  /* The zeroing of gy is deliberately placed between VecScatterBegin and
     VecScatterEnd so that it overlaps with the communication started above. */
  ierr = VecZeroEntries(osm->gy);CHKERRQ(ierr);
  if (!(osm->type & PC_GASM_RESTRICT)) {
    ierr = VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  } else {
    ierr = VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  /* do the subdomain solves */
  for (i=0; i<osm->n; ++i) {
    ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
    ierr = KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);CHKERRQ(ierr);
  }
  /* do we need to zero y? */
  ierr = VecZeroEntries(y);CHKERRQ(ierr);
  /* Gather the subdomain corrections; ADD_VALUES accumulates contributions where subdomains overlap. */
  if (!(osm->type & PC_GASM_INTERPOLATE)) {
    ierr = VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  } else {
    ierr = VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  /* Undo the permutation, mapping the result back onto the caller's output vector. */
  if (osm->pctoouter) {
    ierr = VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
686 
687 static PetscErrorCode PCMatApply_GASM(PC pc,Mat Xin,Mat Yout)
688 {
689   PC_GASM        *osm = (PC_GASM*)pc->data;
690   Mat            X,Y,O=NULL,Z,W;
691   Vec            x,y;
692   PetscInt       i,m,M,N;
693   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
694   PetscErrorCode ierr;
695 
696   PetscFunctionBegin;
697   if (osm->n != 1) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented");
698   ierr = MatGetSize(Xin,NULL,&N);CHKERRQ(ierr);
699   if (osm->pctoouter) {
700     ierr = VecGetLocalSize(osm->pcx,&m);CHKERRQ(ierr);
701     ierr = VecGetSize(osm->pcx,&M);CHKERRQ(ierr);
702     ierr = MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&O);CHKERRQ(ierr);
703     for (i = 0; i < N; ++i) {
704       ierr = MatDenseGetColumnVecRead(Xin,i,&x);
705       ierr = MatDenseGetColumnVecWrite(O,i,&y);
706       ierr = VecScatterBegin(osm->pctoouter,x,y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
707       ierr = VecScatterEnd(osm->pctoouter,x,y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
708       ierr = MatDenseRestoreColumnVecWrite(O,i,&y);
709       ierr = MatDenseRestoreColumnVecRead(Xin,i,&x);
710     }
711     X = Y = O;
712   } else {
713     X = Xin;
714     Y = Yout;
715   }
716   /*
717      support for limiting the restriction or interpolation only to the inner
718      subdomain values (leaving the other values 0).
719   */
720   ierr = VecGetLocalSize(osm->x[0],&m);CHKERRQ(ierr);
721   ierr = VecGetSize(osm->x[0],&M);CHKERRQ(ierr);
722   ierr = MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&Z);CHKERRQ(ierr);
723   for (i = 0; i < N; ++i) {
724     ierr = MatDenseGetColumnVecRead(X,i,&x);
725     ierr = MatDenseGetColumnVecWrite(Z,i,&y);
726     if (!(osm->type & PC_GASM_RESTRICT)) {
727       /* have to zero the work RHS since scatter may leave some slots empty */
728       ierr = VecZeroEntries(y);CHKERRQ(ierr);
729       ierr = VecScatterBegin(osm->girestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr);
730       ierr = VecScatterEnd(osm->girestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr);
731     } else {
732       ierr = VecScatterBegin(osm->gorestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr);
733       ierr = VecScatterEnd(osm->gorestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr);
734     }
735     ierr = MatDenseRestoreColumnVecWrite(Z,i,&y);
736     ierr = MatDenseRestoreColumnVecRead(X,i,&x);
737   }
738   ierr = MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&W);CHKERRQ(ierr);
739   ierr = MatSetOption(Z,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
740   ierr = MatAssemblyBegin(Z,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
741   ierr = MatAssemblyEnd(Z,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
742   /* do the subdomain solve */
743   ierr = KSPMatSolve(osm->ksp[0],Z,W);CHKERRQ(ierr);
744   ierr = KSPCheckSolve(osm->ksp[0],pc,NULL);CHKERRQ(ierr);
745   ierr = MatDestroy(&Z);CHKERRQ(ierr);
746   /* do we need to zero y? */
747   ierr = MatZeroEntries(Y);CHKERRQ(ierr);
748   for (i = 0; i < N; ++i) {
749     ierr = MatDenseGetColumnVecWrite(Y,i,&y);
750     ierr = MatDenseGetColumnVecRead(W,i,&x);
751     if (!(osm->type & PC_GASM_INTERPOLATE)) {
752       ierr = VecScatterBegin(osm->girestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr);
753       ierr = VecScatterEnd(osm->girestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr);
754     } else {
755       ierr = VecScatterBegin(osm->gorestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr);
756       ierr = VecScatterEnd(osm->gorestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr);
757     }
758     ierr = MatDenseRestoreColumnVecRead(W,i,&x);
759     if (osm->pctoouter) {
760       ierr = MatDenseGetColumnVecWrite(Yout,i,&x);
761       ierr = VecScatterBegin(osm->pctoouter,y,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
762       ierr = VecScatterEnd(osm->pctoouter,y,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
763       ierr = MatDenseRestoreColumnVecRead(Yout,i,&x);
764     }
765     ierr = MatDenseRestoreColumnVecWrite(Y,i,&y);
766   }
767   ierr = MatDestroy(&W);CHKERRQ(ierr);
768   ierr = MatDestroy(&O);CHKERRQ(ierr);
769   PetscFunctionReturn(0);
770 }
771 
772 static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec xin,Vec yout)
773 {
774   PC_GASM        *osm = (PC_GASM*)pc->data;
775   PetscErrorCode ierr;
776   PetscInt       i;
777   Vec            x,y;
778   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
779 
780   PetscFunctionBegin;
781   if (osm->pctoouter){
782    ierr = VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
783    ierr = VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
784    x = osm->pcx;
785    y = osm->pcy;
786   }else{
787         x = xin;
788         y = yout;
789   }
790   /*
791      Support for limiting the restriction or interpolation to only local
792      subdomain values (leaving the other values 0).
793 
794      Note: these are reversed from the PCApply_GASM() because we are applying the
795      transpose of the three terms
796   */
797   if (!(osm->type & PC_GASM_INTERPOLATE)) {
798     /* have to zero the work RHS since scatter may leave some slots empty */
799     ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
800     ierr = VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
801   } else {
802     ierr = VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
803   }
804   ierr = VecZeroEntries(osm->gy);CHKERRQ(ierr);
805   if (!(osm->type & PC_GASM_INTERPOLATE)) {
806     ierr = VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
807   } else {
808     ierr = VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
809   }
810   /* do the local solves */
811   for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
812     ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
813     ierr = KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);CHKERRQ(ierr);
814   }
815   ierr = VecZeroEntries(y);CHKERRQ(ierr);
816   if (!(osm->type & PC_GASM_RESTRICT)) {
817     ierr = VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
818     ierr = VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
819   } else {
820     ierr = VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
821     ierr = VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
822   }
823   if (osm->pctoouter){
824    ierr = VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
825    ierr = VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
826   }
827   PetscFunctionReturn(0);
828 }
829 
830 static PetscErrorCode PCReset_GASM(PC pc)
831 {
832   PC_GASM        *osm = (PC_GASM*)pc->data;
833   PetscErrorCode ierr;
834   PetscInt       i;
835 
836   PetscFunctionBegin;
837   if (osm->ksp) {
838     for (i=0; i<osm->n; i++) {
839       ierr = KSPReset(osm->ksp[i]);CHKERRQ(ierr);
840     }
841   }
842   if (osm->pmat) {
843     if (osm->n > 0) {
844       PetscMPIInt size;
845       ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRMPI(ierr);
846       if (size > 1) {
847         /* osm->pmat is created by MatCreateSubMatricesMPI(), cannot use MatDestroySubMatrices() */
848         ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
849       } else {
850         ierr = MatDestroySubMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
851       }
852     }
853   }
854   if (osm->x) {
855     for (i=0; i<osm->n; i++) {
856       ierr = VecDestroy(&osm->x[i]);CHKERRQ(ierr);
857       ierr = VecDestroy(&osm->y[i]);CHKERRQ(ierr);
858     }
859   }
860   ierr = VecDestroy(&osm->gx);CHKERRQ(ierr);
861   ierr = VecDestroy(&osm->gy);CHKERRQ(ierr);
862 
863   ierr = VecScatterDestroy(&osm->gorestriction);CHKERRQ(ierr);
864   ierr = VecScatterDestroy(&osm->girestriction);CHKERRQ(ierr);
865   if (!osm->user_subdomains) {
866     ierr      = PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);CHKERRQ(ierr);
867     osm->N    = PETSC_DETERMINE;
868     osm->nmax = PETSC_DETERMINE;
869   }
870   if (osm->pctoouter){
871         ierr = VecScatterDestroy(&(osm->pctoouter));CHKERRQ(ierr);
872   }
873   if (osm->permutationIS){
874         ierr = ISDestroy(&(osm->permutationIS));CHKERRQ(ierr);
875   }
876   if (osm->pcx){
877         ierr = VecDestroy(&(osm->pcx));CHKERRQ(ierr);
878   }
879   if (osm->pcy){
880         ierr = VecDestroy(&(osm->pcy));CHKERRQ(ierr);
881   }
882   if (osm->permutationP){
883     ierr = MatDestroy(&(osm->permutationP));CHKERRQ(ierr);
884   }
885   if (osm->pcmat){
886         ierr = MatDestroy(&osm->pcmat);CHKERRQ(ierr);
887   }
888   PetscFunctionReturn(0);
889 }
890 
891 static PetscErrorCode PCDestroy_GASM(PC pc)
892 {
893   PC_GASM        *osm = (PC_GASM*)pc->data;
894   PetscErrorCode ierr;
895   PetscInt       i;
896 
897   PetscFunctionBegin;
898   ierr = PCReset_GASM(pc);CHKERRQ(ierr);
899   /* PCReset will not destroy subdomains, if user_subdomains is true. */
900   ierr = PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);CHKERRQ(ierr);
901   if (osm->ksp) {
902     for (i=0; i<osm->n; i++) {
903       ierr = KSPDestroy(&osm->ksp[i]);CHKERRQ(ierr);
904     }
905     ierr = PetscFree(osm->ksp);CHKERRQ(ierr);
906   }
907   ierr = PetscFree(osm->x);CHKERRQ(ierr);
908   ierr = PetscFree(osm->y);CHKERRQ(ierr);
909   ierr = PetscFree(pc->data);CHKERRQ(ierr);
910   PetscFunctionReturn(0);
911 }
912 
/*
   PCSetFromOptions_GASM - Processes command-line/database options for the GASM preconditioner.
*/
static PetscErrorCode PCSetFromOptions_GASM(PetscOptionItems *PetscOptionsObject,PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       blocks,ovl;
  PetscBool      flg;
  PCGASMType     gasmtype;

  PetscFunctionBegin;
  ierr = PetscOptionsHead(PetscOptionsObject,"Generalized additive Schwarz options");CHKERRQ(ierr);
  ierr = PetscOptionsBool("-pc_gasm_use_dm_subdomains","If subdomains aren't set, use DMCreateDomainDecomposition() to define subdomains.","PCGASMSetUseDMSubdomains",osm->dm_subdomains,&osm->dm_subdomains,&flg);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-pc_gasm_total_subdomains","Total number of subdomains across communicator","PCGASMSetTotalSubdomains",osm->N,&blocks,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCGASMSetTotalSubdomains(pc,blocks);CHKERRQ(ierr);
  }
  ierr = PetscOptionsInt("-pc_gasm_overlap","Number of overlapping degrees of freedom","PCGASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCGASMSetOverlap(pc,ovl);CHKERRQ(ierr);
    /* an explicit overlap overrides any DM-defined subdomain decomposition */
    osm->dm_subdomains = PETSC_FALSE;
  }
  flg  = PETSC_FALSE;
  ierr = PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);CHKERRQ(ierr);
  if (flg) {ierr = PCGASMSetType(pc,gasmtype);CHKERRQ(ierr);}
  /* NOTE(review): the option name below is misspelled ("hierachical"); renaming it would
     break existing users' option files, so it is left as-is -- confirm before changing. */
  ierr = PetscOptionsBool("-pc_gasm_use_hierachical_partitioning","use hierarchical partitioning",NULL,osm->hierarchicalpartitioning,&osm->hierarchicalpartitioning,&flg);CHKERRQ(ierr);
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
940 
941 /*------------------------------------------------------------------------------------*/
942 
943 /*@
944     PCGASMSetTotalSubdomains - sets the total number of subdomains to use across the
945                                communicator.
946     Logically collective on pc
947 
948     Input Parameters:
949 +   pc  - the preconditioner
950 -   N   - total number of subdomains
951 
952 
953     Level: beginner
954 
955 .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap()
956           PCGASMCreateSubdomains2D()
957 @*/
958 PetscErrorCode  PCGASMSetTotalSubdomains(PC pc,PetscInt N)
959 {
960   PC_GASM        *osm = (PC_GASM*)pc->data;
961   PetscMPIInt    size,rank;
962   PetscErrorCode ierr;
963 
964   PetscFunctionBegin;
965   if (N < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Total number of subdomains must be 1 or more, got N = %D",N);
966   if (pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before calling PCSetUp().");
967 
968   ierr = PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);CHKERRQ(ierr);
969   osm->ois = osm->iis = NULL;
970 
971   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRMPI(ierr);
972   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRMPI(ierr);
973   osm->N    = N;
974   osm->n    = PETSC_DETERMINE;
975   osm->nmax = PETSC_DETERMINE;
976   osm->dm_subdomains = PETSC_FALSE;
977   PetscFunctionReturn(0);
978 }
979 
980 static PetscErrorCode  PCGASMSetSubdomains_GASM(PC pc,PetscInt n,IS iis[],IS ois[])
981 {
982   PC_GASM         *osm = (PC_GASM*)pc->data;
983   PetscErrorCode  ierr;
984   PetscInt        i;
985 
986   PetscFunctionBegin;
987   if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more subdomains, got n = %D",n);
988   if (pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetSubdomains() should be called before calling PCSetUp().");
989 
990   ierr = PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);CHKERRQ(ierr);
991   osm->iis  = osm->ois = NULL;
992   osm->n    = n;
993   osm->N    = PETSC_DETERMINE;
994   osm->nmax = PETSC_DETERMINE;
995   if (ois) {
996     ierr = PetscMalloc1(n,&osm->ois);CHKERRQ(ierr);
997     for (i=0; i<n; i++) {
998       ierr = PetscObjectReference((PetscObject)ois[i]);CHKERRQ(ierr);
999       osm->ois[i] = ois[i];
1000     }
1001     /*
1002        Since the user set the outer subdomains, even if nontrivial overlap was requested via PCGASMSetOverlap(),
1003        it will be ignored.  To avoid confusion later on (e.g., when viewing the PC), the overlap size is set to -1.
1004     */
1005     osm->overlap = -1;
1006     /* inner subdomains must be provided  */
1007     if (!iis) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"inner indices have to be provided \n");
1008   }/* end if */
1009   if (iis) {
1010     ierr = PetscMalloc1(n,&osm->iis);CHKERRQ(ierr);
1011     for (i=0; i<n; i++) {
1012       ierr = PetscObjectReference((PetscObject)iis[i]);CHKERRQ(ierr);
1013       osm->iis[i] = iis[i];
1014     }
1015     if (!ois) {
1016       osm->ois = NULL;
1017       /* if user does not provide outer indices, we will create the corresponding outer indices using  osm->overlap =1 in PCSetUp_GASM */
1018     }
1019   }
1020   if (PetscDefined(USE_DEBUG)) {
1021     PetscInt        j,rstart,rend,*covered,lsize;
1022     const PetscInt  *indices;
1023     /* check if the inner indices cover and only cover the local portion of the preconditioning matrix */
1024     ierr = MatGetOwnershipRange(pc->pmat,&rstart,&rend);CHKERRQ(ierr);
1025     ierr = PetscCalloc1(rend-rstart,&covered);CHKERRQ(ierr);
1026     /* check if the current processor owns indices from others */
1027     for (i=0; i<n; i++) {
1028       ierr = ISGetIndices(osm->iis[i],&indices);CHKERRQ(ierr);
1029       ierr = ISGetLocalSize(osm->iis[i],&lsize);CHKERRQ(ierr);
1030       for (j=0; j<lsize; j++) {
1031         if (indices[j]<rstart || indices[j]>=rend) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"inner subdomains can not own an index %d from other processors", indices[j]);
1032         else if (covered[indices[j]-rstart]==1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"inner subdomains can not have an overlapping index %d ",indices[j]);
1033         else covered[indices[j]-rstart] = 1;
1034       }
1035     ierr = ISRestoreIndices(osm->iis[i],&indices);CHKERRQ(ierr);
1036     }
1037     /* check if we miss any indices */
1038     for (i=rstart; i<rend; i++) {
1039       if (!covered[i-rstart]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"local entity %d was not covered by inner subdomains",i);
1040     }
1041     ierr = PetscFree(covered);CHKERRQ(ierr);
1042   }
1043   if (iis)  osm->user_subdomains = PETSC_TRUE;
1044   PetscFunctionReturn(0);
1045 }
1046 
1047 static PetscErrorCode  PCGASMSetOverlap_GASM(PC pc,PetscInt ovl)
1048 {
1049   PC_GASM *osm = (PC_GASM*)pc->data;
1050 
1051   PetscFunctionBegin;
1052   if (ovl < 0) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
1053   if (pc->setupcalled && ovl != osm->overlap) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp().");
1054   if (!pc->setupcalled) osm->overlap = ovl;
1055   PetscFunctionReturn(0);
1056 }
1057 
1058 static PetscErrorCode  PCGASMSetType_GASM(PC pc,PCGASMType type)
1059 {
1060   PC_GASM *osm = (PC_GASM*)pc->data;
1061 
1062   PetscFunctionBegin;
1063   osm->type     = type;
1064   osm->type_set = PETSC_TRUE;
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 static PetscErrorCode  PCGASMSetSortIndices_GASM(PC pc,PetscBool doSort)
1069 {
1070   PC_GASM *osm = (PC_GASM*)pc->data;
1071 
1072   PetscFunctionBegin;
1073   osm->sort_indices = doSort;
1074   PetscFunctionReturn(0);
1075 }
1076 
1077 /*
1078    FIXME: This routine might need to be modified now that multiple ranks per subdomain are allowed.
1079         In particular, it would upset the global subdomain number calculation.
1080 */
1081 static PetscErrorCode  PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp)
1082 {
1083   PC_GASM        *osm = (PC_GASM*)pc->data;
1084   PetscErrorCode ierr;
1085 
1086   PetscFunctionBegin;
1087   if (osm->n < 1) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ORDER,"Need to call PCSetUp() on PC (or KSPSetUp() on the outer KSP object) before calling here");
1088 
1089   if (n) *n = osm->n;
1090   if (first) {
1091     ierr    = MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1092     *first -= osm->n;
1093   }
1094   if (ksp) {
1095     /* Assume that local solves are now different; not necessarily
1096        true, though!  This flag is used only for PCView_GASM() */
1097     *ksp                        = osm->ksp;
1098     osm->same_subdomain_solvers = PETSC_FALSE;
1099   }
1100   PetscFunctionReturn(0);
1101 } /* PCGASMGetSubKSP_GASM() */
1102 
1103 /*@C
1104     PCGASMSetSubdomains - Sets the subdomains for this processor
1105     for the additive Schwarz preconditioner.
1106 
1107     Collective on pc
1108 
1109     Input Parameters:
1110 +   pc  - the preconditioner object
1111 .   n   - the number of subdomains for this processor
1112 .   iis - the index sets that define the inner subdomains (or NULL for PETSc to determine subdomains)
1113 -   ois - the index sets that define the outer subdomains (or NULL to use the same as iis, or to construct by expanding iis by the requested overlap)
1114 
1115     Notes:
1116     The IS indices use the parallel, global numbering of the vector entries.
1117     Inner subdomains are those where the correction is applied.
1118     Outer subdomains are those where the residual necessary to obtain the
1119     corrections is obtained (see PCGASMType for the use of inner/outer subdomains).
1120     Both inner and outer subdomains can extend over several processors.
1121     This processor's portion of a subdomain is known as a local subdomain.
1122 
1123     Inner subdomains can not overlap with each other, do not have any entities from remote processors,
1124     and  have to cover the entire local subdomain owned by the current processor. The index sets on each
1125     process should be ordered such that the ith local subdomain is connected to the ith remote subdomain
1126     on another MPI process.
1127 
1128     By default the GASM preconditioner uses 1 (local) subdomain per processor.
1129 
1130 
1131     Level: advanced
1132 
1133 .seealso: PCGASMSetNumSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
1134           PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
1135 @*/
1136 PetscErrorCode  PCGASMSetSubdomains(PC pc,PetscInt n,IS iis[],IS ois[])
1137 {
1138   PC_GASM *osm = (PC_GASM*)pc->data;
1139   PetscErrorCode ierr;
1140 
1141   PetscFunctionBegin;
1142   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1143   ierr = PetscTryMethod(pc,"PCGASMSetSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,iis,ois));CHKERRQ(ierr);
1144   osm->dm_subdomains = PETSC_FALSE;
1145   PetscFunctionReturn(0);
1146 }
1147 
1148 /*@
1149     PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
1150     additive Schwarz preconditioner.  Either all or no processors in the
1151     pc communicator must call this routine.
1152 
1153     Logically Collective on pc
1154 
1155     Input Parameters:
1156 +   pc  - the preconditioner context
1157 -   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 0)
1158 
1159     Options Database Key:
1160 .   -pc_gasm_overlap <overlap> - Sets overlap
1161 
1162     Notes:
1163     By default the GASM preconditioner uses 1 subdomain per processor.  To use
    multiple subdomains per processor or "straddling" subdomains that intersect
1165     multiple processors use PCGASMSetSubdomains() (or option -pc_gasm_total_subdomains <n>).
1166 
1167     The overlap defaults to 0, so if one desires that no additional
1168     overlap be computed beyond what may have been set with a call to
1169     PCGASMSetSubdomains(), then ovl must be set to be 0.  In particular, if one does
1170     not explicitly set the subdomains in application code, then all overlap would be computed
1171     internally by PETSc, and using an overlap of 0 would result in an GASM
1172     variant that is equivalent to the block Jacobi preconditioner.
1173 
1174     Note that one can define initial index sets with any overlap via
1175     PCGASMSetSubdomains(); the routine PCGASMSetOverlap() merely allows
1176     PETSc to extend that overlap further, if desired.
1177 
1178     Level: intermediate
1179 
1180 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1181           PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
1182 @*/
1183 PetscErrorCode  PCGASMSetOverlap(PC pc,PetscInt ovl)
1184 {
1185   PetscErrorCode ierr;
1186   PC_GASM *osm = (PC_GASM*)pc->data;
1187 
1188   PetscFunctionBegin;
1189   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1190   PetscValidLogicalCollectiveInt(pc,ovl,2);
1191   ierr = PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));CHKERRQ(ierr);
1192   osm->dm_subdomains = PETSC_FALSE;
1193   PetscFunctionReturn(0);
1194 }
1195 
1196 /*@
1197     PCGASMSetType - Sets the type of restriction and interpolation used
1198     for local problems in the additive Schwarz method.
1199 
1200     Logically Collective on PC
1201 
1202     Input Parameters:
1203 +   pc  - the preconditioner context
1204 -   type - variant of GASM, one of
1205 .vb
1206       PC_GASM_BASIC       - full interpolation and restriction
1207       PC_GASM_RESTRICT    - full restriction, local processor interpolation
1208       PC_GASM_INTERPOLATE - full interpolation, local processor restriction
1209       PC_GASM_NONE        - local processor restriction and interpolation
1210 .ve
1211 
1212     Options Database Key:
1213 .   -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
1214 
1215     Level: intermediate
1216 
1217 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1218           PCGASMCreateSubdomains2D()
1219 @*/
1220 PetscErrorCode  PCGASMSetType(PC pc,PCGASMType type)
1221 {
1222   PetscErrorCode ierr;
1223 
1224   PetscFunctionBegin;
1225   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1226   PetscValidLogicalCollectiveEnum(pc,type,2);
1227   ierr = PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));CHKERRQ(ierr);
1228   PetscFunctionReturn(0);
1229 }
1230 
1231 /*@
1232     PCGASMSetSortIndices - Determines whether subdomain indices are sorted.
1233 
1234     Logically Collective on PC
1235 
1236     Input Parameters:
1237 +   pc  - the preconditioner context
1238 -   doSort - sort the subdomain indices
1239 
1240     Level: intermediate
1241 
1242 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1243           PCGASMCreateSubdomains2D()
1244 @*/
1245 PetscErrorCode  PCGASMSetSortIndices(PC pc,PetscBool doSort)
1246 {
1247   PetscErrorCode ierr;
1248 
1249   PetscFunctionBegin;
1250   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1251   PetscValidLogicalCollectiveBool(pc,doSort,2);
1252   ierr = PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));CHKERRQ(ierr);
1253   PetscFunctionReturn(0);
1254 }
1255 
1256 /*@C
1257    PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on
1258    this processor.
1259 
1260    Collective on PC iff first_local is requested
1261 
1262    Input Parameter:
1263 .  pc - the preconditioner context
1264 
1265    Output Parameters:
1266 +  n_local - the number of blocks on this processor or NULL
1267 .  first_local - the global number of the first block on this processor or NULL,
1268                  all processors must request or all must pass NULL
1269 -  ksp - the array of KSP contexts
1270 
1271    Note:
   After PCGASMGetSubKSP() the array of KSP contexts is not to be freed
1273 
1274    Currently for some matrix implementations only 1 block per processor
1275    is supported.
1276 
1277    You must call KSPSetUp() before calling PCGASMGetSubKSP().
1278 
1279    Level: advanced
1280 
1281 .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap(),
1282           PCGASMCreateSubdomains2D(),
1283 @*/
1284 PetscErrorCode  PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
1285 {
1286   PetscErrorCode ierr;
1287 
1288   PetscFunctionBegin;
1289   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1290   ierr = PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));CHKERRQ(ierr);
1291   PetscFunctionReturn(0);
1292 }
1293 
1294 /* -------------------------------------------------------------------------------------*/
1295 /*MC
1296    PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
1297            its own KSP object.
1298 
1299    Options Database Keys:
1300 +  -pc_gasm_total_subdomains <n>  - Sets total number of local subdomains to be distributed among processors
1301 .  -pc_gasm_view_subdomains       - activates the printing of subdomain indices in PCView(), -ksp_view or -snes_view
1302 .  -pc_gasm_print_subdomains      - activates the printing of subdomain indices in PCSetUp()
1303 .  -pc_gasm_overlap <ovl>         - Sets overlap by which to (automatically) extend local subdomains
1304 -  -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
1305 
1306      IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
1307       will get a different convergence rate due to the default option of -pc_gasm_type restrict. Use
1308       -pc_gasm_type basic to use the standard GASM.
1309 
1310    Notes:
1311     Blocks can be shared by multiple processes.
1312 
1313      To set options on the solvers for each block append -sub_ to all the KSP, and PC
1314         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
1315 
1316      To set the options on the solvers separate for each block call PCGASMGetSubKSP()
1317          and set the options directly on the resulting KSP object (you can access its PC
1318          with KSPGetPC())
1319 
1320 
1321    Level: beginner
1322 
1323     References:
1324 +   1. - M Dryja, OB Widlund, An additive variant of the Schwarz alternating method for the case of many subregions
1325      Courant Institute, New York University Technical report
1326 -   2. - Barry Smith, Petter Bjorstad, and William Gropp, Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
1327     Cambridge University Press.
1328 
1329 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
1330            PCBJACOBI,  PCGASMGetSubKSP(), PCGASMSetSubdomains(),
1331            PCSetModifySubMatrices(), PCGASMSetOverlap(), PCGASMSetType()
1332 
1333 M*/
1334 
1335 PETSC_EXTERN PetscErrorCode PCCreate_GASM(PC pc)
1336 {
1337   PetscErrorCode ierr;
1338   PC_GASM        *osm;
1339 
1340   PetscFunctionBegin;
1341   ierr = PetscNewLog(pc,&osm);CHKERRQ(ierr);
1342 
1343   osm->N                        = PETSC_DETERMINE;
1344   osm->n                        = PETSC_DECIDE;
1345   osm->nmax                     = PETSC_DETERMINE;
1346   osm->overlap                  = 0;
1347   osm->ksp                      = NULL;
1348   osm->gorestriction            = NULL;
1349   osm->girestriction            = NULL;
1350   osm->pctoouter                = NULL;
1351   osm->gx                       = NULL;
1352   osm->gy                       = NULL;
1353   osm->x                        = NULL;
1354   osm->y                        = NULL;
1355   osm->pcx                      = NULL;
1356   osm->pcy                      = NULL;
1357   osm->permutationIS            = NULL;
1358   osm->permutationP             = NULL;
1359   osm->pcmat                    = NULL;
1360   osm->ois                      = NULL;
1361   osm->iis                      = NULL;
1362   osm->pmat                     = NULL;
1363   osm->type                     = PC_GASM_RESTRICT;
1364   osm->same_subdomain_solvers   = PETSC_TRUE;
1365   osm->sort_indices             = PETSC_TRUE;
1366   osm->dm_subdomains            = PETSC_FALSE;
1367   osm->hierarchicalpartitioning = PETSC_FALSE;
1368 
1369   pc->data                 = (void*)osm;
1370   pc->ops->apply           = PCApply_GASM;
1371   pc->ops->matapply        = PCMatApply_GASM;
1372   pc->ops->applytranspose  = PCApplyTranspose_GASM;
1373   pc->ops->setup           = PCSetUp_GASM;
1374   pc->ops->reset           = PCReset_GASM;
1375   pc->ops->destroy         = PCDestroy_GASM;
1376   pc->ops->setfromoptions  = PCSetFromOptions_GASM;
1377   pc->ops->setuponblocks   = PCSetUpOnBlocks_GASM;
1378   pc->ops->view            = PCView_GASM;
1379   pc->ops->applyrichardson = NULL;
1380 
1381   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSubdomains_C",PCGASMSetSubdomains_GASM);CHKERRQ(ierr);
1382   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetOverlap_C",PCGASMSetOverlap_GASM);CHKERRQ(ierr);
1383   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetType_C",PCGASMSetType_GASM);CHKERRQ(ierr);
1384   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSortIndices_C",PCGASMSetSortIndices_GASM);CHKERRQ(ierr);
1385   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMGetSubKSP_C",PCGASMGetSubKSP_GASM);CHKERRQ(ierr);
1386   PetscFunctionReturn(0);
1387 }
1388 
1389 PetscErrorCode  PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[])
1390 {
1391   MatPartitioning mpart;
1392   const char      *prefix;
1393   PetscInt        i,j,rstart,rend,bs;
1394   PetscBool       hasop, isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
1395   Mat             Ad     = NULL, adj;
1396   IS              ispart,isnumb,*is;
1397   PetscErrorCode  ierr;
1398 
1399   PetscFunctionBegin;
1400   if (nloc < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local subdomains must > 0, got nloc = %D",nloc);
1401 
1402   /* Get prefix, row distribution, and block size */
1403   ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr);
1404   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1405   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
1406   if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);
1407 
1408   /* Get diagonal block from matrix if possible */
1409   ierr = MatHasOperation(A,MATOP_GET_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1410   if (hasop) {
1411     ierr = MatGetDiagonalBlock(A,&Ad);CHKERRQ(ierr);
1412   }
1413   if (Ad) {
1414     ierr = PetscObjectBaseTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
1415     if (!isbaij) {ierr = PetscObjectBaseTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);}
1416   }
1417   if (Ad && nloc > 1) {
1418     PetscBool  match,done;
1419     /* Try to setup a good matrix partitioning if available */
1420     ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr);
1421     ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
1422     ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
1423     ierr = PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);CHKERRQ(ierr);
1424     if (!match) {
1425       ierr = PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);CHKERRQ(ierr);
1426     }
1427     if (!match) { /* assume a "good" partitioner is available */
1428       PetscInt       na;
1429       const PetscInt *ia,*ja;
1430       ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1431       if (done) {
1432         /* Build adjacency matrix by hand. Unfortunately a call to
1433            MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1434            remove the block-aij structure and we cannot expect
1435            MatPartitioning to split vertices as we need */
1436         PetscInt       i,j,len,nnz,cnt,*iia=NULL,*jja=NULL;
1437         const PetscInt *row;
1438         nnz = 0;
1439         for (i=0; i<na; i++) { /* count number of nonzeros */
1440           len = ia[i+1] - ia[i];
1441           row = ja + ia[i];
1442           for (j=0; j<len; j++) {
1443             if (row[j] == i) { /* don't count diagonal */
1444               len--; break;
1445             }
1446           }
1447           nnz += len;
1448         }
1449         ierr   = PetscMalloc1(na+1,&iia);CHKERRQ(ierr);
1450         ierr   = PetscMalloc1(nnz,&jja);CHKERRQ(ierr);
1451         nnz    = 0;
1452         iia[0] = 0;
1453         for (i=0; i<na; i++) { /* fill adjacency */
1454           cnt = 0;
1455           len = ia[i+1] - ia[i];
1456           row = ja + ia[i];
1457           for (j=0; j<len; j++) {
1458             if (row[j] != i) jja[nnz+cnt++] = row[j]; /* if not diagonal */
1459           }
1460           nnz += cnt;
1461           iia[i+1] = nnz;
1462         }
1463         /* Partitioning of the adjacency matrix */
1464         ierr      = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,NULL,&adj);CHKERRQ(ierr);
1465         ierr      = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr);
1466         ierr      = MatPartitioningSetNParts(mpart,nloc);CHKERRQ(ierr);
1467         ierr      = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr);
1468         ierr      = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr);
1469         ierr      = MatDestroy(&adj);CHKERRQ(ierr);
1470         foundpart = PETSC_TRUE;
1471       }
1472       ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1473     }
1474     ierr = MatPartitioningDestroy(&mpart);CHKERRQ(ierr);
1475   }
1476   ierr = PetscMalloc1(nloc,&is);CHKERRQ(ierr);
1477   if (!foundpart) {
1478 
1479     /* Partitioning by contiguous chunks of rows */
1480 
1481     PetscInt mbs   = (rend-rstart)/bs;
1482     PetscInt start = rstart;
1483     for (i=0; i<nloc; i++) {
1484       PetscInt count = (mbs/nloc + ((mbs % nloc) > i)) * bs;
1485       ierr   = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr);
1486       start += count;
1487     }
1488 
1489   } else {
1490 
1491     /* Partitioning by adjacency of diagonal block  */
1492 
1493     const PetscInt *numbering;
1494     PetscInt       *count,nidx,*indices,*newidx,start=0;
1495     /* Get node count in each partition */
1496     ierr = PetscMalloc1(nloc,&count);CHKERRQ(ierr);
1497     ierr = ISPartitioningCount(ispart,nloc,count);CHKERRQ(ierr);
1498     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1499       for (i=0; i<nloc; i++) count[i] *= bs;
1500     }
1501     /* Build indices from node numbering */
1502     ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr);
1503     ierr = PetscMalloc1(nidx,&indices);CHKERRQ(ierr);
1504     for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
1505     ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr);
1506     ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr);
1507     ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr);
1508     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1509       ierr = PetscMalloc1(nidx*bs,&newidx);CHKERRQ(ierr);
1510       for (i=0; i<nidx; i++) {
1511         for (j=0; j<bs; j++) newidx[i*bs+j] = indices[i]*bs + j;
1512       }
1513       ierr    = PetscFree(indices);CHKERRQ(ierr);
1514       nidx   *= bs;
1515       indices = newidx;
1516     }
1517     /* Shift to get global indices */
1518     for (i=0; i<nidx; i++) indices[i] += rstart;
1519 
1520     /* Build the index sets for each block */
1521     for (i=0; i<nloc; i++) {
1522       ierr   = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);CHKERRQ(ierr);
1523       ierr   = ISSort(is[i]);CHKERRQ(ierr);
1524       start += count[i];
1525     }
1526 
1527     ierr = PetscFree(count);CHKERRQ(ierr);
1528     ierr = PetscFree(indices);CHKERRQ(ierr);
1529     ierr = ISDestroy(&isnumb);CHKERRQ(ierr);
1530     ierr = ISDestroy(&ispart);CHKERRQ(ierr);
1531   }
1532   *iis = is;
1533   PetscFunctionReturn(0);
1534 }
1535 
/* Create N global subdomains when N is smaller than the communicator size, so that
   subdomains straddle process boundaries.  Delegates entirely to
   MatSubdomainsCreateCoalesce(); on return *n is the number of local index sets
   placed in *iis (presumably by coalescing ranks into N groups -- see that routine). */
PETSC_INTERN PetscErrorCode  PCGASMCreateStraddlingSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
{
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  ierr = MatSubdomainsCreateCoalesce(A,N,n,iis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1544 
1545 /*@C
1546    PCGASMCreateSubdomains - Creates n index sets defining n nonoverlapping subdomains for the additive
   Schwarz preconditioner for any problem based on its matrix.
1548 
1549    Collective
1550 
1551    Input Parameters:
1552 +  A       - The global matrix operator
1553 -  N       - the number of global subdomains requested
1554 
1555    Output Parameters:
1556 +  n   - the number of subdomains created on this processor
1557 -  iis - the array of index sets defining the local inner subdomains (on which the correction is applied)
1558 
1559    Level: advanced
1560 
1561    Note: When N >= A's communicator size, each subdomain is local -- contained within a single processor.
1562          When N < size, the subdomains are 'straddling' (processor boundaries) and are no longer local.
         The resulting subdomains can be used in PCGASMSetSubdomains(pc,n,iss,NULL).  The overlapping
1564          outer subdomains will be automatically generated from these according to the requested amount of
1565          overlap; this is currently supported only with local subdomains.
1566 
1567 
1568 .seealso: PCGASMSetSubdomains(), PCGASMDestroySubdomains()
1569 @*/
1570 PetscErrorCode  PCGASMCreateSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
1571 {
1572   PetscMPIInt     size;
1573   PetscErrorCode  ierr;
1574 
1575   PetscFunctionBegin;
1576   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1577   PetscValidPointer(iis,4);
1578 
1579   if (N < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Number of subdomains must be > 0, N = %D",N);
1580   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
1581   if (N >= size) {
1582     *n = N/size + (N%size);
1583     ierr = PCGASMCreateLocalSubdomains(A,*n,iis);CHKERRQ(ierr);
1584   } else {
1585     ierr = PCGASMCreateStraddlingSubdomains(A,N,n,iis);CHKERRQ(ierr);
1586   }
1587   PetscFunctionReturn(0);
1588 }
1589 
1590 /*@C
1591    PCGASMDestroySubdomains - Destroys the index sets created with
1592    PCGASMCreateSubdomains() or PCGASMCreateSubdomains2D. Should be
1593    called after setting subdomains with PCGASMSetSubdomains().
1594 
1595    Collective
1596 
1597    Input Parameters:
1598 +  n   - the number of index sets
1599 .  iis - the array of inner subdomains,
1600 -  ois - the array of outer subdomains, can be NULL
1601 
1602    Level: intermediate
1603 
   Notes:
   This is merely a convenience subroutine that walks each list,
   destroys each IS on the list, and then frees the list. At the end the
   list pointers are set to NULL.
1608 
1609 .seealso: PCGASMCreateSubdomains(), PCGASMSetSubdomains()
1610 @*/
1611 PetscErrorCode  PCGASMDestroySubdomains(PetscInt n,IS **iis,IS **ois)
1612 {
1613   PetscInt       i;
1614   PetscErrorCode ierr;
1615 
1616   PetscFunctionBegin;
1617   if (n <= 0) PetscFunctionReturn(0);
1618   if (ois) {
1619     PetscValidPointer(ois,3);
1620     if (*ois) {
1621       PetscValidPointer(*ois,3);
1622       for (i=0; i<n; i++) {
1623         ierr = ISDestroy(&(*ois)[i]);CHKERRQ(ierr);
1624       }
1625       ierr = PetscFree((*ois));CHKERRQ(ierr);
1626     }
1627   }
1628   if (iis) {
1629     PetscValidPointer(iis,2);
1630     if (*iis) {
1631       PetscValidPointer(*iis,2);
1632       for (i=0; i<n; i++) {
1633         ierr = ISDestroy(&(*iis)[i]);CHKERRQ(ierr);
1634       }
1635       ierr = PetscFree((*iis));CHKERRQ(ierr);
1636     }
1637   }
1638   PetscFunctionReturn(0);
1639 }
1640 
/*
   PCGASMLocalSubdomainBounds2D - Intersects the 2D subdomain with corners
   (xleft,ylow)-(xright,yhigh) with this process's ownership range [first,last)
   of a row-major M x N grid.  Writes, through pointer arguments, the bounding box
   (xleft_loc,ylow_loc)-(xright_loc,yhigh_loc) of the intersection and n, the number
   of subdomain grid points owned locally.  Implemented as a macro because it writes
   through several output arguments at once.
*/
#define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \
  {                                                                                                       \
    PetscInt first_row = first/M, last_row = last/M+1;                                                     \
    /*                                                                                                    \
     Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners       \
     of the bounding box of the intersection of the subdomain with the local ownership range (local       \
     subdomain).                                                                                          \
     Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows       \
     of the intersection.                                                                                 \
    */                                                                                                    \
    /* ylow_loc is the grid row containing the first element of the local subdomain */                    \
    *ylow_loc = PetscMax(first_row,ylow);                                                                    \
    /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
    *xleft_loc = *ylow_loc==first_row ? PetscMax(first%M,xleft) : xleft;                                                                            \
    /* yhigh_loc is the grid row above the last local subdomain element */                                                                    \
    *yhigh_loc = PetscMin(last_row,yhigh);                                                                                                     \
    /* xright is the offset of the end of the  local subdomain within its grid row (might actually be outside the local subdomain) */         \
    *xright_loc = *yhigh_loc==last_row ? PetscMin(xright,last%M) : xright;                                                                          \
    /* Now compute the size of the local subdomain n. */ \
    *n = 0;                                               \
    if (*ylow_loc < *yhigh_loc) {                           \
      PetscInt width = xright-xleft;                     \
      *n += width*(*yhigh_loc-*ylow_loc-1);                 \
      *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \
      *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \
    } \
  }
1668 
1669 /*@
1670    PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1671    preconditioner for a two-dimensional problem on a regular grid.
1672 
1673    Collective
1674 
   Input Parameters:
+  pc                 - the preconditioner context
.  M, N               - the global number of grid points in the x and y directions
.  Mdomains, Ndomains - the global number of subdomains in the x and y directions
.  dof                - degrees of freedom per node
-  overlap            - overlap in mesh lines
1680 
1681    Output Parameters:
1682 +  Nsub - the number of local subdomains created
1683 .  iis  - array of index sets defining inner (nonoverlapping) subdomains
1684 -  ois  - array of index sets defining outer (overlapping, if overlap > 0) subdomains
1685 
1686 
1687    Level: advanced
1688 
1689 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(), PCGASMSetOverlap()
1690 @*/
1691 PetscErrorCode  PCGASMCreateSubdomains2D(PC pc,PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap,PetscInt *nsub,IS **iis,IS **ois)
1692 {
1693   PetscErrorCode ierr;
1694   PetscMPIInt    size, rank;
1695   PetscInt       i, j;
1696   PetscInt       maxheight, maxwidth;
1697   PetscInt       xstart, xleft, xright, xleft_loc, xright_loc;
1698   PetscInt       ystart, ylow,  yhigh,  ylow_loc,  yhigh_loc;
1699   PetscInt       x[2][2], y[2][2], n[2];
1700   PetscInt       first, last;
1701   PetscInt       nidx, *idx;
1702   PetscInt       ii,jj,s,q,d;
1703   PetscInt       k,kk;
1704   PetscMPIInt    color;
1705   MPI_Comm       comm, subcomm;
1706   IS             **xis = NULL, **is = ois, **is_local = iis;
1707 
1708   PetscFunctionBegin;
1709   ierr = PetscObjectGetComm((PetscObject)pc, &comm);CHKERRQ(ierr);
1710   ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr);
1711   ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr);
1712   ierr = MatGetOwnershipRange(pc->pmat, &first, &last);CHKERRQ(ierr);
1713   if (first%dof || last%dof) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) "
1714                                       "does not respect the number of degrees of freedom per grid point %D", first, last, dof);
1715 
1716   /* Determine the number of domains with nonzero intersections with the local ownership range. */
1717   s      = 0;
1718   ystart = 0;
1719   for (j=0; j<Ndomains; ++j) {
1720     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1721     if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N);
1722     /* Vertical domain limits with an overlap. */
1723     ylow   = PetscMax(ystart - overlap,0);
1724     yhigh  = PetscMin(ystart + maxheight + overlap,N);
1725     xstart = 0;
1726     for (i=0; i<Mdomains; ++i) {
1727       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1728       if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1729       /* Horizontal domain limits with an overlap. */
1730       xleft  = PetscMax(xstart - overlap,0);
1731       xright = PetscMin(xstart + maxwidth + overlap,M);
1732       /*
1733          Determine whether this subdomain intersects this processor's ownership range of pc->pmat.
1734       */
1735       PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1736       if (nidx) ++s;
1737       xstart += maxwidth;
1738     } /* for (i = 0; i < Mdomains; ++i) */
1739     ystart += maxheight;
1740   } /* for (j = 0; j < Ndomains; ++j) */
1741 
1742   /* Now we can allocate the necessary number of ISs. */
1743   *nsub  = s;
1744   ierr   = PetscMalloc1(*nsub,is);CHKERRQ(ierr);
1745   ierr   = PetscMalloc1(*nsub,is_local);CHKERRQ(ierr);
1746   s      = 0;
1747   ystart = 0;
1748   for (j=0; j<Ndomains; ++j) {
1749     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1750     if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N);
1751     /* Vertical domain limits with an overlap. */
1752     y[0][0] = PetscMax(ystart - overlap,0);
1753     y[0][1] = PetscMin(ystart + maxheight + overlap,N);
1754     /* Vertical domain limits without an overlap. */
1755     y[1][0] = ystart;
1756     y[1][1] = PetscMin(ystart + maxheight,N);
1757     xstart  = 0;
1758     for (i=0; i<Mdomains; ++i) {
1759       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1760       if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1761       /* Horizontal domain limits with an overlap. */
1762       x[0][0] = PetscMax(xstart - overlap,0);
1763       x[0][1] = PetscMin(xstart + maxwidth + overlap,M);
1764       /* Horizontal domain limits without an overlap. */
1765       x[1][0] = xstart;
1766       x[1][1] = PetscMin(xstart+maxwidth,M);
1767       /*
1768          Determine whether this domain intersects this processor's ownership range of pc->pmat.
1769          Do this twice: first for the domains with overlaps, and once without.
1770          During the first pass create the subcommunicators, and use them on the second pass as well.
1771       */
1772       for (q = 0; q < 2; ++q) {
1773         PetscBool split = PETSC_FALSE;
1774         /*
1775           domain limits, (xleft, xright) and (ylow, yheigh) are adjusted
1776           according to whether the domain with an overlap or without is considered.
1777         */
1778         xleft = x[q][0]; xright = x[q][1];
1779         ylow  = y[q][0]; yhigh  = y[q][1];
1780         PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1781         nidx *= dof;
1782         n[q]  = nidx;
1783         /*
1784          Based on the counted number of indices in the local domain *with an overlap*,
1785          construct a subcommunicator of all the processors supporting this domain.
1786          Observe that a domain with an overlap might have nontrivial local support,
1787          while the domain without an overlap might not.  Hence, the decision to participate
1788          in the subcommunicator must be based on the domain with an overlap.
1789          */
1790         if (q == 0) {
1791           if (nidx) color = 1;
1792           else color = MPI_UNDEFINED;
1793           ierr = MPI_Comm_split(comm, color, rank, &subcomm);CHKERRMPI(ierr);
1794           split = PETSC_TRUE;
1795         }
1796         /*
1797          Proceed only if the number of local indices *with an overlap* is nonzero.
1798          */
1799         if (n[0]) {
1800           if (q == 0) xis = is;
1801           if (q == 1) {
1802             /*
1803              The IS for the no-overlap subdomain shares a communicator with the overlapping domain.
1804              Moreover, if the overlap is zero, the two ISs are identical.
1805              */
1806             if (overlap == 0) {
1807               (*is_local)[s] = (*is)[s];
1808               ierr           = PetscObjectReference((PetscObject)(*is)[s]);CHKERRQ(ierr);
1809               continue;
1810             } else {
1811               xis     = is_local;
1812               subcomm = ((PetscObject)(*is)[s])->comm;
1813             }
1814           } /* if (q == 1) */
1815           idx  = NULL;
1816           ierr = PetscMalloc1(nidx,&idx);CHKERRQ(ierr);
1817           if (nidx) {
1818             k = 0;
1819             for (jj=ylow_loc; jj<yhigh_loc; ++jj) {
1820               PetscInt x0 = (jj==ylow_loc) ? xleft_loc : xleft;
1821               PetscInt x1 = (jj==yhigh_loc-1) ? xright_loc : xright;
1822               kk = dof*(M*jj + x0);
1823               for (ii=x0; ii<x1; ++ii) {
1824                 for (d = 0; d < dof; ++d) {
1825                   idx[k++] = kk++;
1826                 }
1827               }
1828             }
1829           }
1830           ierr = ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*xis)+s);CHKERRQ(ierr);
1831           if (split) {
1832             ierr = MPI_Comm_free(&subcomm);CHKERRMPI(ierr);
1833           }
1834         }/* if (n[0]) */
1835       }/* for (q = 0; q < 2; ++q) */
1836       if (n[0]) ++s;
1837       xstart += maxwidth;
1838     } /* for (i = 0; i < Mdomains; ++i) */
1839     ystart += maxheight;
1840   } /* for (j = 0; j < Ndomains; ++j) */
1841   PetscFunctionReturn(0);
1842 }
1843 
1844 /*@C
1845     PCGASMGetSubdomains - Gets the subdomains supported on this processor
1846     for the additive Schwarz preconditioner.
1847 
1848     Not Collective
1849 
1850     Input Parameter:
1851 .   pc - the preconditioner context
1852 
1853     Output Parameters:
1854 +   n   - the number of subdomains for this processor (default value = 1)
1855 .   iis - the index sets that define the inner subdomains (without overlap) supported on this processor (can be NULL)
1856 -   ois - the index sets that define the outer subdomains (with overlap) supported on this processor (can be NULL)
1857 
1858 
1859     Notes:
1860     The user is responsible for destroying the ISs and freeing the returned arrays.
1861     The IS numbering is in the parallel, global numbering of the vector.
1862 
1863     Level: advanced
1864 
1865 .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(), PCGASMCreateSubdomains2D(),
1866           PCGASMSetSubdomains(), PCGASMGetSubmatrices()
1867 @*/
1868 PetscErrorCode  PCGASMGetSubdomains(PC pc,PetscInt *n,IS *iis[],IS *ois[])
1869 {
1870   PC_GASM        *osm;
1871   PetscErrorCode ierr;
1872   PetscBool      match;
1873   PetscInt       i;
1874 
1875   PetscFunctionBegin;
1876   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1877   ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1878   if (!match) SETERRQ2(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Incorrect object type: expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
1879   osm = (PC_GASM*)pc->data;
1880   if (n) *n = osm->n;
1881   if (iis) {
1882     ierr = PetscMalloc1(osm->n, iis);CHKERRQ(ierr);
1883   }
1884   if (ois) {
1885     ierr = PetscMalloc1(osm->n, ois);CHKERRQ(ierr);
1886   }
1887   if (iis || ois) {
1888     for (i = 0; i < osm->n; ++i) {
1889       if (iis) (*iis)[i] = osm->iis[i];
1890       if (ois) (*ois)[i] = osm->ois[i];
1891     }
1892   }
1893   PetscFunctionReturn(0);
1894 }
1895 
1896 /*@C
1897     PCGASMGetSubmatrices - Gets the local submatrices (for this processor
1898     only) for the additive Schwarz preconditioner.
1899 
1900     Not Collective
1901 
1902     Input Parameter:
1903 .   pc - the preconditioner context
1904 
1905     Output Parameters:
1906 +   n   - the number of matrices for this processor (default value = 1)
1907 -   mat - the matrices
1908 
1909     Notes:
1910     matrices returned by this routine have the same communicators as the index sets (IS)
1911            used to define subdomains in PCGASMSetSubdomains()
1912     Level: advanced
1913 
1914 .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(),
1915           PCGASMCreateSubdomains2D(), PCGASMSetSubdomains(), PCGASMGetSubdomains()
1916 @*/
1917 PetscErrorCode  PCGASMGetSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1918 {
1919   PC_GASM        *osm;
1920   PetscErrorCode ierr;
1921   PetscBool      match;
1922 
1923   PetscFunctionBegin;
1924   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1925   PetscValidIntPointer(n,2);
1926   if (mat) PetscValidPointer(mat,3);
1927   if (!pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUp() or PCSetUp().");
1928   ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1929   if (!match) SETERRQ2(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
1930   osm = (PC_GASM*)pc->data;
1931   if (n) *n = osm->n;
1932   if (mat) *mat = osm->pmat;
1933   PetscFunctionReturn(0);
1934 }
1935 
1936 /*@
1937     PCGASMSetUseDMSubdomains - Indicates whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
1938     Logically Collective
1939 
1940     Input Parameter:
1941 +   pc  - the preconditioner
1942 -   flg - boolean indicating whether to use subdomains defined by the DM
1943 
1944     Options Database Key:
.   -pc_gasm_dm_subdomains <bool> - allow the DM to define the subdomains (overridden by -pc_gasm_total_subdomains or -pc_gasm_overlap)
1946 
1947     Level: intermediate
1948 
1949     Notes:
1950     PCGASMSetSubdomains(), PCGASMSetTotalSubdomains() or PCGASMSetOverlap() take precedence over PCGASMSetUseDMSubdomains(),
1951     so setting PCGASMSetSubdomains() with nontrivial subdomain ISs or any of PCGASMSetTotalSubdomains() and PCGASMSetOverlap()
1952     automatically turns the latter off.
1953 
1954 .seealso: PCGASMGetUseDMSubdomains(), PCGASMSetSubdomains(), PCGASMSetOverlap()
1955           PCGASMCreateSubdomains2D()
1956 @*/
PetscErrorCode  PCGASMSetUseDMSubdomains(PC pc,PetscBool flg)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveBool(pc,flg,2);
  if (pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Not for a setup PC.");
  /* Silently ignore the request when pc is not a PCGASM. */
  ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
  if (match) {
    /* Honor the flag only when the user has not already prescribed subdomains or overlap.
       NOTE(review): PCCreate_GASM() initializes osm->overlap to 0, so 'osm->overlap < 0'
       below appears to be always false, which would make this routine a no-op -- confirm
       whether the "overlap unset" sentinel was intended to be negative. */
    if (!osm->user_subdomains && osm->N == PETSC_DETERMINE && osm->overlap < 0) {
      osm->dm_subdomains = flg;
    }
  }
  PetscFunctionReturn(0);
}
1975 
1976 /*@
1977     PCGASMGetUseDMSubdomains - Returns flag indicating whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
1978     Not Collective
1979 
1980     Input Parameter:
1981 .   pc  - the preconditioner
1982 
1983     Output Parameter:
1984 .   flg - boolean indicating whether to use subdomains defined by the DM
1985 
1986     Level: intermediate
1987 
1988 .seealso: PCGASMSetUseDMSubdomains(), PCGASMSetOverlap()
1989           PCGASMCreateSubdomains2D()
1990 @*/
1991 PetscErrorCode  PCGASMGetUseDMSubdomains(PC pc,PetscBool* flg)
1992 {
1993   PC_GASM        *osm = (PC_GASM*)pc->data;
1994   PetscErrorCode ierr;
1995   PetscBool      match;
1996 
1997   PetscFunctionBegin;
1998   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1999   PetscValidBoolPointer(flg,2);
2000   ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
2001   if (match) {
2002     if (flg) *flg = osm->dm_subdomains;
2003   }
2004   PetscFunctionReturn(0);
2005 }
2006