xref: /petsc/src/ksp/pc/impls/gasm/gasm.c (revision d21efd2e5d911db017a545648c4fa4838359bb2d)
1 /*
  This file defines a "generalized" additive Schwarz preconditioner for any Mat implementation.
3   In this version each processor may intersect multiple subdomains and any subdomain may
4   intersect multiple processors.  Intersections of subdomains with processors are called *local
5   subdomains*.
6 
7        N    - total number of distinct global subdomains          (set explicitly in PCGASMSetTotalSubdomains() or implicitly PCGASMSetSubdomains() and then calculated in PCSetUp_GASM())
8        n    - actual number of local subdomains on this processor (set in PCGASMSetSubdomains() or calculated in PCGASMSetTotalSubdomains())
9        nmax - maximum number of local subdomains per processor    (calculated in PCSetUp_GASM())
10 */
11 #include <petsc/private/pcimpl.h>     /*I "petscpc.h" I*/
12 #include <petscdm.h>
13 
typedef struct {
  PetscInt    N,n,nmax;                 /* global (N), local (n), and max-local (nmax) subdomain counts -- see the file header comment */
  PetscInt    overlap;                  /* overlap requested by user */
  PCGASMType  type;                     /* use reduced interpolation, restriction or both */
  PetscBool   type_set;                 /* if user set this value (so won't change it for symmetric problems) */
  PetscBool   same_subdomain_solvers;   /* flag indicating whether all local solvers are same */
  PetscBool   sort_indices;             /* flag to sort subdomain indices */
  PetscBool   user_subdomains;          /* whether the user set explicit subdomain index sets -- keep them on PCReset() */
  PetscBool   dm_subdomains;            /* whether DM is allowed to define subdomains */
  PetscBool   hierarchicalpartitioning; /* whether to repartition with PCGASMSetHierarchicalPartitioning() during setup */
  IS          *ois;                     /* index sets that define the outer (conceptually, overlapping) subdomains */
  IS          *iis;                     /* index sets that define the inner (conceptually, nonoverlapping) subdomains */
  KSP         *ksp;                     /* linear solvers for each subdomain */
  Mat         *pmat;                    /* subdomain block matrices */
  Vec         gx,gy;                    /* Merged work vectors */
  Vec         *x,*y;                    /* Split work vectors; storage aliases pieces of storage of the above merged vectors. */
  VecScatter  gorestriction;            /* merged restriction to disjoint union of outer subdomains */
  VecScatter  girestriction;            /* merged restriction to disjoint union of inner subdomains */
  VecScatter  pctoouter;                /* scatter between the redistributed vectors (pcx,pcy) and the outer vector (hierarchical partitioning only) */
  IS          permutationIS;            /* rows ("fromrows") used to extract the redistributed operator (hierarchical partitioning only) */
  Mat         permutationP;             /* redistributed submatrix of pc->pmat (hierarchical partitioning only) */
  Mat         pcmat;                    /* saved original pc->pmat while pc->pmat temporarily points at permutationP */
  Vec         pcx,pcy;                  /* work vectors sized to the redistributed rows (hierarchical partitioning only) */
} PC_GASM;
38 
39 static PetscErrorCode  PCGASMComputeGlobalSubdomainNumbering_Private(PC pc,PetscInt **numbering,PetscInt **permutation)
40 {
41   PC_GASM        *osm = (PC_GASM*)pc->data;
42   PetscInt       i;
43   PetscErrorCode ierr;
44 
45   PetscFunctionBegin;
46   /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
47   ierr = PetscMalloc2(osm->n,numbering,osm->n,permutation);CHKERRQ(ierr);
48   ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->iis,NULL,*numbering);CHKERRQ(ierr);
49   for (i = 0; i < osm->n; ++i) (*permutation)[i] = i;
50   ierr = PetscSortIntWithPermutation(osm->n,*numbering,*permutation);CHKERRQ(ierr);
51   PetscFunctionReturn(0);
52 }
53 
54 static PetscErrorCode  PCGASMSubdomainView_Private(PC pc, PetscInt i, PetscViewer viewer)
55 {
56   PC_GASM        *osm = (PC_GASM*)pc->data;
57   PetscInt       j,nidx;
58   const PetscInt *idx;
59   PetscViewer    sviewer;
60   char           *cidx;
61   PetscErrorCode ierr;
62 
63   PetscFunctionBegin;
64   PetscCheckFalse(i < 0 || i > osm->n,PetscObjectComm((PetscObject)viewer), PETSC_ERR_ARG_WRONG, "Invalid subdomain %D: must nonnegative and less than %D", i, osm->n);
65   /* Inner subdomains. */
66   ierr = ISGetLocalSize(osm->iis[i], &nidx);CHKERRQ(ierr);
67   /*
68    No more than 15 characters per index plus a space.
69    PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
70    in case nidx == 0. That will take care of the space for the trailing '\0' as well.
71    For nidx == 0, the whole string 16 '\0'.
72    */
73 #define len  16*(nidx+1)+1
74   ierr = PetscMalloc1(len, &cidx);CHKERRQ(ierr);
75   ierr = PetscViewerStringOpen(PETSC_COMM_SELF, cidx, len, &sviewer);CHKERRQ(ierr);
76 #undef len
77   ierr = ISGetIndices(osm->iis[i], &idx);CHKERRQ(ierr);
78   for (j = 0; j < nidx; ++j) {
79     ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);CHKERRQ(ierr);
80   }
81   ierr = ISRestoreIndices(osm->iis[i],&idx);CHKERRQ(ierr);
82   ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr);
83   ierr = PetscViewerASCIIPrintf(viewer, "Inner subdomain:\n");CHKERRQ(ierr);
84   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
85   ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
86   ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr);
87   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
88   ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
89   ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr);
90   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
91   ierr = PetscFree(cidx);CHKERRQ(ierr);
92   /* Outer subdomains. */
93   ierr = ISGetLocalSize(osm->ois[i], &nidx);CHKERRQ(ierr);
94   /*
95    No more than 15 characters per index plus a space.
96    PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
97    in case nidx == 0. That will take care of the space for the trailing '\0' as well.
98    For nidx == 0, the whole string 16 '\0'.
99    */
100 #define len  16*(nidx+1)+1
101   ierr = PetscMalloc1(len, &cidx);CHKERRQ(ierr);
102   ierr = PetscViewerStringOpen(PETSC_COMM_SELF, cidx, len, &sviewer);CHKERRQ(ierr);
103 #undef len
104   ierr = ISGetIndices(osm->ois[i], &idx);CHKERRQ(ierr);
105   for (j = 0; j < nidx; ++j) {
106     ierr = PetscViewerStringSPrintf(sviewer,"%D ", idx[j]);CHKERRQ(ierr);
107   }
108   ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr);
109   ierr = ISRestoreIndices(osm->ois[i],&idx);CHKERRQ(ierr);
110   ierr = PetscViewerASCIIPrintf(viewer, "Outer subdomain:\n");CHKERRQ(ierr);
111   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
112   ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
113   ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr);
114   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
115   ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
116   ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr);
117   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
118   ierr = PetscFree(cidx);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
/*
 If -pc_gasm_print_subdomains was given, print every subdomain's inner and outer index
 sets (via PCGASMSubdomainView_Private) to the named file, or stdout by default.
 Collective on pc: all ranks must call this; subdomains are printed one at a time in
 the global numbering order, separated by barriers, so the exact call sequence below
 must be identical on every rank.
*/
static PetscErrorCode  PCGASMPrintSubdomains(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  const char     *prefix;
  char           fname[PETSC_MAX_PATH_LEN+1];
  PetscInt       l, d, count;
  PetscBool      found;
  PetscViewer    viewer, sviewer = NULL;
  PetscInt       *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
  ierr = PetscOptionsHasName(NULL,prefix,"-pc_gasm_print_subdomains",&found);CHKERRQ(ierr);
  if (!found) PetscFunctionReturn(0);
  /* The option's string value, if any, names the output file; otherwise default to stdout. */
  ierr = PetscOptionsGetString(NULL,prefix,"-pc_gasm_print_subdomains",fname,sizeof(fname),&found);CHKERRQ(ierr);
  if (!found) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); };
  ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)pc),fname,&viewer);CHKERRQ(ierr);
  /*
   Make sure the viewer has a name. Otherwise this may cause a deadlock or other weird errors when creating a subcomm viewer:
   the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed collectively on the comm.
  */
  ierr = PetscObjectName((PetscObject)viewer);CHKERRQ(ierr);
  l    = 0;
  ierr = PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);CHKERRQ(ierr);
  for (count = 0; count < osm->N; ++count) {
    /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
    if (l<osm->n) {
      d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
      if (numbering[d] == count) {
        /* This rank participates in global subdomain 'count': view it on the subdomain's own communicator. */
        ierr = PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
        ierr = PCGASMSubdomainView_Private(pc,d,sviewer);CHKERRQ(ierr);
        ierr = PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
        ++l;
      }
    }
    /* Keep ranks in lockstep so subdomain output is not interleaved. */
    ierr = MPI_Barrier(PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr);
  }
  ierr = PetscFree2(numbering,permutation);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
164 
/*
 PCView implementation for GASM: prints the overlap/subdomain summary and, one
 global subdomain at a time, each subdomain's size, (optionally) its index sets,
 and its KSP solver.  Collective on pc; the barrier/sub-viewer sequence below must
 run identically on every rank.
*/
static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  const char     *prefix;
  PetscErrorCode ierr;
  PetscMPIInt    rank, size;
  PetscInt       bsz;
  PetscBool      iascii,view_subdomains=PETSC_FALSE;
  PetscViewer    sviewer;
  PetscInt       count, l;
  char           overlap[256]     = "user-defined overlap";
  char           gsubdomains[256] = "unknown total number of subdomains";
  char           msubdomains[256] = "unknown max number of local subdomains";
  PetscInt       *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank);CHKERRMPI(ierr);

  /* The default strings above are kept when the corresponding value is still undetermined. */
  if (osm->overlap >= 0) {
    ierr = PetscSNPrintf(overlap,sizeof(overlap),"requested amount of overlap = %D",osm->overlap);CHKERRQ(ierr);
  }
  if (osm->N != PETSC_DETERMINE) {
    ierr = PetscSNPrintf(gsubdomains, sizeof(gsubdomains), "total number of subdomains = %D",osm->N);CHKERRQ(ierr);
  }
  if (osm->nmax != PETSC_DETERMINE) {
    ierr = PetscSNPrintf(msubdomains,sizeof(msubdomains),"max number of local subdomains = %D",osm->nmax);CHKERRQ(ierr);
  }

  ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(NULL,prefix,"-pc_gasm_view_subdomains",&view_subdomains,NULL);CHKERRQ(ierr);

  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    /*
     Make sure the viewer has a name. Otherwise this may cause a deadlock when creating a subcomm viewer:
     the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed
     collectively on the comm.
     */
    ierr = PetscObjectName((PetscObject)viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  Restriction/interpolation type: %s\n",PCGASMTypes[osm->type]);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  %s\n",overlap);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  %s\n",gsubdomains);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  %s\n",msubdomains);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d|%d] number of locally-supported subdomains = %D\n",rank,size,osm->n);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    /* Cannot take advantage of osm->same_subdomain_solvers without a global numbering of subdomains. */
    ierr = PetscViewerASCIIPrintf(viewer,"  Subdomain solver info is as follows:\n");CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  - - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
    /* Make sure that everybody waits for the banner to be printed. */
    ierr = MPI_Barrier(PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr);
    /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
    ierr = PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);CHKERRQ(ierr);
    l = 0;
    for (count = 0; count < osm->N; ++count) {
      PetscMPIInt srank, ssize;
      if (l<osm->n) {
        PetscInt d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
        if (numbering[d] == count) {
          /* This rank participates in global subdomain 'count': print its info on the subdomain's communicator. */
          ierr = MPI_Comm_size(((PetscObject)osm->ois[d])->comm, &ssize);CHKERRMPI(ierr);
          ierr = MPI_Comm_rank(((PetscObject)osm->ois[d])->comm, &srank);CHKERRMPI(ierr);
          ierr = PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
          ierr = ISGetLocalSize(osm->ois[d],&bsz);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"  [%d|%d] (subcomm [%d|%d]) local subdomain number %D, local size = %D\n",rank,size,srank,ssize,d,bsz);CHKERRQ(ierr);
          ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr);
          if (view_subdomains) {
            ierr = PCGASMSubdomainView_Private(pc,d,sviewer);CHKERRQ(ierr);
          }
          if (!pc->setupcalled) {
            ierr = PetscViewerASCIIPrintf(sviewer, "  Solver not set up yet: PCSetUp() not yet called\n");CHKERRQ(ierr);
          } else {
            ierr = KSPView(osm->ksp[d],sviewer);CHKERRQ(ierr);
          }
          ierr = PetscViewerASCIIPrintf(sviewer,"  - - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
          ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr);
          ierr = PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
          ++l;
        }
      }
      /* Keep ranks in lockstep so subdomain output is not interleaved. */
      ierr = MPI_Barrier(PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr);
    }
    ierr = PetscFree2(numbering,permutation);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    /* this line is needed to match the extra PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
    ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
257 
258 PETSC_INTERN PetscErrorCode  PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[]);
259 
260 PetscErrorCode PCGASMSetHierarchicalPartitioning(PC pc)
261 {
262    PC_GASM              *osm = (PC_GASM*)pc->data;
263    MatPartitioning       part;
264    MPI_Comm              comm;
265    PetscMPIInt           size;
266    PetscInt              nlocalsubdomains,fromrows_localsize;
267    IS                    partitioning,fromrows,isn;
268    Vec                   outervec;
269    PetscErrorCode        ierr;
270 
271    PetscFunctionBegin;
272    ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
273    ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
274    /* we do not need a hierarchical partitioning when
275     * the total number of subdomains is consistent with
276     * the number of MPI tasks.
277     * For the following cases, we do not need to use HP
278     * */
279    if (osm->N==PETSC_DETERMINE || osm->N>=size || osm->N==1) PetscFunctionReturn(0);
280    PetscCheckFalse(size%osm->N != 0,PETSC_COMM_WORLD,PETSC_ERR_ARG_INCOMP,"have to specify the total number of subdomains %D to be a factor of the number of processors %d ",osm->N,size);
281    nlocalsubdomains = size/osm->N;
282    osm->n           = 1;
283    ierr = MatPartitioningCreate(comm,&part);CHKERRQ(ierr);
284    ierr = MatPartitioningSetAdjacency(part,pc->pmat);CHKERRQ(ierr);
285    ierr = MatPartitioningSetType(part,MATPARTITIONINGHIERARCH);CHKERRQ(ierr);
286    ierr = MatPartitioningHierarchicalSetNcoarseparts(part,osm->N);CHKERRQ(ierr);
287    ierr = MatPartitioningHierarchicalSetNfineparts(part,nlocalsubdomains);CHKERRQ(ierr);
288    ierr = MatPartitioningSetFromOptions(part);CHKERRQ(ierr);
289    /* get new processor owner number of each vertex */
290    ierr = MatPartitioningApply(part,&partitioning);CHKERRQ(ierr);
291    ierr = ISBuildTwoSided(partitioning,NULL,&fromrows);CHKERRQ(ierr);
292    ierr = ISPartitioningToNumbering(partitioning,&isn);CHKERRQ(ierr);
293    ierr = ISDestroy(&isn);CHKERRQ(ierr);
294    ierr = ISGetLocalSize(fromrows,&fromrows_localsize);CHKERRQ(ierr);
295    ierr = MatPartitioningDestroy(&part);CHKERRQ(ierr);
296    ierr = MatCreateVecs(pc->pmat,&outervec,NULL);CHKERRQ(ierr);
297    ierr = VecCreateMPI(comm,fromrows_localsize,PETSC_DETERMINE,&(osm->pcx));CHKERRQ(ierr);
298    ierr = VecDuplicate(osm->pcx,&(osm->pcy));CHKERRQ(ierr);
299    ierr = VecScatterCreate(osm->pcx,NULL,outervec,fromrows,&(osm->pctoouter));CHKERRQ(ierr);
300    ierr = MatCreateSubMatrix(pc->pmat,fromrows,fromrows,MAT_INITIAL_MATRIX,&(osm->permutationP));CHKERRQ(ierr);
301    ierr = PetscObjectReference((PetscObject)fromrows);CHKERRQ(ierr);
302    osm->permutationIS = fromrows;
303    osm->pcmat =  pc->pmat;
304    ierr = PetscObjectReference((PetscObject)osm->permutationP);CHKERRQ(ierr);
305    pc->pmat = osm->permutationP;
306    ierr = VecDestroy(&outervec);CHKERRQ(ierr);
307    ierr = ISDestroy(&fromrows);CHKERRQ(ierr);
308    ierr = ISDestroy(&partitioning);CHKERRQ(ierr);
309    osm->n           = PETSC_DETERMINE;
310    PetscFunctionReturn(0);
311 }
312 
/*
 Set up the GASM preconditioner: determine the inner/outer subdomains (from the user,
 the DM, or one-per-rank defaults), build the merged work vectors and the restriction
 scatters onto the disjoint unions of outer and inner subdomains, extract the subdomain
 matrices, and create/configure one KSP per local subdomain.  On repeated calls only
 the submatrices are refreshed.  Collective on pc.
*/
static PetscErrorCode PCSetUp_GASM(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i,nInnerIndices,nTotalInnerIndices;
  PetscMPIInt    rank, size;
  MatReuse       scall = MAT_REUSE_MATRIX;
  KSP            ksp;
  PC             subpc;
  const char     *prefix,*pprefix;
  Vec            x,y;
  PetscInt       oni;       /* Number of indices in the i-th local outer subdomain.               */
  const PetscInt *oidxi;    /* Indices from the i-th subdomain local outer subdomain.             */
  PetscInt       on;        /* Number of indices in the disjoint union of local outer subdomains. */
  PetscInt       *oidx;     /* Indices in the disjoint union of local outer subdomains. */
  IS             gois;      /* Disjoint union the global indices of outer subdomains.             */
  IS             goid;      /* Identity IS of the size of the disjoint union of outer subdomains. */
  PetscScalar    *gxarray, *gyarray;
  PetscInt       gostart;   /* Start of locally-owned indices in the vectors -- osm->gx,osm->gy -- over the disjoint union of outer subdomains. */
  PetscInt       num_subdomains    = 0;
  DM             *subdomain_dm     = NULL;
  char           **subdomain_names = NULL;
  PetscInt       *numbering;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRMPI(ierr);
  if (!pc->setupcalled) {
    /* use a hierarchical partitioning */
    if (osm->hierarchicalpartitioning) {
      ierr = PCGASMSetHierarchicalPartitioning(pc);CHKERRQ(ierr);
    }
    /* Determine the subdomains, trying (in order): explicit total count, the DM, one per rank. */
    if (osm->n == PETSC_DETERMINE) {
      if (osm->N != PETSC_DETERMINE) {
           /* No local subdomains given, but the desired number of total subdomains is known, so construct them accordingly. */
           ierr = PCGASMCreateSubdomains(pc->pmat,osm->N,&osm->n,&osm->iis);CHKERRQ(ierr);
      } else if (osm->dm_subdomains && pc->dm) {
        /* try pc->dm next, if allowed */
        PetscInt  d;
        IS       *inner_subdomain_is, *outer_subdomain_is;
        ierr = DMCreateDomainDecomposition(pc->dm, &num_subdomains, &subdomain_names, &inner_subdomain_is, &outer_subdomain_is, &subdomain_dm);CHKERRQ(ierr);
        if (num_subdomains) {
          ierr = PCGASMSetSubdomains(pc, num_subdomains, inner_subdomain_is, outer_subdomain_is);CHKERRQ(ierr);
        }
        /* PCGASMSetSubdomains() took its own references; release the DM's ISs. */
        for (d = 0; d < num_subdomains; ++d) {
          if (inner_subdomain_is) {ierr = ISDestroy(&inner_subdomain_is[d]);CHKERRQ(ierr);}
          if (outer_subdomain_is) {ierr = ISDestroy(&outer_subdomain_is[d]);CHKERRQ(ierr);}
        }
        ierr = PetscFree(inner_subdomain_is);CHKERRQ(ierr);
        ierr = PetscFree(outer_subdomain_is);CHKERRQ(ierr);
      } else {
        /* still no subdomains; use one per processor */
        osm->nmax = osm->n = 1;
        ierr      = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRMPI(ierr);
        osm->N    = size;
        ierr = PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);CHKERRQ(ierr);
      }
    }
    if (!osm->iis) {
      /*
       osm->n was set in PCGASMSetSubdomains(), but the actual subdomains have not been supplied.
       We create the requisite number of local inner subdomains and then expand them into
       outer subdomains, if necessary.
       */
      ierr = PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);CHKERRQ(ierr);
    }
    if (!osm->ois) {
      /*
            Initially make outer subdomains the same as inner subdomains. If nonzero additional overlap
            has been requested, copy the inner subdomains over so they can be modified.
      */
      ierr = PetscMalloc1(osm->n,&osm->ois);CHKERRQ(ierr);
      for (i=0; i<osm->n; ++i) {
        if (osm->overlap > 0 && osm->N>1) { /* With positive overlap, osm->iis[i] will be modified */
          ierr = ISDuplicate(osm->iis[i],(osm->ois)+i);CHKERRQ(ierr);
          ierr = ISCopy(osm->iis[i],osm->ois[i]);CHKERRQ(ierr);
        } else {
          ierr      = PetscObjectReference((PetscObject)((osm->iis)[i]));CHKERRQ(ierr);
          osm->ois[i] = osm->iis[i];
        }
      }
      if (osm->overlap>0 && osm->N>1) {
        /* Extend the "overlapping" regions by a number of steps */
        ierr = MatIncreaseOverlapSplit(pc->pmat,osm->n,osm->ois,osm->overlap);CHKERRQ(ierr);
      }
    }

    /* Now the subdomains are defined.  Determine their global and max local numbers, if necessary. */
    if (osm->nmax == PETSC_DETERMINE) {
      PetscMPIInt inwork,outwork;
      /* determine global number of subdomains and the max number of local subdomains */
      inwork     = osm->n;
      ierr       = MPIU_Allreduce(&inwork,&outwork,1,MPI_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr);
      osm->nmax  = outwork;
    }
    if (osm->N == PETSC_DETERMINE) {
      /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
      ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,&osm->N,NULL);CHKERRQ(ierr);
    }

    if (osm->sort_indices) {
      for (i=0; i<osm->n; i++) {
        ierr = ISSort(osm->ois[i]);CHKERRQ(ierr);
        ierr = ISSort(osm->iis[i]);CHKERRQ(ierr);
      }
    }
    ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
    ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr);

    /*
       Merge the ISs, create merged vectors and restrictions.
     */
    /* Merge outer subdomain ISs and construct a restriction onto the disjoint union of local outer subdomains. */
    on = 0;
    for (i=0; i<osm->n; i++) {
      ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
      on  += oni;
    }
    ierr = PetscMalloc1(on, &oidx);CHKERRQ(ierr);
    on   = 0;
    /* Merge local indices together */
    for (i=0; i<osm->n; i++) {
      ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
      ierr = ISGetIndices(osm->ois[i],&oidxi);CHKERRQ(ierr);
      ierr = PetscArraycpy(oidx+on,oidxi,oni);CHKERRQ(ierr);
      ierr = ISRestoreIndices(osm->ois[i],&oidxi);CHKERRQ(ierr);
      on  += oni;
    }
    ierr = ISCreateGeneral(((PetscObject)(pc))->comm,on,oidx,PETSC_OWN_POINTER,&gois);CHKERRQ(ierr);
    nTotalInnerIndices = 0;
    for (i=0; i<osm->n; i++) {
      ierr = ISGetLocalSize(osm->iis[i],&nInnerIndices);CHKERRQ(ierr);
      nTotalInnerIndices += nInnerIndices;
    }
    /* x,y are temporary global-layout vectors used only to build the scatters below. */
    ierr = VecCreateMPI(((PetscObject)(pc))->comm,nTotalInnerIndices,PETSC_DETERMINE,&x);CHKERRQ(ierr);
    ierr = VecDuplicate(x,&y);CHKERRQ(ierr);

    ierr = VecCreateMPI(PetscObjectComm((PetscObject)pc),on,PETSC_DECIDE,&osm->gx);CHKERRQ(ierr);
    ierr = VecDuplicate(osm->gx,&osm->gy);CHKERRQ(ierr);
    ierr = VecGetOwnershipRange(osm->gx, &gostart, NULL);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),on,gostart,1, &goid);CHKERRQ(ierr);
    /* gois may contain indices that are not owned locally */
    ierr = VecScatterCreate(x,gois,osm->gx,goid, &(osm->gorestriction));CHKERRQ(ierr);
    ierr = PetscMalloc1(osm->n,&numbering);CHKERRQ(ierr);
    ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,NULL,numbering);CHKERRQ(ierr);
    ierr = VecDestroy(&x);CHKERRQ(ierr);
    ierr = ISDestroy(&gois);CHKERRQ(ierr);

    /* Merge inner subdomain ISs and construct a restriction onto the disjoint union of local inner subdomains. */
    {
      PetscInt        ini;           /* Number of indices the i-th a local inner subdomain. */
      PetscInt        in;            /* Number of indices in the disjoint union of local inner subdomains. */
      PetscInt       *iidx;          /* Global indices in the merged local inner subdomain. */
      PetscInt       *ioidx;         /* Global indices of the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
      IS              giis;          /* IS for the disjoint union of inner subdomains. */
      IS              giois;         /* IS for the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
      PetscScalar    *array;
      const PetscInt *indices;
      PetscInt        k;
      on = 0;
      for (i=0; i<osm->n; i++) {
        ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
        on  += oni;
      }
      ierr = PetscMalloc1(on, &iidx);CHKERRQ(ierr);
      ierr = PetscMalloc1(on, &ioidx);CHKERRQ(ierr);
      ierr = VecGetArray(y,&array);CHKERRQ(ierr);
      /* Tag each inner-subdomain entry of y with its subdomain's global number, then scatter
         through gorestriction: positions whose tag survives unchanged belong to the inner
         (non-overlap) part of the corresponding outer subdomain. */
      in   = 0;
      for (i=0; i<osm->n; i++) {
        ierr   = ISGetLocalSize(osm->iis[i],&ini);CHKERRQ(ierr);
        for (k = 0; k < ini; ++k) {
          array[in+k] = numbering[i];
        }
        in += ini;
      }
      ierr = VecRestoreArray(y,&array);CHKERRQ(ierr);
      ierr = VecScatterBegin(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecGetOwnershipRange(osm->gy,&gostart, NULL);CHKERRQ(ierr);
      ierr = VecGetArray(osm->gy,&array);CHKERRQ(ierr);
      on  = 0;
      in  = 0;
      for (i=0; i<osm->n; i++) {
        ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
        ierr = ISGetIndices(osm->ois[i],&indices);CHKERRQ(ierr);
        for (k=0; k<oni; k++) {
          /*  skip overlapping indices to get inner domain */
          if (PetscRealPart(array[on+k]) != numbering[i]) continue;
          iidx[in]    = indices[k];
          ioidx[in++] = gostart+on+k;
        }
        ierr   = ISRestoreIndices(osm->ois[i], &indices);CHKERRQ(ierr);
        on += oni;
      }
      ierr = VecRestoreArray(osm->gy,&array);CHKERRQ(ierr);
      ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,iidx,PETSC_OWN_POINTER,&giis);CHKERRQ(ierr);
      ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,ioidx,PETSC_OWN_POINTER,&giois);CHKERRQ(ierr);
      ierr = VecScatterCreate(y,giis,osm->gy,giois,&osm->girestriction);CHKERRQ(ierr);
      ierr = VecDestroy(&y);CHKERRQ(ierr);
      ierr = ISDestroy(&giis);CHKERRQ(ierr);
      ierr = ISDestroy(&giois);CHKERRQ(ierr);
    }
    ierr = ISDestroy(&goid);CHKERRQ(ierr);
    ierr = PetscFree(numbering);CHKERRQ(ierr);

    /* Create the subdomain work vectors.  Each osm->x[i]/osm->y[i] aliases a slice of the
       merged gx/gy storage; note 'on' accumulates the previous iteration's oni. */
    ierr = PetscMalloc1(osm->n,&osm->x);CHKERRQ(ierr);
    ierr = PetscMalloc1(osm->n,&osm->y);CHKERRQ(ierr);
    ierr = VecGetArray(osm->gx, &gxarray);CHKERRQ(ierr);
    ierr = VecGetArray(osm->gy, &gyarray);CHKERRQ(ierr);
    for (i=0, on=0; i<osm->n; ++i, on += oni) {
      PetscInt oNi;
      ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
      /* on a sub communicator */
      ierr = ISGetSize(osm->ois[i],&oNi);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gxarray+on,&osm->x[i]);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gyarray+on,&osm->y[i]);CHKERRQ(ierr);
    }
    ierr = VecRestoreArray(osm->gx, &gxarray);CHKERRQ(ierr);
    ierr = VecRestoreArray(osm->gy, &gyarray);CHKERRQ(ierr);
    /* Create the subdomain solvers */
    ierr = PetscMalloc1(osm->n,&osm->ksp);CHKERRQ(ierr);
    for (i=0; i<osm->n; i++) {
      char subprefix[PETSC_MAX_PATH_LEN+1];
      ierr        = KSPCreate(((PetscObject)(osm->ois[i]))->comm,&ksp);CHKERRQ(ierr);
      ierr        = KSPSetErrorIfNotConverged(ksp,pc->erroriffailure);CHKERRQ(ierr);
      ierr        = PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp);CHKERRQ(ierr);
      ierr        = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr);
      ierr        = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
      ierr        = KSPGetPC(ksp,&subpc);CHKERRQ(ierr); /* Why do we need this here? */
      if (subdomain_dm) {
        ierr = KSPSetDM(ksp,subdomain_dm[i]);CHKERRQ(ierr);
        ierr = DMDestroy(subdomain_dm+i);CHKERRQ(ierr);
      }
      ierr        = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
      ierr        = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr);
      /* DM-provided subdomain names contribute an extra "sub_<name>_" prefix segment. */
      if (subdomain_names && subdomain_names[i]) {
        ierr = PetscSNPrintf(subprefix,PETSC_MAX_PATH_LEN,"sub_%s_",subdomain_names[i]);CHKERRQ(ierr);
        ierr = KSPAppendOptionsPrefix(ksp,subprefix);CHKERRQ(ierr);
        ierr = PetscFree(subdomain_names[i]);CHKERRQ(ierr);
      }
      ierr        = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr);
      osm->ksp[i] = ksp;
    }
    ierr = PetscFree(subdomain_dm);CHKERRQ(ierr);
    ierr = PetscFree(subdomain_names);CHKERRQ(ierr);
    scall = MAT_INITIAL_MATRIX;
  } else { /* if (pc->setupcalled) */
    /*
       Destroy the submatrices from the previous iteration
    */
    if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
      ierr  = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
      scall = MAT_INITIAL_MATRIX;
    }
    /* Under hierarchical partitioning, re-extract the redistributed operator from the updated pc->pmat. */
    if (osm->permutationIS) {
      ierr = MatCreateSubMatrix(pc->pmat,osm->permutationIS,osm->permutationIS,scall,&osm->permutationP);CHKERRQ(ierr);
      ierr = PetscObjectReference((PetscObject)osm->permutationP);CHKERRQ(ierr);
      osm->pcmat = pc->pmat;
      pc->pmat   = osm->permutationP;
    }
  }

  /*
     Extract the submatrices.
  */
  if (size > 1) {
    ierr = MatCreateSubMatricesMPI(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreateSubMatrices(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);CHKERRQ(ierr);
  }
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr);
    for (i=0; i<osm->n; i++) {
      ierr = PetscLogObjectParent((PetscObject)pc,(PetscObject)osm->pmat[i]);CHKERRQ(ierr);
      ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr);
    }
  }

  /* Return control to the user so that the submatrices can be modified (e.g., to apply
     different boundary conditions for the submatrices than for the global problem) */
  ierr = PCModifySubMatrices(pc,osm->n,osm->ois,osm->ois,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr);

  /*
     Loop over submatrices putting them into local ksps
  */
  for (i=0; i<osm->n; i++) {
    ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i]);CHKERRQ(ierr);
    ierr = KSPGetOptionsPrefix(osm->ksp[i],&prefix);CHKERRQ(ierr);
    ierr = MatSetOptionsPrefix(osm->pmat[i],prefix);CHKERRQ(ierr);
    if (!pc->setupcalled) {
      ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr);
    }
  }
  /* Restore the original operator if it was swapped out by hierarchical partitioning. */
  if (osm->pcmat) {
    ierr = MatDestroy(&pc->pmat);CHKERRQ(ierr);
    pc->pmat   = osm->pcmat;
    osm->pcmat = NULL;
  }
  PetscFunctionReturn(0);
}
615 
616 static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc)
617 {
618   PC_GASM        *osm = (PC_GASM*)pc->data;
619   PetscErrorCode ierr;
620   PetscInt       i;
621 
622   PetscFunctionBegin;
623   for (i=0; i<osm->n; i++) {
624     ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr);
625   }
626   PetscFunctionReturn(0);
627 }
628 
/*
   PCApply_GASM - Applies one pass of the generalized additive Schwarz preconditioner:
   restrict the input onto the subdomains, solve each subdomain problem with its own
   KSP, and add the subdomain corrections back into the output vector.

   Note: the VecScatterBegin/VecScatterEnd pairs are deliberately split so that the
   VecZeroEntries in between overlaps with the communication; do not reorder.
*/
static PetscErrorCode PCApply_GASM(PC pc,Vec xin,Vec yout)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;
  Vec            x,y;
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

  PetscFunctionBegin;
  if (osm->pctoouter) {
    /* The PC works in a permuted ("outer") numbering; map xin into that space first. */
    ierr = VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    x = osm->pcx;
    y = osm->pcy;
  } else {
    x = xin;
    y = yout;
  }
  /*
     support for limiting the restriction or interpolation only to the inner
     subdomain values (leaving the other values 0).
  */
  if (!(osm->type & PC_GASM_RESTRICT)) {
    /* have to zero the work RHS since scatter may leave some slots empty */
    ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
    /* restrict using only the inner (non-overlapping) subdomain indices */
    ierr = VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  } else {
    /* restrict using the outer (overlapping) subdomain indices */
    ierr = VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  /* zero the subdomain solution vector while the scatter is in flight */
  ierr = VecZeroEntries(osm->gy);CHKERRQ(ierr);
  if (!(osm->type & PC_GASM_RESTRICT)) {
    ierr = VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  } else {
    ierr = VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  /* do the subdomain solves */
  for (i=0; i<osm->n; ++i) {
    ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
    ierr = KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);CHKERRQ(ierr);
  }
  /* do we need to zero y? */
  ierr = VecZeroEntries(y);CHKERRQ(ierr);
  if (!(osm->type & PC_GASM_INTERPOLATE)) {
    /* interpolate using only the inner subdomain indices */
    ierr = VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  } else {
    /* interpolate using the outer (overlapping) subdomain indices */
    ierr = VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  if (osm->pctoouter) {
    /* map the result back from the outer numbering to the caller's numbering */
    ierr = VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
684 
685 static PetscErrorCode PCMatApply_GASM(PC pc,Mat Xin,Mat Yout)
686 {
687   PC_GASM        *osm = (PC_GASM*)pc->data;
688   Mat            X,Y,O=NULL,Z,W;
689   Vec            x,y;
690   PetscInt       i,m,M,N;
691   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
692   PetscErrorCode ierr;
693 
694   PetscFunctionBegin;
695   PetscCheckFalse(osm->n != 1,PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented");
696   ierr = MatGetSize(Xin,NULL,&N);CHKERRQ(ierr);
697   if (osm->pctoouter) {
698     ierr = VecGetLocalSize(osm->pcx,&m);CHKERRQ(ierr);
699     ierr = VecGetSize(osm->pcx,&M);CHKERRQ(ierr);
700     ierr = MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&O);CHKERRQ(ierr);
701     for (i = 0; i < N; ++i) {
702       ierr = MatDenseGetColumnVecRead(Xin,i,&x);CHKERRQ(ierr);
703       ierr = MatDenseGetColumnVecWrite(O,i,&y);CHKERRQ(ierr);
704       ierr = VecScatterBegin(osm->pctoouter,x,y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
705       ierr = VecScatterEnd(osm->pctoouter,x,y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
706       ierr = MatDenseRestoreColumnVecWrite(O,i,&y);CHKERRQ(ierr);
707       ierr = MatDenseRestoreColumnVecRead(Xin,i,&x);CHKERRQ(ierr);
708     }
709     X = Y = O;
710   } else {
711     X = Xin;
712     Y = Yout;
713   }
714   /*
715      support for limiting the restriction or interpolation only to the inner
716      subdomain values (leaving the other values 0).
717   */
718   ierr = VecGetLocalSize(osm->x[0],&m);CHKERRQ(ierr);
719   ierr = VecGetSize(osm->x[0],&M);CHKERRQ(ierr);
720   ierr = MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&Z);CHKERRQ(ierr);
721   for (i = 0; i < N; ++i) {
722     ierr = MatDenseGetColumnVecRead(X,i,&x);CHKERRQ(ierr);
723     ierr = MatDenseGetColumnVecWrite(Z,i,&y);CHKERRQ(ierr);
724     if (!(osm->type & PC_GASM_RESTRICT)) {
725       /* have to zero the work RHS since scatter may leave some slots empty */
726       ierr = VecZeroEntries(y);CHKERRQ(ierr);
727       ierr = VecScatterBegin(osm->girestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr);
728       ierr = VecScatterEnd(osm->girestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr);
729     } else {
730       ierr = VecScatterBegin(osm->gorestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr);
731       ierr = VecScatterEnd(osm->gorestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr);
732     }
733     ierr = MatDenseRestoreColumnVecWrite(Z,i,&y);CHKERRQ(ierr);
734     ierr = MatDenseRestoreColumnVecRead(X,i,&x);CHKERRQ(ierr);
735   }
736   ierr = MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&W);CHKERRQ(ierr);
737   ierr = MatSetOption(Z,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
738   ierr = MatAssemblyBegin(Z,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
739   ierr = MatAssemblyEnd(Z,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
740   /* do the subdomain solve */
741   ierr = KSPMatSolve(osm->ksp[0],Z,W);CHKERRQ(ierr);
742   ierr = KSPCheckSolve(osm->ksp[0],pc,NULL);CHKERRQ(ierr);
743   ierr = MatDestroy(&Z);CHKERRQ(ierr);
744   /* do we need to zero y? */
745   ierr = MatZeroEntries(Y);CHKERRQ(ierr);
746   for (i = 0; i < N; ++i) {
747     ierr = MatDenseGetColumnVecWrite(Y,i,&y);CHKERRQ(ierr);
748     ierr = MatDenseGetColumnVecRead(W,i,&x);CHKERRQ(ierr);
749     if (!(osm->type & PC_GASM_INTERPOLATE)) {
750       ierr = VecScatterBegin(osm->girestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr);
751       ierr = VecScatterEnd(osm->girestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr);
752     } else {
753       ierr = VecScatterBegin(osm->gorestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr);
754       ierr = VecScatterEnd(osm->gorestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr);
755     }
756     ierr = MatDenseRestoreColumnVecRead(W,i,&x);CHKERRQ(ierr);
757     if (osm->pctoouter) {
758       ierr = MatDenseGetColumnVecWrite(Yout,i,&x);CHKERRQ(ierr);
759       ierr = VecScatterBegin(osm->pctoouter,y,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
760       ierr = VecScatterEnd(osm->pctoouter,y,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
761       ierr = MatDenseRestoreColumnVecRead(Yout,i,&x);CHKERRQ(ierr);
762     }
763     ierr = MatDenseRestoreColumnVecWrite(Y,i,&y);CHKERRQ(ierr);
764   }
765   ierr = MatDestroy(&W);CHKERRQ(ierr);
766   ierr = MatDestroy(&O);CHKERRQ(ierr);
767   PetscFunctionReturn(0);
768 }
769 
/*
   PCApplyTranspose_GASM - Applies the transpose of the GASM preconditioner:
   restriction and interpolation roles are swapped relative to PCApply_GASM()
   and each subdomain is solved with KSPSolveTranspose().

   Note: the VecScatterBegin/VecScatterEnd pairs are deliberately split so that the
   VecZeroEntries in between overlaps with the communication; do not reorder.
*/
static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec xin,Vec yout)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;
  Vec            x,y;
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

  PetscFunctionBegin;
  if (osm->pctoouter) {
   /* The PC works in a permuted ("outer") numbering; map xin into that space first. */
   ierr = VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
   ierr = VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
   x = osm->pcx;
   y = osm->pcy;
  }else{
        x = xin;
        y = yout;
  }
  /*
     Support for limiting the restriction or interpolation to only local
     subdomain values (leaving the other values 0).

     Note: these are reversed from the PCApply_GASM() because we are applying the
     transpose of the three terms
  */
  if (!(osm->type & PC_GASM_INTERPOLATE)) {
    /* have to zero the work RHS since scatter may leave some slots empty */
    ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
    ierr = VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  } else {
    ierr = VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  /* zero the subdomain solution vector while the scatter is in flight */
  ierr = VecZeroEntries(osm->gy);CHKERRQ(ierr);
  if (!(osm->type & PC_GASM_INTERPOLATE)) {
    ierr = VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  } else {
    ierr = VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  /* do the local solves */
  for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
    ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
    ierr = KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);CHKERRQ(ierr);
  }
  ierr = VecZeroEntries(y);CHKERRQ(ierr);
  if (!(osm->type & PC_GASM_RESTRICT)) {
    ierr = VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  } else {
    ierr = VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  if (osm->pctoouter) {
   /* map the result back from the outer numbering to the caller's numbering */
   ierr = VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
   ierr = VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
827 
828 static PetscErrorCode PCReset_GASM(PC pc)
829 {
830   PC_GASM        *osm = (PC_GASM*)pc->data;
831   PetscErrorCode ierr;
832   PetscInt       i;
833 
834   PetscFunctionBegin;
835   if (osm->ksp) {
836     for (i=0; i<osm->n; i++) {
837       ierr = KSPReset(osm->ksp[i]);CHKERRQ(ierr);
838     }
839   }
840   if (osm->pmat) {
841     if (osm->n > 0) {
842       PetscMPIInt size;
843       ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRMPI(ierr);
844       if (size > 1) {
845         /* osm->pmat is created by MatCreateSubMatricesMPI(), cannot use MatDestroySubMatrices() */
846         ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
847       } else {
848         ierr = MatDestroySubMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
849       }
850     }
851   }
852   if (osm->x) {
853     for (i=0; i<osm->n; i++) {
854       ierr = VecDestroy(&osm->x[i]);CHKERRQ(ierr);
855       ierr = VecDestroy(&osm->y[i]);CHKERRQ(ierr);
856     }
857   }
858   ierr = VecDestroy(&osm->gx);CHKERRQ(ierr);
859   ierr = VecDestroy(&osm->gy);CHKERRQ(ierr);
860 
861   ierr = VecScatterDestroy(&osm->gorestriction);CHKERRQ(ierr);
862   ierr = VecScatterDestroy(&osm->girestriction);CHKERRQ(ierr);
863   if (!osm->user_subdomains) {
864     ierr      = PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);CHKERRQ(ierr);
865     osm->N    = PETSC_DETERMINE;
866     osm->nmax = PETSC_DETERMINE;
867   }
868   if (osm->pctoouter) {
869         ierr = VecScatterDestroy(&(osm->pctoouter));CHKERRQ(ierr);
870   }
871   if (osm->permutationIS) {
872         ierr = ISDestroy(&(osm->permutationIS));CHKERRQ(ierr);
873   }
874   if (osm->pcx) {
875         ierr = VecDestroy(&(osm->pcx));CHKERRQ(ierr);
876   }
877   if (osm->pcy) {
878         ierr = VecDestroy(&(osm->pcy));CHKERRQ(ierr);
879   }
880   if (osm->permutationP) {
881     ierr = MatDestroy(&(osm->permutationP));CHKERRQ(ierr);
882   }
883   if (osm->pcmat) {
884         ierr = MatDestroy(&osm->pcmat);CHKERRQ(ierr);
885   }
886   PetscFunctionReturn(0);
887 }
888 
889 static PetscErrorCode PCDestroy_GASM(PC pc)
890 {
891   PC_GASM        *osm = (PC_GASM*)pc->data;
892   PetscErrorCode ierr;
893   PetscInt       i;
894 
895   PetscFunctionBegin;
896   ierr = PCReset_GASM(pc);CHKERRQ(ierr);
897   /* PCReset will not destroy subdomains, if user_subdomains is true. */
898   ierr = PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);CHKERRQ(ierr);
899   if (osm->ksp) {
900     for (i=0; i<osm->n; i++) {
901       ierr = KSPDestroy(&osm->ksp[i]);CHKERRQ(ierr);
902     }
903     ierr = PetscFree(osm->ksp);CHKERRQ(ierr);
904   }
905   ierr = PetscFree(osm->x);CHKERRQ(ierr);
906   ierr = PetscFree(osm->y);CHKERRQ(ierr);
907   ierr = PetscFree(pc->data);CHKERRQ(ierr);
908   PetscFunctionReturn(0);
909 }
910 
/*
   PCSetFromOptions_GASM - Configures the GASM preconditioner from the options database.
*/
static PetscErrorCode PCSetFromOptions_GASM(PetscOptionItems *PetscOptionsObject,PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       blocks,ovl;
  PetscBool      flg;
  PCGASMType     gasmtype;

  PetscFunctionBegin;
  ierr = PetscOptionsHead(PetscOptionsObject,"Generalized additive Schwarz options");CHKERRQ(ierr);
  ierr = PetscOptionsBool("-pc_gasm_use_dm_subdomains","If subdomains aren't set, use DMCreateDomainDecomposition() to define subdomains.","PCGASMSetUseDMSubdomains",osm->dm_subdomains,&osm->dm_subdomains,&flg);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-pc_gasm_total_subdomains","Total number of subdomains across communicator","PCGASMSetTotalSubdomains",osm->N,&blocks,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCGASMSetTotalSubdomains(pc,blocks);CHKERRQ(ierr);
  }
  ierr = PetscOptionsInt("-pc_gasm_overlap","Number of overlapping degrees of freedom","PCGASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCGASMSetOverlap(pc,ovl);CHKERRQ(ierr);
    /* an explicit overlap request overrides any DM-defined decomposition */
    osm->dm_subdomains = PETSC_FALSE;
  }
  flg  = PETSC_FALSE;
  ierr = PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);CHKERRQ(ierr);
  if (flg) {ierr = PCGASMSetType(pc,gasmtype);CHKERRQ(ierr);}
  /* NOTE(review): the option name misspells "hierarchical" as "hierachical"; renaming it
     would break existing users' option databases, so it is left unchanged here. */
  ierr = PetscOptionsBool("-pc_gasm_use_hierachical_partitioning","use hierarchical partitioning",NULL,osm->hierarchicalpartitioning,&osm->hierarchicalpartitioning,&flg);CHKERRQ(ierr);
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
938 
939 /*------------------------------------------------------------------------------------*/
940 
941 /*@
942     PCGASMSetTotalSubdomains - sets the total number of subdomains to use across the
943                                communicator.
944     Logically collective on pc
945 
946     Input Parameters:
947 +   pc  - the preconditioner
948 -   N   - total number of subdomains
949 
950     Level: beginner
951 
952 .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap()
953           PCGASMCreateSubdomains2D()
954 @*/
955 PetscErrorCode  PCGASMSetTotalSubdomains(PC pc,PetscInt N)
956 {
957   PC_GASM        *osm = (PC_GASM*)pc->data;
958   PetscMPIInt    size,rank;
959   PetscErrorCode ierr;
960 
961   PetscFunctionBegin;
962   PetscCheckFalse(N < 1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Total number of subdomains must be 1 or more, got N = %D",N);
963   PetscCheckFalse(pc->setupcalled,PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before calling PCSetUp().");
964 
965   ierr = PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);CHKERRQ(ierr);
966   osm->ois = osm->iis = NULL;
967 
968   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRMPI(ierr);
969   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRMPI(ierr);
970   osm->N    = N;
971   osm->n    = PETSC_DETERMINE;
972   osm->nmax = PETSC_DETERMINE;
973   osm->dm_subdomains = PETSC_FALSE;
974   PetscFunctionReturn(0);
975 }
976 
977 static PetscErrorCode  PCGASMSetSubdomains_GASM(PC pc,PetscInt n,IS iis[],IS ois[])
978 {
979   PC_GASM         *osm = (PC_GASM*)pc->data;
980   PetscErrorCode  ierr;
981   PetscInt        i;
982 
983   PetscFunctionBegin;
984   PetscCheckFalse(n < 1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more subdomains, got n = %D",n);
985   PetscCheckFalse(pc->setupcalled,PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetSubdomains() should be called before calling PCSetUp().");
986 
987   ierr = PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);CHKERRQ(ierr);
988   osm->iis  = osm->ois = NULL;
989   osm->n    = n;
990   osm->N    = PETSC_DETERMINE;
991   osm->nmax = PETSC_DETERMINE;
992   if (ois) {
993     ierr = PetscMalloc1(n,&osm->ois);CHKERRQ(ierr);
994     for (i=0; i<n; i++) {
995       ierr = PetscObjectReference((PetscObject)ois[i]);CHKERRQ(ierr);
996       osm->ois[i] = ois[i];
997     }
998     /*
999        Since the user set the outer subdomains, even if nontrivial overlap was requested via PCGASMSetOverlap(),
1000        it will be ignored.  To avoid confusion later on (e.g., when viewing the PC), the overlap size is set to -1.
1001     */
1002     osm->overlap = -1;
1003     /* inner subdomains must be provided  */
1004     PetscCheckFalse(!iis,PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"inner indices have to be provided ");
1005   }/* end if */
1006   if (iis) {
1007     ierr = PetscMalloc1(n,&osm->iis);CHKERRQ(ierr);
1008     for (i=0; i<n; i++) {
1009       ierr = PetscObjectReference((PetscObject)iis[i]);CHKERRQ(ierr);
1010       osm->iis[i] = iis[i];
1011     }
1012     if (!ois) {
1013       osm->ois = NULL;
1014       /* if user does not provide outer indices, we will create the corresponding outer indices using  osm->overlap =1 in PCSetUp_GASM */
1015     }
1016   }
1017   if (PetscDefined(USE_DEBUG)) {
1018     PetscInt        j,rstart,rend,*covered,lsize;
1019     const PetscInt  *indices;
1020     /* check if the inner indices cover and only cover the local portion of the preconditioning matrix */
1021     ierr = MatGetOwnershipRange(pc->pmat,&rstart,&rend);CHKERRQ(ierr);
1022     ierr = PetscCalloc1(rend-rstart,&covered);CHKERRQ(ierr);
1023     /* check if the current processor owns indices from others */
1024     for (i=0; i<n; i++) {
1025       ierr = ISGetIndices(osm->iis[i],&indices);CHKERRQ(ierr);
1026       ierr = ISGetLocalSize(osm->iis[i],&lsize);CHKERRQ(ierr);
1027       for (j=0; j<lsize; j++) {
1028         PetscCheckFalse(indices[j]<rstart || indices[j]>=rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"inner subdomains can not own an index %d from other processors", indices[j]);
1029         else PetscCheckFalse(covered[indices[j]-rstart]==1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"inner subdomains can not have an overlapping index %d ",indices[j]);
1030         else covered[indices[j]-rstart] = 1;
1031       }
1032     ierr = ISRestoreIndices(osm->iis[i],&indices);CHKERRQ(ierr);
1033     }
1034     /* check if we miss any indices */
1035     for (i=rstart; i<rend; i++) {
1036       PetscCheckFalse(!covered[i-rstart],PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"local entity %d was not covered by inner subdomains",i);
1037     }
1038     ierr = PetscFree(covered);CHKERRQ(ierr);
1039   }
1040   if (iis)  osm->user_subdomains = PETSC_TRUE;
1041   PetscFunctionReturn(0);
1042 }
1043 
1044 static PetscErrorCode  PCGASMSetOverlap_GASM(PC pc,PetscInt ovl)
1045 {
1046   PC_GASM *osm = (PC_GASM*)pc->data;
1047 
1048   PetscFunctionBegin;
1049   PetscCheckFalse(ovl < 0,PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
1050   PetscCheckFalse(pc->setupcalled && ovl != osm->overlap,PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp().");
1051   if (!pc->setupcalled) osm->overlap = ovl;
1052   PetscFunctionReturn(0);
1053 }
1054 
1055 static PetscErrorCode  PCGASMSetType_GASM(PC pc,PCGASMType type)
1056 {
1057   PC_GASM *osm = (PC_GASM*)pc->data;
1058 
1059   PetscFunctionBegin;
1060   osm->type     = type;
1061   osm->type_set = PETSC_TRUE;
1062   PetscFunctionReturn(0);
1063 }
1064 
1065 static PetscErrorCode  PCGASMSetSortIndices_GASM(PC pc,PetscBool doSort)
1066 {
1067   PC_GASM *osm = (PC_GASM*)pc->data;
1068 
1069   PetscFunctionBegin;
1070   osm->sort_indices = doSort;
1071   PetscFunctionReturn(0);
1072 }
1073 
1074 /*
1075    FIXME: This routine might need to be modified now that multiple ranks per subdomain are allowed.
1076         In particular, it would upset the global subdomain number calculation.
1077 */
1078 static PetscErrorCode  PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp)
1079 {
1080   PC_GASM        *osm = (PC_GASM*)pc->data;
1081   PetscErrorCode ierr;
1082 
1083   PetscFunctionBegin;
1084   PetscCheckFalse(osm->n < 1,PetscObjectComm((PetscObject)pc),PETSC_ERR_ORDER,"Need to call PCSetUp() on PC (or KSPSetUp() on the outer KSP object) before calling here");
1085 
1086   if (n) *n = osm->n;
1087   if (first) {
1088     ierr    = MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr);
1089     *first -= osm->n;
1090   }
1091   if (ksp) {
1092     /* Assume that local solves are now different; not necessarily
1093        true, though!  This flag is used only for PCView_GASM() */
1094     *ksp                        = osm->ksp;
1095     osm->same_subdomain_solvers = PETSC_FALSE;
1096   }
1097   PetscFunctionReturn(0);
1098 } /* PCGASMGetSubKSP_GASM() */
1099 
1100 /*@C
1101     PCGASMSetSubdomains - Sets the subdomains for this processor
1102     for the additive Schwarz preconditioner.
1103 
1104     Collective on pc
1105 
1106     Input Parameters:
1107 +   pc  - the preconditioner object
1108 .   n   - the number of subdomains for this processor
1109 .   iis - the index sets that define the inner subdomains (or NULL for PETSc to determine subdomains)
1110 -   ois - the index sets that define the outer subdomains (or NULL to use the same as iis, or to construct by expanding iis by the requested overlap)
1111 
1112     Notes:
1113     The IS indices use the parallel, global numbering of the vector entries.
1114     Inner subdomains are those where the correction is applied.
1115     Outer subdomains are those where the residual necessary to obtain the
1116     corrections is obtained (see PCGASMType for the use of inner/outer subdomains).
1117     Both inner and outer subdomains can extend over several processors.
1118     This processor's portion of a subdomain is known as a local subdomain.
1119 
1120     Inner subdomains can not overlap with each other, do not have any entities from remote processors,
1121     and  have to cover the entire local subdomain owned by the current processor. The index sets on each
1122     process should be ordered such that the ith local subdomain is connected to the ith remote subdomain
1123     on another MPI process.
1124 
1125     By default the GASM preconditioner uses 1 (local) subdomain per processor.
1126 
1127     Level: advanced
1128 
1129 .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(),
1130           PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
1131 @*/
1132 PetscErrorCode  PCGASMSetSubdomains(PC pc,PetscInt n,IS iis[],IS ois[])
1133 {
1134   PC_GASM *osm = (PC_GASM*)pc->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1139   ierr = PetscTryMethod(pc,"PCGASMSetSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,iis,ois));CHKERRQ(ierr);
1140   osm->dm_subdomains = PETSC_FALSE;
1141   PetscFunctionReturn(0);
1142 }
1143 
1144 /*@
1145     PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
1146     additive Schwarz preconditioner.  Either all or no processors in the
1147     pc communicator must call this routine.
1148 
1149     Logically Collective on pc
1150 
1151     Input Parameters:
1152 +   pc  - the preconditioner context
1153 -   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 0)
1154 
1155     Options Database Key:
1156 .   -pc_gasm_overlap <overlap> - Sets overlap
1157 
1158     Notes:
1159     By default the GASM preconditioner uses 1 subdomain per processor.  To use
1160     multiple subdomain per perocessor or "straddling" subdomains that intersect
1161     multiple processors use PCGASMSetSubdomains() (or option -pc_gasm_total_subdomains <n>).
1162 
1163     The overlap defaults to 0, so if one desires that no additional
1164     overlap be computed beyond what may have been set with a call to
1165     PCGASMSetSubdomains(), then ovl must be set to be 0.  In particular, if one does
1166     not explicitly set the subdomains in application code, then all overlap would be computed
1167     internally by PETSc, and using an overlap of 0 would result in an GASM
1168     variant that is equivalent to the block Jacobi preconditioner.
1169 
1170     Note that one can define initial index sets with any overlap via
1171     PCGASMSetSubdomains(); the routine PCGASMSetOverlap() merely allows
1172     PETSc to extend that overlap further, if desired.
1173 
1174     Level: intermediate
1175 
1176 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1177           PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
1178 @*/
1179 PetscErrorCode  PCGASMSetOverlap(PC pc,PetscInt ovl)
1180 {
1181   PetscErrorCode ierr;
1182   PC_GASM *osm = (PC_GASM*)pc->data;
1183 
1184   PetscFunctionBegin;
1185   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1186   PetscValidLogicalCollectiveInt(pc,ovl,2);
1187   ierr = PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));CHKERRQ(ierr);
1188   osm->dm_subdomains = PETSC_FALSE;
1189   PetscFunctionReturn(0);
1190 }
1191 
1192 /*@
1193     PCGASMSetType - Sets the type of restriction and interpolation used
1194     for local problems in the additive Schwarz method.
1195 
1196     Logically Collective on PC
1197 
1198     Input Parameters:
1199 +   pc  - the preconditioner context
1200 -   type - variant of GASM, one of
1201 .vb
1202       PC_GASM_BASIC       - full interpolation and restriction
1203       PC_GASM_RESTRICT    - full restriction, local processor interpolation
1204       PC_GASM_INTERPOLATE - full interpolation, local processor restriction
1205       PC_GASM_NONE        - local processor restriction and interpolation
1206 .ve
1207 
1208     Options Database Key:
1209 .   -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
1210 
1211     Level: intermediate
1212 
1213 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1214           PCGASMCreateSubdomains2D()
1215 @*/
1216 PetscErrorCode  PCGASMSetType(PC pc,PCGASMType type)
1217 {
1218   PetscErrorCode ierr;
1219 
1220   PetscFunctionBegin;
1221   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1222   PetscValidLogicalCollectiveEnum(pc,type,2);
1223   ierr = PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));CHKERRQ(ierr);
1224   PetscFunctionReturn(0);
1225 }
1226 
1227 /*@
1228     PCGASMSetSortIndices - Determines whether subdomain indices are sorted.
1229 
1230     Logically Collective on PC
1231 
1232     Input Parameters:
1233 +   pc  - the preconditioner context
1234 -   doSort - sort the subdomain indices
1235 
1236     Level: intermediate
1237 
1238 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1239           PCGASMCreateSubdomains2D()
1240 @*/
1241 PetscErrorCode  PCGASMSetSortIndices(PC pc,PetscBool doSort)
1242 {
1243   PetscErrorCode ierr;
1244 
1245   PetscFunctionBegin;
1246   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1247   PetscValidLogicalCollectiveBool(pc,doSort,2);
1248   ierr = PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));CHKERRQ(ierr);
1249   PetscFunctionReturn(0);
1250 }
1251 
1252 /*@C
1253    PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on
1254    this processor.
1255 
1256    Collective on PC iff first_local is requested
1257 
1258    Input Parameter:
1259 .  pc - the preconditioner context
1260 
1261    Output Parameters:
1262 +  n_local - the number of blocks on this processor or NULL
1263 .  first_local - the global number of the first block on this processor or NULL,
1264                  all processors must request or all must pass NULL
1265 -  ksp - the array of KSP contexts
1266 
1267    Note:
1268    After PCGASMGetSubKSP() the array of KSPes is not to be freed
1269 
1270    Currently for some matrix implementations only 1 block per processor
1271    is supported.
1272 
1273    You must call KSPSetUp() before calling PCGASMGetSubKSP().
1274 
1275    Level: advanced
1276 
1277 .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap(),
1278           PCGASMCreateSubdomains2D(),
1279 @*/
1280 PetscErrorCode  PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
1281 {
1282   PetscErrorCode ierr;
1283 
1284   PetscFunctionBegin;
1285   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1286   ierr = PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));CHKERRQ(ierr);
1287   PetscFunctionReturn(0);
1288 }
1289 
1290 /* -------------------------------------------------------------------------------------*/
1291 /*MC
1292    PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
1293            its own KSP object.
1294 
1295    Options Database Keys:
1296 +  -pc_gasm_total_subdomains <n>  - Sets total number of local subdomains to be distributed among processors
1297 .  -pc_gasm_view_subdomains       - activates the printing of subdomain indices in PCView(), -ksp_view or -snes_view
1298 .  -pc_gasm_print_subdomains      - activates the printing of subdomain indices in PCSetUp()
1299 .  -pc_gasm_overlap <ovl>         - Sets overlap by which to (automatically) extend local subdomains
1300 -  -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
1301 
1302      IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
1303       will get a different convergence rate due to the default option of -pc_gasm_type restrict. Use
1304       -pc_gasm_type basic to use the standard GASM.
1305 
1306    Notes:
1307     Blocks can be shared by multiple processes.
1308 
1309      To set options on the solvers for each block append -sub_ to all the KSP, and PC
1310         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
1311 
1312      To set the options on the solvers separate for each block call PCGASMGetSubKSP()
1313          and set the options directly on the resulting KSP object (you can access its PC
1314          with KSPGetPC())
1315 
1316    Level: beginner
1317 
1318     References:
1319 +   1. - M Dryja, OB Widlund, An additive variant of the Schwarz alternating method for the case of many subregions
1320      Courant Institute, New York University Technical report
1321 -   2. - Barry Smith, Petter Bjorstad, and William Gropp, Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
1322     Cambridge University Press.
1323 
1324 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
1325            PCBJACOBI,  PCGASMGetSubKSP(), PCGASMSetSubdomains(),
1326            PCSetModifySubMatrices(), PCGASMSetOverlap(), PCGASMSetType()
1327 
1328 M*/
1329 
/*
   PCCreate_GASM - Constructor for the PCGASM preconditioner: allocates the
   PC_GASM context, sets default parameter values, installs the PC method
   table, and composes the PCGASMSetXXX()/PCGASMGetSubKSP() implementations.

   Fields not explicitly assigned below (e.g. type_set, user_subdomains)
   rely on PetscNewLog() returning zeroed memory -- presumably 0/PETSC_FALSE;
   TODO(review): confirm PetscNewLog zero-initializes in this PETSc version.
*/
PETSC_EXTERN PetscErrorCode PCCreate_GASM(PC pc)
{
  PetscErrorCode ierr;
  PC_GASM        *osm;

  PetscFunctionBegin;
  ierr = PetscNewLog(pc,&osm);CHKERRQ(ierr);

  /* Subdomain counts: PETSC_DETERMINE/PETSC_DECIDE mean "compute in PCSetUp_GASM()". */
  osm->N                        = PETSC_DETERMINE;
  osm->n                        = PETSC_DECIDE;
  osm->nmax                     = PETSC_DETERMINE;
  /* Default overlap is 0 (no automatic extension of inner subdomains). */
  osm->overlap                  = 0;
  /* Per-subdomain solvers; created lazily during setup. */
  osm->ksp                      = NULL;
  /* Scatters between global vectors and the concatenated subdomain work vectors. */
  osm->gorestriction            = NULL;
  osm->girestriction            = NULL;
  osm->pctoouter                = NULL;
  /* Work vectors for the outer (gx,gy), inner (x,y) and permuted (pcx,pcy) spaces. */
  osm->gx                       = NULL;
  osm->gy                       = NULL;
  osm->x                        = NULL;
  osm->y                        = NULL;
  osm->pcx                      = NULL;
  osm->pcy                      = NULL;
  /* Permutation data used by the hierarchical-partitioning path. */
  osm->permutationIS            = NULL;
  osm->permutationP             = NULL;
  osm->pcmat                    = NULL;
  /* Outer/inner subdomain index sets and the array of subdomain matrices. */
  osm->ois                      = NULL;
  osm->iis                      = NULL;
  osm->pmat                     = NULL;
  /* Restricted additive Schwarz is the default GASM variant. */
  osm->type                     = PC_GASM_RESTRICT;
  osm->same_subdomain_solvers   = PETSC_TRUE;
  osm->sort_indices             = PETSC_TRUE;
  osm->dm_subdomains            = PETSC_FALSE;
  osm->hierarchicalpartitioning = PETSC_FALSE;

  /* Install the PC method table. */
  pc->data                 = (void*)osm;
  pc->ops->apply           = PCApply_GASM;
  pc->ops->matapply        = PCMatApply_GASM;
  pc->ops->applytranspose  = PCApplyTranspose_GASM;
  pc->ops->setup           = PCSetUp_GASM;
  pc->ops->reset           = PCReset_GASM;
  pc->ops->destroy         = PCDestroy_GASM;
  pc->ops->setfromoptions  = PCSetFromOptions_GASM;
  pc->ops->setuponblocks   = PCSetUpOnBlocks_GASM;
  pc->ops->view            = PCView_GASM;
  pc->ops->applyrichardson = NULL;

  /* Compose the type-specific implementations reached via PetscUseMethod()
     in the public PCGASMSetXXX()/PCGASMGetSubKSP() wrappers. */
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSubdomains_C",PCGASMSetSubdomains_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetOverlap_C",PCGASMSetOverlap_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetType_C",PCGASMSetType_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSortIndices_C",PCGASMSetSortIndices_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMGetSubKSP_C",PCGASMGetSubKSP_GASM);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1383 
/*
   PCGASMCreateLocalSubdomains - Creates nloc nonoverlapping index sets that
   partition this process's ownership range of A, for use as inner GASM
   subdomains.

   Input Parameters:
   A    - the global matrix; only its row distribution, block size and (if
          available) local diagonal block are used
   nloc - number of local subdomains requested, must be > 0

   Output Parameter:
   iis  - newly allocated array of nloc index sets (caller owns array and ISs)

   If more than one subdomain is requested and a "good" MatPartitioning
   implementation is available (anything other than CURRENT or SQUARE), the
   adjacency graph of the local diagonal block is partitioned; otherwise the
   local rows are split into contiguous chunks.  Rows of the same block
   (bs > 1) are always kept in the same subdomain.
*/
PetscErrorCode  PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[])
{
  MatPartitioning mpart;
  const char      *prefix;
  PetscInt        i,j,rstart,rend,bs;
  PetscBool       hasop, isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
  Mat             Ad     = NULL, adj;
  IS              ispart,isnumb,*is;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  PetscCheckFalse(nloc < 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local subdomains must > 0, got nloc = %D",nloc);

  /* Get prefix, row distribution, and block size */
  ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  /* The local row range must be block-aligned, otherwise blocks would straddle processes. */
  PetscCheckFalse(rstart/bs*bs != rstart || rend/bs*bs != rend,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);

  /* Get diagonal block from matrix if possible */
  ierr = MatHasOperation(A,MATOP_GET_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
  if (hasop) {
    ierr = MatGetDiagonalBlock(A,&Ad);CHKERRQ(ierr);
  }
  if (Ad) {
    /* Detect (S)BAIJ storage: its row IJ is per-block, which is what we want
       so that MatPartitioning never splits a block across subdomains. */
    ierr = PetscObjectBaseTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
    if (!isbaij) {ierr = PetscObjectBaseTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);}
  }
  if (Ad && nloc > 1) {
    PetscBool  match,done;
    /* Try to setup a good matrix partitioning if available */
    ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr);
    ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
    ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
    /* CURRENT and SQUARE are trivial partitioners -- fall back to contiguous chunks for those. */
    ierr = PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);CHKERRQ(ierr);
    if (!match) {
      ierr = PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);CHKERRQ(ierr);
    }
    if (!match) { /* assume a "good" partitioner is available */
      PetscInt       na;
      const PetscInt *ia,*ja;
      ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
      if (done) {
        /* Build adjacency matrix by hand. Unfortunately a call to
           MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
           remove the block-aij structure and we cannot expect
           MatPartitioning to split vertices as we need */
        PetscInt       i,j,len,nnz,cnt,*iia=NULL,*jja=NULL;
        const PetscInt *row;
        nnz = 0;
        for (i=0; i<na; i++) { /* count number of nonzeros */
          len = ia[i+1] - ia[i];
          row = ja + ia[i];
          for (j=0; j<len; j++) {
            if (row[j] == i) { /* don't count diagonal */
              len--; break;
            }
          }
          nnz += len;
        }
        ierr   = PetscMalloc1(na+1,&iia);CHKERRQ(ierr);
        ierr   = PetscMalloc1(nnz,&jja);CHKERRQ(ierr);
        nnz    = 0;
        iia[0] = 0;
        for (i=0; i<na; i++) { /* fill adjacency */
          cnt = 0;
          len = ia[i+1] - ia[i];
          row = ja + ia[i];
          for (j=0; j<len; j++) {
            if (row[j] != i) jja[nnz+cnt++] = row[j]; /* if not diagonal */
          }
          nnz += cnt;
          iia[i+1] = nnz;
        }
        /* Partitioning of the adjacency matrix */
        /* MatCreateMPIAdj takes ownership of iia/jja, so they are not freed here. */
        ierr      = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,NULL,&adj);CHKERRQ(ierr);
        ierr      = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr);
        ierr      = MatPartitioningSetNParts(mpart,nloc);CHKERRQ(ierr);
        ierr      = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr);
        ierr      = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr);
        ierr      = MatDestroy(&adj);CHKERRQ(ierr);
        foundpart = PETSC_TRUE;
      }
      ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
    }
    ierr = MatPartitioningDestroy(&mpart);CHKERRQ(ierr);
  }
  ierr = PetscMalloc1(nloc,&is);CHKERRQ(ierr);
  if (!foundpart) {

    /* Partitioning by contiguous chunks of rows */

    /* Each subdomain gets mbs/nloc blocks, with the first mbs%nloc subdomains
       taking one extra block, so the counts sum exactly to mbs blocks. */
    PetscInt mbs   = (rend-rstart)/bs;
    PetscInt start = rstart;
    for (i=0; i<nloc; i++) {
      PetscInt count = (mbs/nloc + ((mbs % nloc) > i)) * bs;
      ierr   = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr);
      start += count;
    }

  } else {

    /* Partitioning by adjacency of diagonal block  */

    const PetscInt *numbering;
    PetscInt       *count,nidx,*indices,*newidx,start=0;
    /* Get node count in each partition */
    ierr = PetscMalloc1(nloc,&count);CHKERRQ(ierr);
    ierr = ISPartitioningCount(ispart,nloc,count);CHKERRQ(ierr);
    if (isbaij && bs > 1) { /* adjust for the block-aij case */
      for (i=0; i<nloc; i++) count[i] *= bs;
    }
    /* Build indices from node numbering */
    ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr);
    ierr = PetscMalloc1(nidx,&indices);CHKERRQ(ierr);
    for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
    /* Sorting the permutation by partition number groups each subdomain's
       indices contiguously in 'indices'. */
    ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr);
    ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr);
    ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr);
    if (isbaij && bs > 1) { /* adjust for the block-aij case */
      /* Expand each block index into its bs constituent point indices. */
      ierr = PetscMalloc1(nidx*bs,&newidx);CHKERRQ(ierr);
      for (i=0; i<nidx; i++) {
        for (j=0; j<bs; j++) newidx[i*bs+j] = indices[i]*bs + j;
      }
      ierr    = PetscFree(indices);CHKERRQ(ierr);
      nidx   *= bs;
      indices = newidx;
    }
    /* Shift to get global indices */
    for (i=0; i<nidx; i++) indices[i] += rstart;

    /* Build the index sets for each block */
    for (i=0; i<nloc; i++) {
      ierr   = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);CHKERRQ(ierr);
      ierr   = ISSort(is[i]);CHKERRQ(ierr);
      start += count[i];
    }

    ierr = PetscFree(count);CHKERRQ(ierr);
    ierr = PetscFree(indices);CHKERRQ(ierr);
    ierr = ISDestroy(&isnumb);CHKERRQ(ierr);
    ierr = ISDestroy(&ispart);CHKERRQ(ierr);
  }
  *iis = is;
  PetscFunctionReturn(0);
}
1530 
1531 PETSC_INTERN PetscErrorCode  PCGASMCreateStraddlingSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
1532 {
1533   PetscErrorCode  ierr;
1534 
1535   PetscFunctionBegin;
1536   ierr = MatSubdomainsCreateCoalesce(A,N,n,iis);CHKERRQ(ierr);
1537   PetscFunctionReturn(0);
1538 }
1539 
1540 /*@C
1541    PCGASMCreateSubdomains - Creates n index sets defining n nonoverlapping subdomains for the additive
   Schwarz preconditioner for any problem based on its matrix.
1543 
1544    Collective
1545 
1546    Input Parameters:
1547 +  A       - The global matrix operator
1548 -  N       - the number of global subdomains requested
1549 
1550    Output Parameters:
1551 +  n   - the number of subdomains created on this processor
1552 -  iis - the array of index sets defining the local inner subdomains (on which the correction is applied)
1553 
1554    Level: advanced
1555 
1556    Note: When N >= A's communicator size, each subdomain is local -- contained within a single processor.
1557          When N < size, the subdomains are 'straddling' (processor boundaries) and are no longer local.
         The resulting subdomains can be used in PCGASMSetSubdomains(pc,n,iss,NULL).  The overlapping
1559          outer subdomains will be automatically generated from these according to the requested amount of
1560          overlap; this is currently supported only with local subdomains.
1561 
1562 .seealso: PCGASMSetSubdomains(), PCGASMDestroySubdomains()
1563 @*/
1564 PetscErrorCode  PCGASMCreateSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
1565 {
1566   PetscMPIInt     size;
1567   PetscErrorCode  ierr;
1568 
1569   PetscFunctionBegin;
1570   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1571   PetscValidPointer(iis,4);
1572 
1573   PetscCheckFalse(N < 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Number of subdomains must be > 0, N = %D",N);
1574   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
1575   if (N >= size) {
1576     *n = N/size + (N%size);
1577     ierr = PCGASMCreateLocalSubdomains(A,*n,iis);CHKERRQ(ierr);
1578   } else {
1579     ierr = PCGASMCreateStraddlingSubdomains(A,N,n,iis);CHKERRQ(ierr);
1580   }
1581   PetscFunctionReturn(0);
1582 }
1583 
1584 /*@C
1585    PCGASMDestroySubdomains - Destroys the index sets created with
1586    PCGASMCreateSubdomains() or PCGASMCreateSubdomains2D. Should be
1587    called after setting subdomains with PCGASMSetSubdomains().
1588 
1589    Collective
1590 
1591    Input Parameters:
1592 +  n   - the number of index sets
1593 .  iis - the array of inner subdomains,
1594 -  ois - the array of outer subdomains, can be NULL
1595 
1596    Level: intermediate
1597 
1598    Notes:
1599     this is merely a convenience subroutine that walks each list,
1600    destroys each IS on the list, and then frees the list. At the end the
1601    list pointers are set to NULL.
1602 
1603 .seealso: PCGASMCreateSubdomains(), PCGASMSetSubdomains()
1604 @*/
1605 PetscErrorCode  PCGASMDestroySubdomains(PetscInt n,IS **iis,IS **ois)
1606 {
1607   PetscInt       i;
1608   PetscErrorCode ierr;
1609 
1610   PetscFunctionBegin;
1611   if (n <= 0) PetscFunctionReturn(0);
1612   if (ois) {
1613     PetscValidPointer(ois,3);
1614     if (*ois) {
1615       PetscValidPointer(*ois,3);
1616       for (i=0; i<n; i++) {
1617         ierr = ISDestroy(&(*ois)[i]);CHKERRQ(ierr);
1618       }
1619       ierr = PetscFree((*ois));CHKERRQ(ierr);
1620     }
1621   }
1622   if (iis) {
1623     PetscValidPointer(iis,2);
1624     if (*iis) {
1625       PetscValidPointer(*iis,2);
1626       for (i=0; i<n; i++) {
1627         ierr = ISDestroy(&(*iis)[i]);CHKERRQ(ierr);
1628       }
1629       ierr = PetscFree((*iis));CHKERRQ(ierr);
1630     }
1631   }
1632   PetscFunctionReturn(0);
1633 }
1634 
/*
  PCGASMLocalSubdomainBounds2D - Computes the bounding box of the intersection of the
  subdomain rectangle [xleft,xright) x [ylow,yhigh) on an M x N grid with this
  process's ownership range [first,last), writing the results through the pointer
  arguments xleft_loc, ylow_loc, xright_loc, yhigh_loc, and the size of the local
  subdomain through n.

  Wrapped in do { ... } while (0) so that an invocation followed by a semicolon is a
  single statement and is safe inside an unbraced if/else (the previous bare-brace
  form expanded to a block plus an empty statement).
*/
#define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \
  do {                                                                                                    \
    PetscInt first_row = first/M, last_row = last/M+1;                                                    \
    /*                                                                                                    \
     Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners       \
     of the bounding box of the intersection of the subdomain with the local ownership range (local       \
     subdomain).                                                                                          \
     Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows       \
     of the intersection.                                                                                 \
    */                                                                                                    \
    /* ylow_loc is the grid row containing the first element of the local subdomain */                    \
    *ylow_loc = PetscMax(first_row,ylow);                                                                 \
    /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
    *xleft_loc = *ylow_loc==first_row ? PetscMax(first%M,xleft) : xleft;                                  \
    /* yhigh_loc is the grid row above the last local subdomain element */                                \
    *yhigh_loc = PetscMin(last_row,yhigh);                                                                \
    /* xright_loc is the offset of the end of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
    *xright_loc = *yhigh_loc==last_row ? PetscMin(xright,last%M) : xright;                                \
    /* Now compute the size of the local subdomain n: full interior rows plus the partial first and last rows. */ \
    *n = 0;                                                \
    if (*ylow_loc < *yhigh_loc) {                          \
      PetscInt width = xright-xleft;                       \
      *n += width*(*yhigh_loc-*ylow_loc-1);                \
      *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \
      *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \
    }                                                      \
  } while (0)
1662 
1663 /*@
1664    PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1665    preconditioner for a two-dimensional problem on a regular grid.
1666 
1667    Collective
1668 
1669    Input Parameters:
1670 +  pc       - the preconditioner context
1671 .  M        - the global number of grid points in the x direction
1672 .  N        - the global number of grid points in the y direction
1673 .  Mdomains - the global number of subdomains in the x direction
1674 .  Ndomains - the global number of subdomains in the y direction
1675 .  dof      - degrees of freedom per node
1676 -  overlap  - overlap in mesh lines
1677 
1678    Output Parameters:
1679 +  Nsub - the number of local subdomains created
1680 .  iis  - array of index sets defining inner (nonoverlapping) subdomains
1681 -  ois  - array of index sets defining outer (overlapping, if overlap > 0) subdomains
1682 
1683    Level: advanced
1684 
1685 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(), PCGASMSetOverlap()
1686 @*/
PetscErrorCode  PCGASMCreateSubdomains2D(PC pc,PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap,PetscInt *nsub,IS **iis,IS **ois)
{
  PetscErrorCode ierr;
  PetscMPIInt    size, rank;
  PetscInt       i, j;
  PetscInt       maxheight, maxwidth;            /* extent of the current subdomain in grid lines */
  PetscInt       xstart, xleft, xright, xleft_loc, xright_loc;
  PetscInt       ystart, ylow,  yhigh,  ylow_loc,  yhigh_loc;
  PetscInt       x[2][2], y[2][2], n[2];         /* [0]: with overlap, [1]: without overlap */
  PetscInt       first, last;                    /* this process's ownership range of pc->pmat */
  PetscInt       nidx, *idx;
  PetscInt       ii,jj,s,q,d;
  PetscInt       k,kk;
  PetscMPIInt    color;
  MPI_Comm       comm, subcomm;
  IS             **xis = NULL, **is = ois, **is_local = iis;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)pc, &comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr);
  ierr = MatGetOwnershipRange(pc->pmat, &first, &last);CHKERRQ(ierr);
  /* All dof values of a grid point must live on the same process. */
  PetscCheckFalse(first%dof || last%dof,PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) "
                                      "does not respect the number of degrees of freedom per grid point %D", first, last, dof);

  /* Determine the number of domains with nonzero intersections with the local ownership range. */
  /* First pass: only count (s), so that the IS arrays can be sized before they are filled. */
  s      = 0;
  ystart = 0;
  for (j=0; j<Ndomains; ++j) {
    maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
    PetscCheckFalse(maxheight < 2,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N);
    /* Vertical domain limits with an overlap. */
    ylow   = PetscMax(ystart - overlap,0);
    yhigh  = PetscMin(ystart + maxheight + overlap,N);
    xstart = 0;
    for (i=0; i<Mdomains; ++i) {
      maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
      PetscCheckFalse(maxwidth < 2,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
      /* Horizontal domain limits with an overlap. */
      xleft  = PetscMax(xstart - overlap,0);
      xright = PetscMin(xstart + maxwidth + overlap,M);
      /*
         Determine whether this subdomain intersects this processor's ownership range of pc->pmat.
      */
      PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
      if (nidx) ++s;
      xstart += maxwidth;
    } /* for (i = 0; i < Mdomains; ++i) */
    ystart += maxheight;
  } /* for (j = 0; j < Ndomains; ++j) */

  /* Now we can allocate the necessary number of ISs. */
  *nsub  = s;
  ierr   = PetscMalloc1(*nsub,is);CHKERRQ(ierr);
  ierr   = PetscMalloc1(*nsub,is_local);CHKERRQ(ierr);
  /* Second pass: repeat the same sweep, this time building the index sets. */
  s      = 0;
  ystart = 0;
  for (j=0; j<Ndomains; ++j) {
    maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
    PetscCheckFalse(maxheight < 2,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N);
    /* Vertical domain limits with an overlap. */
    y[0][0] = PetscMax(ystart - overlap,0);
    y[0][1] = PetscMin(ystart + maxheight + overlap,N);
    /* Vertical domain limits without an overlap. */
    y[1][0] = ystart;
    y[1][1] = PetscMin(ystart + maxheight,N);
    xstart  = 0;
    for (i=0; i<Mdomains; ++i) {
      maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
      PetscCheckFalse(maxwidth < 2,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
      /* Horizontal domain limits with an overlap. */
      x[0][0] = PetscMax(xstart - overlap,0);
      x[0][1] = PetscMin(xstart + maxwidth + overlap,M);
      /* Horizontal domain limits without an overlap. */
      x[1][0] = xstart;
      x[1][1] = PetscMin(xstart+maxwidth,M);
      /*
         Determine whether this domain intersects this processor's ownership range of pc->pmat.
         Do this twice: first for the domains with overlaps, and once without.
         During the first pass create the subcommunicators, and use them on the second pass as well.
      */
      for (q = 0; q < 2; ++q) {
        PetscBool split = PETSC_FALSE;
        /*
          domain limits, (xleft, xright) and (ylow, yheigh) are adjusted
          according to whether the domain with an overlap or without is considered.
        */
        xleft = x[q][0]; xright = x[q][1];
        ylow  = y[q][0]; yhigh  = y[q][1];
        PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
        nidx *= dof;  /* nidx counted grid points; scale to matrix rows */
        n[q]  = nidx;
        /*
         Based on the counted number of indices in the local domain *with an overlap*,
         construct a subcommunicator of all the processors supporting this domain.
         Observe that a domain with an overlap might have nontrivial local support,
         while the domain without an overlap might not.  Hence, the decision to participate
         in the subcommunicator must be based on the domain with an overlap.
         */
        if (q == 0) {
          if (nidx) color = 1;
          else color = MPI_UNDEFINED;
          ierr = MPI_Comm_split(comm, color, rank, &subcomm);CHKERRMPI(ierr);
          split = PETSC_TRUE;
        }
        /*
         Proceed only if the number of local indices *with an overlap* is nonzero.
         */
        if (n[0]) {
          if (q == 0) xis = is;
          if (q == 1) {
            /*
             The IS for the no-overlap subdomain shares a communicator with the overlapping domain.
             Moreover, if the overlap is zero, the two ISs are identical.
             */
            if (overlap == 0) {
              (*is_local)[s] = (*is)[s];
              ierr           = PetscObjectReference((PetscObject)(*is)[s]);CHKERRQ(ierr);
              continue;
            } else {
              xis     = is_local;
              subcomm = ((PetscObject)(*is)[s])->comm;
            }
          } /* if (q == 1) */
          idx  = NULL;
          ierr = PetscMalloc1(nidx,&idx);CHKERRQ(ierr);
          if (nidx) {
            /* Enumerate the rows of the local piece of the subdomain in
               natural (row-major) grid order, dof values per grid point. */
            k = 0;
            for (jj=ylow_loc; jj<yhigh_loc; ++jj) {
              PetscInt x0 = (jj==ylow_loc) ? xleft_loc : xleft;
              PetscInt x1 = (jj==yhigh_loc-1) ? xright_loc : xright;
              kk = dof*(M*jj + x0);
              for (ii=x0; ii<x1; ++ii) {
                for (d = 0; d < dof; ++d) {
                  idx[k++] = kk++;
                }
              }
            }
          }
          /* The IS takes ownership of idx (PETSC_OWN_POINTER). */
          ierr = ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*xis)+s);CHKERRQ(ierr);
          if (split) {
            /* ISCreateGeneral duplicated subcomm, so the split communicator can be released. */
            ierr = MPI_Comm_free(&subcomm);CHKERRMPI(ierr);
          }
        }/* if (n[0]) */
      }/* for (q = 0; q < 2; ++q) */
      if (n[0]) ++s;
      xstart += maxwidth;
    } /* for (i = 0; i < Mdomains; ++i) */
    ystart += maxheight;
  } /* for (j = 0; j < Ndomains; ++j) */
  PetscFunctionReturn(0);
}
1839 
1840 /*@C
1841     PCGASMGetSubdomains - Gets the subdomains supported on this processor
1842     for the additive Schwarz preconditioner.
1843 
1844     Not Collective
1845 
1846     Input Parameter:
1847 .   pc - the preconditioner context
1848 
1849     Output Parameters:
1850 +   n   - the number of subdomains for this processor (default value = 1)
1851 .   iis - the index sets that define the inner subdomains (without overlap) supported on this processor (can be NULL)
1852 -   ois - the index sets that define the outer subdomains (with overlap) supported on this processor (can be NULL)
1853 
1854     Notes:
1855     The user is responsible for destroying the ISs and freeing the returned arrays.
1856     The IS numbering is in the parallel, global numbering of the vector.
1857 
1858     Level: advanced
1859 
1860 .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(), PCGASMCreateSubdomains2D(),
1861           PCGASMSetSubdomains(), PCGASMGetSubmatrices()
1862 @*/
1863 PetscErrorCode  PCGASMGetSubdomains(PC pc,PetscInt *n,IS *iis[],IS *ois[])
1864 {
1865   PC_GASM        *osm;
1866   PetscErrorCode ierr;
1867   PetscBool      match;
1868   PetscInt       i;
1869 
1870   PetscFunctionBegin;
1871   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1872   ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1873   PetscCheckFalse(!match,PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Incorrect object type: expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
1874   osm = (PC_GASM*)pc->data;
1875   if (n) *n = osm->n;
1876   if (iis) {
1877     ierr = PetscMalloc1(osm->n, iis);CHKERRQ(ierr);
1878   }
1879   if (ois) {
1880     ierr = PetscMalloc1(osm->n, ois);CHKERRQ(ierr);
1881   }
1882   if (iis || ois) {
1883     for (i = 0; i < osm->n; ++i) {
1884       if (iis) (*iis)[i] = osm->iis[i];
1885       if (ois) (*ois)[i] = osm->ois[i];
1886     }
1887   }
1888   PetscFunctionReturn(0);
1889 }
1890 
1891 /*@C
1892     PCGASMGetSubmatrices - Gets the local submatrices (for this processor
1893     only) for the additive Schwarz preconditioner.
1894 
1895     Not Collective
1896 
1897     Input Parameter:
1898 .   pc - the preconditioner context
1899 
1900     Output Parameters:
1901 +   n   - the number of matrices for this processor (default value = 1)
1902 -   mat - the matrices
1903 
1904     Notes:
1905     matrices returned by this routine have the same communicators as the index sets (IS)
1906            used to define subdomains in PCGASMSetSubdomains()
1907     Level: advanced
1908 
1909 .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(),
1910           PCGASMCreateSubdomains2D(), PCGASMSetSubdomains(), PCGASMGetSubdomains()
1911 @*/
1912 PetscErrorCode  PCGASMGetSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1913 {
1914   PC_GASM        *osm;
1915   PetscErrorCode ierr;
1916   PetscBool      match;
1917 
1918   PetscFunctionBegin;
1919   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1920   PetscValidIntPointer(n,2);
1921   if (mat) PetscValidPointer(mat,3);
1922   PetscCheckFalse(!pc->setupcalled,PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUp() or PCSetUp().");
1923   ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1924   PetscCheckFalse(!match,PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
1925   osm = (PC_GASM*)pc->data;
1926   if (n) *n = osm->n;
1927   if (mat) *mat = osm->pmat;
1928   PetscFunctionReturn(0);
1929 }
1930 
1931 /*@
1932     PCGASMSetUseDMSubdomains - Indicates whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
1933     Logically Collective
1934 
1935     Input Parameters:
1936 +   pc  - the preconditioner
1937 -   flg - boolean indicating whether to use subdomains defined by the DM
1938 
1939     Options Database Key:
.   -pc_gasm_dm_subdomains <bool> - use subdomains defined by the DM, unless overridden by -pc_gasm_overlap or -pc_gasm_total_subdomains
1941 
1942     Level: intermediate
1943 
1944     Notes:
1945     PCGASMSetSubdomains(), PCGASMSetTotalSubdomains() or PCGASMSetOverlap() take precedence over PCGASMSetUseDMSubdomains(),
1946     so setting PCGASMSetSubdomains() with nontrivial subdomain ISs or any of PCGASMSetTotalSubdomains() and PCGASMSetOverlap()
1947     automatically turns the latter off.
1948 
1949 .seealso: PCGASMGetUseDMSubdomains(), PCGASMSetSubdomains(), PCGASMSetOverlap()
1950           PCGASMCreateSubdomains2D()
1951 @*/
PetscErrorCode  PCGASMSetUseDMSubdomains(PC pc,PetscBool flg)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveBool(pc,flg,2);
  PetscCheckFalse(pc->setupcalled,((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Not for a setup PC.");
  /* Silently ignored if the PC is not of type PCGASM. */
  ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
  if (match) {
    /* Only honor the flag if the user has not already taken control of the
       subdomain definition via explicit ISs, a total-subdomain count, or overlap.
       NOTE(review): PCCreate_GASM() initializes osm->overlap to 0, so the
       condition 'osm->overlap < 0' appears to never hold unless
       PCGASMSetOverlap() uses negative values as an "unset" sentinel --
       confirm against PCGASMSetOverlap_GASM(); otherwise dm_subdomains may
       be impossible to enable through this function. */
    if (!osm->user_subdomains && osm->N == PETSC_DETERMINE && osm->overlap < 0) {
      osm->dm_subdomains = flg;
    }
  }
  PetscFunctionReturn(0);
}
1970 
1971 /*@
1972     PCGASMGetUseDMSubdomains - Returns flag indicating whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
1973     Not Collective
1974 
1975     Input Parameter:
1976 .   pc  - the preconditioner
1977 
1978     Output Parameter:
1979 .   flg - boolean indicating whether to use subdomains defined by the DM
1980 
1981     Level: intermediate
1982 
1983 .seealso: PCGASMSetUseDMSubdomains(), PCGASMSetOverlap()
1984           PCGASMCreateSubdomains2D()
1985 @*/
1986 PetscErrorCode  PCGASMGetUseDMSubdomains(PC pc,PetscBool* flg)
1987 {
1988   PC_GASM        *osm = (PC_GASM*)pc->data;
1989   PetscErrorCode ierr;
1990   PetscBool      match;
1991 
1992   PetscFunctionBegin;
1993   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1994   PetscValidBoolPointer(flg,2);
1995   ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1996   if (match) {
1997     if (flg) *flg = osm->dm_subdomains;
1998   }
1999   PetscFunctionReturn(0);
2000 }
2001