xref: /petsc/src/ksp/pc/impls/gasm/gasm.c (revision 030f984af8d8bb4c203755d35bded3c05b3d83ce)
1 /*
2   This file defines an "generalized" additive Schwarz preconditioner for any Mat implementation.
3   In this version each processor may intersect multiple subdomains and any subdomain may
4   intersect multiple processors.  Intersections of subdomains with processors are called *local
5   subdomains*.
6 
7        N    - total number of distinct global subdomains          (set explicitly in PCGASMSetTotalSubdomains() or implicitly PCGASMSetSubdomains() and then calculated in PCSetUp_GASM())
8        n    - actual number of local subdomains on this processor (set in PCGASMSetSubdomains() or calculated in PCGASMSetTotalSubdomains())
9        nmax - maximum number of local subdomains per processor    (calculated in PCSetUp_GASM())
10 */
11 #include <petsc/private/pcimpl.h>     /*I "petscpc.h" I*/
12 #include <petscdm.h>
13 
typedef struct {
  PetscInt    N,n,nmax;                 /* global number of subdomains, local number of subdomains, max local subdomains over all ranks (see file header comment) */
  PetscInt    overlap;                  /* overlap requested by user */
  PCGASMType  type;                     /* use reduced interpolation, restriction or both */
  PetscBool   type_set;                 /* if user set this value (so won't change it for symmetric problems) */
  PetscBool   same_subdomain_solvers;   /* flag indicating whether all local solvers are same */
  PetscBool   sort_indices;             /* flag to sort subdomain indices */
  PetscBool   user_subdomains;          /* whether the user set explicit subdomain index sets -- keep them on PCReset() */
  PetscBool   dm_subdomains;            /* whether DM is allowed to define subdomains */
  PetscBool   hierarchicalpartitioning; /* whether to repartition via MATPARTITIONINGHIERARCH before setup (see PCGASMSetHierarchicalPartitioning()) */
  IS          *ois;                     /* index sets that define the outer (conceptually, overlapping) subdomains */
  IS          *iis;                     /* index sets that define the inner (conceptually, nonoverlapping) subdomains */
  KSP         *ksp;                     /* linear solvers for each subdomain */
  Mat         *pmat;                    /* subdomain block matrices */
  Vec         gx,gy;                    /* Merged work vectors */
  Vec         *x,*y;                    /* Split work vectors; storage aliases pieces of storage of the above merged vectors. */
  VecScatter  gorestriction;            /* merged restriction to disjoint union of outer subdomains */
  VecScatter  girestriction;            /* merged restriction to disjoint union of inner subdomains */
  VecScatter  pctoouter;                /* scatter between the permuted work vectors (pcx,pcy) and the outer (original-ordering) vectors; used with hierarchical partitioning */
  IS          permutationIS;            /* rows owned by this rank after hierarchical repartitioning */
  Mat         permutationP;             /* pc->pmat permuted according to permutationIS */
  Mat         pcmat;                    /* saved original pc->pmat while permutationP temporarily replaces it */
  Vec         pcx,pcy;                  /* work vectors in the permuted ordering */
} PC_GASM;
38 
39 static PetscErrorCode  PCGASMComputeGlobalSubdomainNumbering_Private(PC pc,PetscInt **numbering,PetscInt **permutation)
40 {
41   PC_GASM        *osm = (PC_GASM*)pc->data;
42   PetscInt       i;
43   PetscErrorCode ierr;
44 
45   PetscFunctionBegin;
46   /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
47   ierr = PetscMalloc2(osm->n,numbering,osm->n,permutation);CHKERRQ(ierr);
48   ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->iis,NULL,*numbering);CHKERRQ(ierr);
49   for (i = 0; i < osm->n; ++i) (*permutation)[i] = i;
50   ierr = PetscSortIntWithPermutation(osm->n,*numbering,*permutation);CHKERRQ(ierr);
51   PetscFunctionReturn(0);
52 }
53 
54 static PetscErrorCode  PCGASMSubdomainView_Private(PC pc, PetscInt i, PetscViewer viewer)
55 {
56   PC_GASM        *osm = (PC_GASM*)pc->data;
57   PetscInt       j,nidx;
58   const PetscInt *idx;
59   PetscViewer    sviewer;
60   char           *cidx;
61   PetscErrorCode ierr;
62 
63   PetscFunctionBegin;
64   if (i < 0 || i > osm->n) SETERRQ2(PetscObjectComm((PetscObject)viewer), PETSC_ERR_ARG_WRONG, "Invalid subdomain %D: must nonnegative and less than %D", i, osm->n);
65   /* Inner subdomains. */
66   ierr = ISGetLocalSize(osm->iis[i], &nidx);CHKERRQ(ierr);
67   /*
68    No more than 15 characters per index plus a space.
69    PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
70    in case nidx == 0. That will take care of the space for the trailing '\0' as well.
71    For nidx == 0, the whole string 16 '\0'.
72    */
73 #define len  16*(nidx+1)+1
74   ierr = PetscMalloc1(len, &cidx);CHKERRQ(ierr);
75   ierr = PetscViewerStringOpen(PETSC_COMM_SELF, cidx, len, &sviewer);CHKERRQ(ierr);
76 #undef len
77   ierr = ISGetIndices(osm->iis[i], &idx);CHKERRQ(ierr);
78   for (j = 0; j < nidx; ++j) {
79     ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);CHKERRQ(ierr);
80   }
81   ierr = ISRestoreIndices(osm->iis[i],&idx);CHKERRQ(ierr);
82   ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr);
83   ierr = PetscViewerASCIIPrintf(viewer, "Inner subdomain:\n");CHKERRQ(ierr);
84   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
85   ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
86   ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr);
87   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
88   ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
89   ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr);
90   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
91   ierr = PetscFree(cidx);CHKERRQ(ierr);
92   /* Outer subdomains. */
93   ierr = ISGetLocalSize(osm->ois[i], &nidx);CHKERRQ(ierr);
94   /*
95    No more than 15 characters per index plus a space.
96    PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
97    in case nidx == 0. That will take care of the space for the trailing '\0' as well.
98    For nidx == 0, the whole string 16 '\0'.
99    */
100 #define len  16*(nidx+1)+1
101   ierr = PetscMalloc1(len, &cidx);CHKERRQ(ierr);
102   ierr = PetscViewerStringOpen(PETSC_COMM_SELF, cidx, len, &sviewer);CHKERRQ(ierr);
103 #undef len
104   ierr = ISGetIndices(osm->ois[i], &idx);CHKERRQ(ierr);
105   for (j = 0; j < nidx; ++j) {
106     ierr = PetscViewerStringSPrintf(sviewer,"%D ", idx[j]);CHKERRQ(ierr);
107   }
108   ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr);
109   ierr = ISRestoreIndices(osm->ois[i],&idx);CHKERRQ(ierr);
110   ierr = PetscViewerASCIIPrintf(viewer, "Outer subdomain:\n");CHKERRQ(ierr);
111   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
112   ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
113   ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr);
114   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
115   ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
116   ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr);
117   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
118   ierr = PetscFree(cidx);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
/*
  If -pc_gasm_print_subdomains was given, prints the inner and outer index sets of
  every subdomain, in ascending global subdomain order, to the file named by that
  option (default "stdout").  Collective on the PC's communicator: all ranks must
  enter, and the per-iteration barrier keeps the subcomm output ordered.
 */
static PetscErrorCode  PCGASMPrintSubdomains(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  const char     *prefix;
  char           fname[PETSC_MAX_PATH_LEN+1];
  PetscInt       l, d, count;
  PetscBool      doprint,found;
  PetscViewer    viewer, sviewer = NULL;
  PetscInt       *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
  doprint  = PETSC_FALSE;
  ierr = PetscOptionsGetBool(NULL,prefix,"-pc_gasm_print_subdomains",&doprint,NULL);CHKERRQ(ierr);
  if (!doprint) PetscFunctionReturn(0);
  /* The same option is re-read as a string: its value, if any, is the output file name. */
  ierr = PetscOptionsGetString(NULL,prefix,"-pc_gasm_print_subdomains",fname,sizeof(fname),&found);CHKERRQ(ierr);
  if (!found) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); };
  ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)pc),fname,&viewer);CHKERRQ(ierr);
  /*
   Make sure the viewer has a name. Otherwise this may cause a deadlock or other weird errors when creating a subcomm viewer:
   the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed collectively on the comm.
  */
  ierr = PetscObjectName((PetscObject)viewer);CHKERRQ(ierr);
  l    = 0;
  ierr = PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);CHKERRQ(ierr);
  for (count = 0; count < osm->N; ++count) {
    /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
    if (l<osm->n) {
      d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
      if (numbering[d] == count) {
        ierr = PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
        ierr = PCGASMSubdomainView_Private(pc,d,sviewer);CHKERRQ(ierr);
        ierr = PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
        ++l;
      }
    }
    /* Every rank synchronizes each iteration so subdomain `count+1` prints only after `count`. */
    ierr = MPI_Barrier(PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr);
  }
  ierr = PetscFree2(numbering,permutation);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
165 
/*
  PCView implementation for PCGASM: prints overlap/subdomain summary information
  and, one global subdomain at a time, each subdomain's size and solver.  If
  -pc_gasm_view_subdomains is set, the subdomain index sets are printed as well.
  Collective on the PC's communicator.
 */
static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  const char     *prefix;
  PetscErrorCode ierr;
  PetscMPIInt    rank, size;
  PetscInt       bsz;
  PetscBool      iascii,view_subdomains=PETSC_FALSE;
  PetscViewer    sviewer;
  PetscInt       count, l;
  /* Fallback texts used when the corresponding quantity has not been determined yet. */
  char           overlap[256]     = "user-defined overlap";
  char           gsubdomains[256] = "unknown total number of subdomains";
  char           msubdomains[256] = "unknown max number of local subdomains";
  PetscInt       *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank);CHKERRMPI(ierr);

  if (osm->overlap >= 0) {
    ierr = PetscSNPrintf(overlap,sizeof(overlap),"requested amount of overlap = %D",osm->overlap);CHKERRQ(ierr);
  }
  if (osm->N != PETSC_DETERMINE) {
    ierr = PetscSNPrintf(gsubdomains, sizeof(gsubdomains), "total number of subdomains = %D",osm->N);CHKERRQ(ierr);
  }
  if (osm->nmax != PETSC_DETERMINE) {
    ierr = PetscSNPrintf(msubdomains,sizeof(msubdomains),"max number of local subdomains = %D",osm->nmax);CHKERRQ(ierr);
  }

  ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(NULL,prefix,"-pc_gasm_view_subdomains",&view_subdomains,NULL);CHKERRQ(ierr);

  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    /*
     Make sure the viewer has a name. Otherwise this may cause a deadlock when creating a subcomm viewer:
     the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed
     collectively on the comm.
     */
    ierr = PetscObjectName((PetscObject)viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  Restriction/interpolation type: %s\n",PCGASMTypes[osm->type]);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  %s\n",overlap);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  %s\n",gsubdomains);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  %s\n",msubdomains);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d|%d] number of locally-supported subdomains = %D\n",rank,size,osm->n);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    /* Cannot take advantage of osm->same_subdomain_solvers without a global numbering of subdomains. */
    ierr = PetscViewerASCIIPrintf(viewer,"  Subdomain solver info is as follows:\n");CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  - - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
    /* Make sure that everybody waits for the banner to be printed. */
    ierr = MPI_Barrier(PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr);
    /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
    ierr = PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);CHKERRQ(ierr);
    l = 0;
    for (count = 0; count < osm->N; ++count) {
      PetscMPIInt srank, ssize;
      if (l<osm->n) {
        PetscInt d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
        if (numbering[d] == count) {
          ierr = MPI_Comm_size(((PetscObject)osm->ois[d])->comm, &ssize);CHKERRMPI(ierr);
          ierr = MPI_Comm_rank(((PetscObject)osm->ois[d])->comm, &srank);CHKERRMPI(ierr);
          /* The subviewer lives on the subdomain's (possibly smaller) communicator. */
          ierr = PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
          ierr = ISGetLocalSize(osm->ois[d],&bsz);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"  [%d|%d] (subcomm [%d|%d]) local subdomain number %D, local size = %D\n",rank,size,srank,ssize,d,bsz);CHKERRQ(ierr);
          ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr);
          if (view_subdomains) {
            ierr = PCGASMSubdomainView_Private(pc,d,sviewer);CHKERRQ(ierr);
          }
          if (!pc->setupcalled) {
            ierr = PetscViewerASCIIPrintf(sviewer, "  Solver not set up yet: PCSetUp() not yet called\n");CHKERRQ(ierr);
          } else {
            ierr = KSPView(osm->ksp[d],sviewer);CHKERRQ(ierr);
          }
          ierr = PetscViewerASCIIPrintf(sviewer,"  - - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
          ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr);
          ierr = PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
          ++l;
        }
      }
      /* Keep all ranks in lockstep so global subdomain `count+1` prints only after `count`. */
      ierr = MPI_Barrier(PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr);
    }
    ierr = PetscFree2(numbering,permutation);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    /* this line is needed to match the extra PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
    ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
258 
259 PETSC_INTERN PetscErrorCode  PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[]);
260 
261 PetscErrorCode PCGASMSetHierarchicalPartitioning(PC pc)
262 {
263    PC_GASM              *osm = (PC_GASM*)pc->data;
264    MatPartitioning       part;
265    MPI_Comm              comm;
266    PetscMPIInt           size;
267    PetscInt              nlocalsubdomains,fromrows_localsize;
268    IS                    partitioning,fromrows,isn;
269    Vec                   outervec;
270    PetscErrorCode        ierr;
271 
272    PetscFunctionBegin;
273    ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
274    ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
275    /* we do not need a hierarchical partitioning when
276     * the total number of subdomains is consistent with
277     * the number of MPI tasks.
278     * For the following cases, we do not need to use HP
279     * */
280    if (osm->N==PETSC_DETERMINE || osm->N>=size || osm->N==1) PetscFunctionReturn(0);
281    if (size%osm->N != 0) SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_ARG_INCOMP,"have to specify the total number of subdomains %D to be a factor of the number of processors %d \n",osm->N,size);
282    nlocalsubdomains = size/osm->N;
283    osm->n           = 1;
284    ierr = MatPartitioningCreate(comm,&part);CHKERRQ(ierr);
285    ierr = MatPartitioningSetAdjacency(part,pc->pmat);CHKERRQ(ierr);
286    ierr = MatPartitioningSetType(part,MATPARTITIONINGHIERARCH);CHKERRQ(ierr);
287    ierr = MatPartitioningHierarchicalSetNcoarseparts(part,osm->N);CHKERRQ(ierr);
288    ierr = MatPartitioningHierarchicalSetNfineparts(part,nlocalsubdomains);CHKERRQ(ierr);
289    ierr = MatPartitioningSetFromOptions(part);CHKERRQ(ierr);
290    /* get new processor owner number of each vertex */
291    ierr = MatPartitioningApply(part,&partitioning);CHKERRQ(ierr);
292    ierr = ISBuildTwoSided(partitioning,NULL,&fromrows);CHKERRQ(ierr);
293    ierr = ISPartitioningToNumbering(partitioning,&isn);CHKERRQ(ierr);
294    ierr = ISDestroy(&isn);CHKERRQ(ierr);
295    ierr = ISGetLocalSize(fromrows,&fromrows_localsize);CHKERRQ(ierr);
296    ierr = MatPartitioningDestroy(&part);CHKERRQ(ierr);
297    ierr = MatCreateVecs(pc->pmat,&outervec,NULL);CHKERRQ(ierr);
298    ierr = VecCreateMPI(comm,fromrows_localsize,PETSC_DETERMINE,&(osm->pcx));CHKERRQ(ierr);
299    ierr = VecDuplicate(osm->pcx,&(osm->pcy));CHKERRQ(ierr);
300    ierr = VecScatterCreate(osm->pcx,NULL,outervec,fromrows,&(osm->pctoouter));CHKERRQ(ierr);
301    ierr = MatCreateSubMatrix(pc->pmat,fromrows,fromrows,MAT_INITIAL_MATRIX,&(osm->permutationP));CHKERRQ(ierr);
302    ierr = PetscObjectReference((PetscObject)fromrows);CHKERRQ(ierr);
303    osm->permutationIS = fromrows;
304    osm->pcmat =  pc->pmat;
305    ierr = PetscObjectReference((PetscObject)osm->permutationP);CHKERRQ(ierr);
306    pc->pmat = osm->permutationP;
307    ierr = VecDestroy(&outervec);CHKERRQ(ierr);
308    ierr = ISDestroy(&fromrows);CHKERRQ(ierr);
309    ierr = ISDestroy(&partitioning);CHKERRQ(ierr);
310    osm->n           = PETSC_DETERMINE;
311    PetscFunctionReturn(0);
312 }
313 
/*
  PCSetUp implementation for PCGASM.  On first call it determines the inner and
  outer subdomain index sets (from the user, the DM, or one-per-rank defaults),
  builds the merged work vectors gx/gy over the disjoint union of the local outer
  subdomains, the restriction scatters (gorestriction, girestriction), the
  per-subdomain work vectors aliasing gx/gy storage, and the subdomain KSP
  solvers.  On subsequent calls it only re-extracts the submatrices.  Collective.
 */
static PetscErrorCode PCSetUp_GASM(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i,nInnerIndices,nTotalInnerIndices;
  PetscMPIInt    rank, size;
  MatReuse       scall = MAT_REUSE_MATRIX;
  KSP            ksp;
  PC             subpc;
  const char     *prefix,*pprefix;
  Vec            x,y;
  PetscInt       oni;       /* Number of indices in the i-th local outer subdomain.               */
  const PetscInt *oidxi;    /* Indices from the i-th subdomain local outer subdomain.             */
  PetscInt       on;        /* Number of indices in the disjoint union of local outer subdomains. */
  PetscInt       *oidx;     /* Indices in the disjoint union of local outer subdomains. */
  IS             gois;      /* Disjoint union the global indices of outer subdomains.             */
  IS             goid;      /* Identity IS of the size of the disjoint union of outer subdomains. */
  PetscScalar    *gxarray, *gyarray;
  PetscInt       gostart;   /* Start of locally-owned indices in the vectors -- osm->gx,osm->gy -- over the disjoint union of outer subdomains. */
  PetscInt       num_subdomains    = 0;
  DM             *subdomain_dm     = NULL;
  char           **subdomain_names = NULL;
  PetscInt       *numbering;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRMPI(ierr);
  if (!pc->setupcalled) {
        /* use a hierarchical partitioning */
    if (osm->hierarchicalpartitioning) {
      ierr = PCGASMSetHierarchicalPartitioning(pc);CHKERRQ(ierr);
    }
    if (osm->n == PETSC_DETERMINE) {
      if (osm->N != PETSC_DETERMINE) {
           /* No local subdomains given, but the desired number of total subdomains is known, so construct them accordingly. */
           ierr = PCGASMCreateSubdomains(pc->pmat,osm->N,&osm->n,&osm->iis);CHKERRQ(ierr);
      } else if (osm->dm_subdomains && pc->dm) {
        /* try pc->dm next, if allowed */
        PetscInt  d;
        IS       *inner_subdomain_is, *outer_subdomain_is;
        ierr = DMCreateDomainDecomposition(pc->dm, &num_subdomains, &subdomain_names, &inner_subdomain_is, &outer_subdomain_is, &subdomain_dm);CHKERRQ(ierr);
        if (num_subdomains) {
          ierr = PCGASMSetSubdomains(pc, num_subdomains, inner_subdomain_is, outer_subdomain_is);CHKERRQ(ierr);
        }
        /* PCGASMSetSubdomains() took its own references, so release the DM's. */
        for (d = 0; d < num_subdomains; ++d) {
          if (inner_subdomain_is) {ierr = ISDestroy(&inner_subdomain_is[d]);CHKERRQ(ierr);}
          if (outer_subdomain_is) {ierr = ISDestroy(&outer_subdomain_is[d]);CHKERRQ(ierr);}
        }
        ierr = PetscFree(inner_subdomain_is);CHKERRQ(ierr);
        ierr = PetscFree(outer_subdomain_is);CHKERRQ(ierr);
      } else {
        /* still no subdomains; use one per processor */
        osm->nmax = osm->n = 1;
        ierr      = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRMPI(ierr);
        osm->N    = size;
        ierr = PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);CHKERRQ(ierr);
      }
    }
    if (!osm->iis) {
      /*
       osm->n was set in PCGASMSetSubdomains(), but the actual subdomains have not been supplied.
       We create the requisite number of local inner subdomains and then expand them into
       out subdomains, if necessary.
       */
      ierr = PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);CHKERRQ(ierr);
    }
    if (!osm->ois) {
      /*
            Initially make outer subdomains the same as inner subdomains. If nonzero additional overlap
            has been requested, copy the inner subdomains over so they can be modified.
      */
      ierr = PetscMalloc1(osm->n,&osm->ois);CHKERRQ(ierr);
      for (i=0; i<osm->n; ++i) {
        if (osm->overlap > 0 && osm->N>1) { /* With positive overlap, osm->iis[i] will be modified */
          ierr = ISDuplicate(osm->iis[i],(osm->ois)+i);CHKERRQ(ierr);
          ierr = ISCopy(osm->iis[i],osm->ois[i]);CHKERRQ(ierr);
        } else {
          /* Zero overlap (or a single subdomain): outer IS can simply share the inner IS. */
          ierr      = PetscObjectReference((PetscObject)((osm->iis)[i]));CHKERRQ(ierr);
          osm->ois[i] = osm->iis[i];
        }
      }
      if (osm->overlap>0 && osm->N>1) {
        /* Extend the "overlapping" regions by a number of steps */
        ierr = MatIncreaseOverlapSplit(pc->pmat,osm->n,osm->ois,osm->overlap);CHKERRQ(ierr);
      }
    }

    /* Now the subdomains are defined.  Determine their global and max local numbers, if necessary. */
    if (osm->nmax == PETSC_DETERMINE) {
      PetscMPIInt inwork,outwork;
      /* determine global number of subdomains and the max number of local subdomains */
      inwork     = osm->n;
      ierr       = MPIU_Allreduce(&inwork,&outwork,1,MPI_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr);
      osm->nmax  = outwork;
    }
    if (osm->N == PETSC_DETERMINE) {
      /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
      ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,&osm->N,NULL);CHKERRQ(ierr);
    }

    if (osm->sort_indices) {
      for (i=0; i<osm->n; i++) {
        ierr = ISSort(osm->ois[i]);CHKERRQ(ierr);
        ierr = ISSort(osm->iis[i]);CHKERRQ(ierr);
      }
    }
    ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
    ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr);

    /*
       Merge the ISs, create merged vectors and restrictions.
     */
    /* Merge outer subdomain ISs and construct a restriction onto the disjoint union of local outer subdomains. */
    on = 0;
    for (i=0; i<osm->n; i++) {
      ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
      on  += oni;
    }
    ierr = PetscMalloc1(on, &oidx);CHKERRQ(ierr);
    on   = 0;
    /* Merge local indices together */
    for (i=0; i<osm->n; i++) {
      ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
      ierr = ISGetIndices(osm->ois[i],&oidxi);CHKERRQ(ierr);
      ierr = PetscArraycpy(oidx+on,oidxi,oni);CHKERRQ(ierr);
      ierr = ISRestoreIndices(osm->ois[i],&oidxi);CHKERRQ(ierr);
      on  += oni;
    }
    ierr = ISCreateGeneral(((PetscObject)(pc))->comm,on,oidx,PETSC_OWN_POINTER,&gois);CHKERRQ(ierr);
    nTotalInnerIndices = 0;
    for (i=0; i<osm->n; i++) {
      ierr = ISGetLocalSize(osm->iis[i],&nInnerIndices);CHKERRQ(ierr);
      nTotalInnerIndices += nInnerIndices;
    }
    /* x,y are temporary vectors in the (nonoverlapping) inner-subdomain layout. */
    ierr = VecCreateMPI(((PetscObject)(pc))->comm,nTotalInnerIndices,PETSC_DETERMINE,&x);CHKERRQ(ierr);
    ierr = VecDuplicate(x,&y);CHKERRQ(ierr);

    ierr = VecCreateMPI(PetscObjectComm((PetscObject)pc),on,PETSC_DECIDE,&osm->gx);CHKERRQ(ierr);
    ierr = VecDuplicate(osm->gx,&osm->gy);CHKERRQ(ierr);
    ierr = VecGetOwnershipRange(osm->gx, &gostart, NULL);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),on,gostart,1, &goid);CHKERRQ(ierr);
    /* gois may contain indices that are not locally owned, so this scatter is genuinely parallel. */
    ierr = VecScatterCreate(x,gois,osm->gx,goid, &(osm->gorestriction));CHKERRQ(ierr);
    ierr = PetscMalloc1(osm->n,&numbering);CHKERRQ(ierr);
    ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,NULL,numbering);CHKERRQ(ierr);
    ierr = VecDestroy(&x);CHKERRQ(ierr);
    ierr = ISDestroy(&gois);CHKERRQ(ierr);

    /* Merge inner subdomain ISs and construct a restriction onto the disjoint union of local inner subdomains. */
    {
      PetscInt        ini;           /* Number of indices the i-th a local inner subdomain. */
      PetscInt        in;            /* Number of indices in the disjoint union of local inner subdomains. */
      PetscInt       *iidx;          /* Global indices in the merged local inner subdomain. */
      PetscInt       *ioidx;         /* Global indices of the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
      IS              giis;          /* IS for the disjoint union of inner subdomains. */
      IS              giois;         /* IS for the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
      PetscScalar    *array;
      const PetscInt *indices;
      PetscInt        k;
      on = 0;
      for (i=0; i<osm->n; i++) {
        ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
        on  += oni;
      }
      ierr = PetscMalloc1(on, &iidx);CHKERRQ(ierr);
      ierr = PetscMalloc1(on, &ioidx);CHKERRQ(ierr);
      ierr = VecGetArray(y,&array);CHKERRQ(ierr);
      /* Tag each inner-subdomain entry of y with the global number of its owning subdomain,
         then scatter; entries of gy that kept their tag lie in the inner (non-overlap) part. */
      in   = 0;
      for (i=0; i<osm->n; i++) {
        ierr   = ISGetLocalSize(osm->iis[i],&ini);CHKERRQ(ierr);
        for (k = 0; k < ini; ++k) {
          array[in+k] = numbering[i];
        }
        in += ini;
      }
      ierr = VecRestoreArray(y,&array);CHKERRQ(ierr);
      ierr = VecScatterBegin(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecGetOwnershipRange(osm->gy,&gostart, NULL);CHKERRQ(ierr);
      ierr = VecGetArray(osm->gy,&array);CHKERRQ(ierr);
      on  = 0;
      in  = 0;
      for (i=0; i<osm->n; i++) {
        ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
        ierr = ISGetIndices(osm->ois[i],&indices);CHKERRQ(ierr);
        for (k=0; k<oni; k++) {
          /*  skip overlapping indices to get inner domain */
          if (PetscRealPart(array[on+k]) != numbering[i]) continue;
          iidx[in]    = indices[k];
          ioidx[in++] = gostart+on+k;
        }
        ierr   = ISRestoreIndices(osm->ois[i], &indices);CHKERRQ(ierr);
        on += oni;
      }
      ierr = VecRestoreArray(osm->gy,&array);CHKERRQ(ierr);
      ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,iidx,PETSC_OWN_POINTER,&giis);CHKERRQ(ierr);
      ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,ioidx,PETSC_OWN_POINTER,&giois);CHKERRQ(ierr);
      ierr = VecScatterCreate(y,giis,osm->gy,giois,&osm->girestriction);CHKERRQ(ierr);
      ierr = VecDestroy(&y);CHKERRQ(ierr);
      ierr = ISDestroy(&giis);CHKERRQ(ierr);
      ierr = ISDestroy(&giois);CHKERRQ(ierr);
    }
    ierr = ISDestroy(&goid);CHKERRQ(ierr);
    ierr = PetscFree(numbering);CHKERRQ(ierr);

    /* Create the subdomain work vectors: osm->x[i]/osm->y[i] alias slices of gx/gy storage. */
    ierr = PetscMalloc1(osm->n,&osm->x);CHKERRQ(ierr);
    ierr = PetscMalloc1(osm->n,&osm->y);CHKERRQ(ierr);
    ierr = VecGetArray(osm->gx, &gxarray);CHKERRQ(ierr);
    ierr = VecGetArray(osm->gy, &gyarray);CHKERRQ(ierr);
    for (i=0, on=0; i<osm->n; ++i, on += oni) {
      PetscInt oNi;
      ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
      /* on a sub communicator */
      ierr = ISGetSize(osm->ois[i],&oNi);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gxarray+on,&osm->x[i]);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gyarray+on,&osm->y[i]);CHKERRQ(ierr);
    }
    ierr = VecRestoreArray(osm->gx, &gxarray);CHKERRQ(ierr);
    ierr = VecRestoreArray(osm->gy, &gyarray);CHKERRQ(ierr);
    /* Create the subdomain solvers */
    ierr = PetscMalloc1(osm->n,&osm->ksp);CHKERRQ(ierr);
    for (i=0; i<osm->n; i++) {
      char subprefix[PETSC_MAX_PATH_LEN+1];
      ierr        = KSPCreate(((PetscObject)(osm->ois[i]))->comm,&ksp);CHKERRQ(ierr);
      ierr        = KSPSetErrorIfNotConverged(ksp,pc->erroriffailure);CHKERRQ(ierr);
      ierr        = PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp);CHKERRQ(ierr);
      ierr        = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr);
      ierr        = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
      ierr        = KSPGetPC(ksp,&subpc);CHKERRQ(ierr); /* Why do we need this here? */
      if (subdomain_dm) {
        ierr = KSPSetDM(ksp,subdomain_dm[i]);CHKERRQ(ierr);
        ierr = DMDestroy(subdomain_dm+i);CHKERRQ(ierr);
      }
      ierr        = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
      ierr        = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr);
      /* Named subdomains (from the DM) get an extra "sub_<name>_" prefix before "sub_". */
      if (subdomain_names && subdomain_names[i]) {
        ierr = PetscSNPrintf(subprefix,PETSC_MAX_PATH_LEN,"sub_%s_",subdomain_names[i]);CHKERRQ(ierr);
        ierr = KSPAppendOptionsPrefix(ksp,subprefix);CHKERRQ(ierr);
        ierr = PetscFree(subdomain_names[i]);CHKERRQ(ierr);
      }
      ierr        = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr);
      osm->ksp[i] = ksp;
    }
    ierr = PetscFree(subdomain_dm);CHKERRQ(ierr);
    ierr = PetscFree(subdomain_names);CHKERRQ(ierr);
    scall = MAT_INITIAL_MATRIX;
  } else { /* if (pc->setupcalled) */
    /*
       Destroy the submatrices from the previous iteration
    */
    if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
      ierr  = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
      scall = MAT_INITIAL_MATRIX;
    }
    if (osm->permutationIS) {
      /* Hierarchical partitioning is active: refresh the permuted operator and swap it in,
         saving the original in osm->pcmat (restored at the end of this function). */
      ierr = MatCreateSubMatrix(pc->pmat,osm->permutationIS,osm->permutationIS,scall,&osm->permutationP);CHKERRQ(ierr);
      ierr = PetscObjectReference((PetscObject)osm->permutationP);CHKERRQ(ierr);
      osm->pcmat = pc->pmat;
      pc->pmat   = osm->permutationP;
    }
  }

  /*
     Extract the submatrices.
  */
  if (size > 1) {
    ierr = MatCreateSubMatricesMPI(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreateSubMatrices(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);CHKERRQ(ierr);
  }
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr);
    for (i=0; i<osm->n; i++) {
      ierr = PetscLogObjectParent((PetscObject)pc,(PetscObject)osm->pmat[i]);CHKERRQ(ierr);
      ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr);
    }
  }

  /* Return control to the user so that the submatrices can be modified (e.g., to apply
     different boundary conditions for the submatrices than for the global problem) */
  ierr = PCModifySubMatrices(pc,osm->n,osm->ois,osm->ois,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr);

  /*
     Loop over submatrices putting them into local ksps
  */
  for (i=0; i<osm->n; i++) {
    ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i]);CHKERRQ(ierr);
    ierr = KSPGetOptionsPrefix(osm->ksp[i],&prefix);CHKERRQ(ierr);
    ierr = MatSetOptionsPrefix(osm->pmat[i],prefix);CHKERRQ(ierr);
    if (!pc->setupcalled) {
      ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr);
    }
  }
  /* Undo the matrix swap done for hierarchical partitioning: put the original pmat back. */
  if (osm->pcmat) {
    ierr = MatDestroy(&pc->pmat);CHKERRQ(ierr);
    pc->pmat   = osm->pcmat;
    osm->pcmat = NULL;
  }
  PetscFunctionReturn(0);
}
616 
617 static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc)
618 {
619   PC_GASM        *osm = (PC_GASM*)pc->data;
620   PetscErrorCode ierr;
621   PetscInt       i;
622 
623   PetscFunctionBegin;
624   for (i=0; i<osm->n; i++) {
625     ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr);
626   }
627   PetscFunctionReturn(0);
628 }
629 
static PetscErrorCode PCApply_GASM(PC pc,Vec xin,Vec yout)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i;
  Vec            x,y;
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

  PetscFunctionBegin;
  /* If a permutation to an "outer" ordering is active, map the input into the
     permuted work vectors first and operate on those instead of xin/yout. */
  if (osm->pctoouter) {
    ierr = VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    x = osm->pcx;
    y = osm->pcy;
  } else {
    x = xin;
    y = yout;
  }
  /*
     support for limiting the restriction or interpolation only to the inner
     subdomain values (leaving the other values 0).

     Note the deliberate interleaving below: each scatter is begun, other work
     (VecZeroEntries) is done, and only then is the scatter ended, so that
     communication overlaps with local computation.
  */
  if (!(osm->type & PC_GASM_RESTRICT)) {
    /* have to zero the work RHS since the inner-subdomain scatter may leave some slots empty */
    ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
    ierr = VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  } else {
    ierr = VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  ierr = VecZeroEntries(osm->gy);CHKERRQ(ierr);
  if (!(osm->type & PC_GASM_RESTRICT)) {
    ierr = VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  } else {
    ierr = VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  /* do the subdomain solves; osm->x[i]/osm->y[i] alias slices of osm->gx/osm->gy */
  for (i=0; i<osm->n; ++i) {
    ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
    ierr = KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);CHKERRQ(ierr);
  }
  /* zero y before accumulating subdomain corrections with ADD_VALUES below */
  ierr = VecZeroEntries(y);CHKERRQ(ierr);
  if (!(osm->type & PC_GASM_INTERPOLATE)) {
    ierr = VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  } else {
    ierr = VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  /* map the result back from the permuted ordering into the caller's yout */
  if (osm->pctoouter) {
    ierr = VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
685 
686 static PetscErrorCode PCMatApply_GASM(PC pc,Mat Xin,Mat Yout)
687 {
688   PC_GASM        *osm = (PC_GASM*)pc->data;
689   Mat            X,Y,O=NULL,Z,W;
690   Vec            x,y;
691   PetscInt       i,m,M,N;
692   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
693   PetscErrorCode ierr;
694 
695   PetscFunctionBegin;
696   if (osm->n != 1) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented");
697   ierr = MatGetSize(Xin,NULL,&N);CHKERRQ(ierr);
698   if (osm->pctoouter) {
699     ierr = VecGetLocalSize(osm->pcx,&m);CHKERRQ(ierr);
700     ierr = VecGetSize(osm->pcx,&M);CHKERRQ(ierr);
701     ierr = MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&O);CHKERRQ(ierr);
702     for (i = 0; i < N; ++i) {
703       ierr = MatDenseGetColumnVecRead(Xin,i,&x);CHKERRQ(ierr);
704       ierr = MatDenseGetColumnVecWrite(O,i,&y);CHKERRQ(ierr);
705       ierr = VecScatterBegin(osm->pctoouter,x,y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
706       ierr = VecScatterEnd(osm->pctoouter,x,y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
707       ierr = MatDenseRestoreColumnVecWrite(O,i,&y);CHKERRQ(ierr);
708       ierr = MatDenseRestoreColumnVecRead(Xin,i,&x);CHKERRQ(ierr);
709     }
710     X = Y = O;
711   } else {
712     X = Xin;
713     Y = Yout;
714   }
715   /*
716      support for limiting the restriction or interpolation only to the inner
717      subdomain values (leaving the other values 0).
718   */
719   ierr = VecGetLocalSize(osm->x[0],&m);CHKERRQ(ierr);
720   ierr = VecGetSize(osm->x[0],&M);CHKERRQ(ierr);
721   ierr = MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&Z);CHKERRQ(ierr);
722   for (i = 0; i < N; ++i) {
723     ierr = MatDenseGetColumnVecRead(X,i,&x);CHKERRQ(ierr);
724     ierr = MatDenseGetColumnVecWrite(Z,i,&y);CHKERRQ(ierr);
725     if (!(osm->type & PC_GASM_RESTRICT)) {
726       /* have to zero the work RHS since scatter may leave some slots empty */
727       ierr = VecZeroEntries(y);CHKERRQ(ierr);
728       ierr = VecScatterBegin(osm->girestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr);
729       ierr = VecScatterEnd(osm->girestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr);
730     } else {
731       ierr = VecScatterBegin(osm->gorestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr);
732       ierr = VecScatterEnd(osm->gorestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr);
733     }
734     ierr = MatDenseRestoreColumnVecWrite(Z,i,&y);CHKERRQ(ierr);
735     ierr = MatDenseRestoreColumnVecRead(X,i,&x);CHKERRQ(ierr);
736   }
737   ierr = MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&W);CHKERRQ(ierr);
738   ierr = MatSetOption(Z,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
739   ierr = MatAssemblyBegin(Z,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
740   ierr = MatAssemblyEnd(Z,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
741   /* do the subdomain solve */
742   ierr = KSPMatSolve(osm->ksp[0],Z,W);CHKERRQ(ierr);
743   ierr = KSPCheckSolve(osm->ksp[0],pc,NULL);CHKERRQ(ierr);
744   ierr = MatDestroy(&Z);CHKERRQ(ierr);
745   /* do we need to zero y? */
746   ierr = MatZeroEntries(Y);CHKERRQ(ierr);
747   for (i = 0; i < N; ++i) {
748     ierr = MatDenseGetColumnVecWrite(Y,i,&y);CHKERRQ(ierr);
749     ierr = MatDenseGetColumnVecRead(W,i,&x);CHKERRQ(ierr);
750     if (!(osm->type & PC_GASM_INTERPOLATE)) {
751       ierr = VecScatterBegin(osm->girestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr);
752       ierr = VecScatterEnd(osm->girestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr);
753     } else {
754       ierr = VecScatterBegin(osm->gorestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr);
755       ierr = VecScatterEnd(osm->gorestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr);
756     }
757     ierr = MatDenseRestoreColumnVecRead(W,i,&x);CHKERRQ(ierr);
758     if (osm->pctoouter) {
759       ierr = MatDenseGetColumnVecWrite(Yout,i,&x);CHKERRQ(ierr);
760       ierr = VecScatterBegin(osm->pctoouter,y,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
761       ierr = VecScatterEnd(osm->pctoouter,y,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
762       ierr = MatDenseRestoreColumnVecRead(Yout,i,&x);CHKERRQ(ierr);
763     }
764     ierr = MatDenseRestoreColumnVecWrite(Y,i,&y);CHKERRQ(ierr);
765   }
766   ierr = MatDestroy(&W);CHKERRQ(ierr);
767   ierr = MatDestroy(&O);CHKERRQ(ierr);
768   PetscFunctionReturn(0);
769 }
770 
771 static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec xin,Vec yout)
772 {
773   PC_GASM        *osm = (PC_GASM*)pc->data;
774   PetscErrorCode ierr;
775   PetscInt       i;
776   Vec            x,y;
777   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
778 
779   PetscFunctionBegin;
780   if (osm->pctoouter) {
781    ierr = VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
782    ierr = VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
783    x = osm->pcx;
784    y = osm->pcy;
785   }else{
786         x = xin;
787         y = yout;
788   }
789   /*
790      Support for limiting the restriction or interpolation to only local
791      subdomain values (leaving the other values 0).
792 
793      Note: these are reversed from the PCApply_GASM() because we are applying the
794      transpose of the three terms
795   */
796   if (!(osm->type & PC_GASM_INTERPOLATE)) {
797     /* have to zero the work RHS since scatter may leave some slots empty */
798     ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
799     ierr = VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
800   } else {
801     ierr = VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
802   }
803   ierr = VecZeroEntries(osm->gy);CHKERRQ(ierr);
804   if (!(osm->type & PC_GASM_INTERPOLATE)) {
805     ierr = VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
806   } else {
807     ierr = VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
808   }
809   /* do the local solves */
810   for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
811     ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
812     ierr = KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);CHKERRQ(ierr);
813   }
814   ierr = VecZeroEntries(y);CHKERRQ(ierr);
815   if (!(osm->type & PC_GASM_RESTRICT)) {
816     ierr = VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
817     ierr = VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
818   } else {
819     ierr = VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
820     ierr = VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
821   }
822   if (osm->pctoouter) {
823    ierr = VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
824    ierr = VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
825   }
826   PetscFunctionReturn(0);
827 }
828 
829 static PetscErrorCode PCReset_GASM(PC pc)
830 {
831   PC_GASM        *osm = (PC_GASM*)pc->data;
832   PetscErrorCode ierr;
833   PetscInt       i;
834 
835   PetscFunctionBegin;
836   if (osm->ksp) {
837     for (i=0; i<osm->n; i++) {
838       ierr = KSPReset(osm->ksp[i]);CHKERRQ(ierr);
839     }
840   }
841   if (osm->pmat) {
842     if (osm->n > 0) {
843       PetscMPIInt size;
844       ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRMPI(ierr);
845       if (size > 1) {
846         /* osm->pmat is created by MatCreateSubMatricesMPI(), cannot use MatDestroySubMatrices() */
847         ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
848       } else {
849         ierr = MatDestroySubMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
850       }
851     }
852   }
853   if (osm->x) {
854     for (i=0; i<osm->n; i++) {
855       ierr = VecDestroy(&osm->x[i]);CHKERRQ(ierr);
856       ierr = VecDestroy(&osm->y[i]);CHKERRQ(ierr);
857     }
858   }
859   ierr = VecDestroy(&osm->gx);CHKERRQ(ierr);
860   ierr = VecDestroy(&osm->gy);CHKERRQ(ierr);
861 
862   ierr = VecScatterDestroy(&osm->gorestriction);CHKERRQ(ierr);
863   ierr = VecScatterDestroy(&osm->girestriction);CHKERRQ(ierr);
864   if (!osm->user_subdomains) {
865     ierr      = PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);CHKERRQ(ierr);
866     osm->N    = PETSC_DETERMINE;
867     osm->nmax = PETSC_DETERMINE;
868   }
869   if (osm->pctoouter) {
870         ierr = VecScatterDestroy(&(osm->pctoouter));CHKERRQ(ierr);
871   }
872   if (osm->permutationIS) {
873         ierr = ISDestroy(&(osm->permutationIS));CHKERRQ(ierr);
874   }
875   if (osm->pcx) {
876         ierr = VecDestroy(&(osm->pcx));CHKERRQ(ierr);
877   }
878   if (osm->pcy) {
879         ierr = VecDestroy(&(osm->pcy));CHKERRQ(ierr);
880   }
881   if (osm->permutationP) {
882     ierr = MatDestroy(&(osm->permutationP));CHKERRQ(ierr);
883   }
884   if (osm->pcmat) {
885         ierr = MatDestroy(&osm->pcmat);CHKERRQ(ierr);
886   }
887   PetscFunctionReturn(0);
888 }
889 
890 static PetscErrorCode PCDestroy_GASM(PC pc)
891 {
892   PC_GASM        *osm = (PC_GASM*)pc->data;
893   PetscErrorCode ierr;
894   PetscInt       i;
895 
896   PetscFunctionBegin;
897   ierr = PCReset_GASM(pc);CHKERRQ(ierr);
898   /* PCReset will not destroy subdomains, if user_subdomains is true. */
899   ierr = PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);CHKERRQ(ierr);
900   if (osm->ksp) {
901     for (i=0; i<osm->n; i++) {
902       ierr = KSPDestroy(&osm->ksp[i]);CHKERRQ(ierr);
903     }
904     ierr = PetscFree(osm->ksp);CHKERRQ(ierr);
905   }
906   ierr = PetscFree(osm->x);CHKERRQ(ierr);
907   ierr = PetscFree(osm->y);CHKERRQ(ierr);
908   ierr = PetscFree(pc->data);CHKERRQ(ierr);
909   PetscFunctionReturn(0);
910 }
911 
/* Process the options database entries that configure PCGASM. */
static PetscErrorCode PCSetFromOptions_GASM(PetscOptionItems *PetscOptionsObject,PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       blocks,ovl;
  PetscBool      flg;
  PCGASMType     gasmtype;

  PetscFunctionBegin;
  ierr = PetscOptionsHead(PetscOptionsObject,"Generalized additive Schwarz options");CHKERRQ(ierr);
  ierr = PetscOptionsBool("-pc_gasm_use_dm_subdomains","If subdomains aren't set, use DMCreateDomainDecomposition() to define subdomains.","PCGASMSetUseDMSubdomains",osm->dm_subdomains,&osm->dm_subdomains,&flg);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-pc_gasm_total_subdomains","Total number of subdomains across communicator","PCGASMSetTotalSubdomains",osm->N,&blocks,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCGASMSetTotalSubdomains(pc,blocks);CHKERRQ(ierr);
  }
  ierr = PetscOptionsInt("-pc_gasm_overlap","Number of overlapping degrees of freedom","PCGASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCGASMSetOverlap(pc,ovl);CHKERRQ(ierr);
    /* an explicit overlap request disables DM-defined subdomains */
    osm->dm_subdomains = PETSC_FALSE;
  }
  flg  = PETSC_FALSE;
  ierr = PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);CHKERRQ(ierr);
  if (flg) {ierr = PCGASMSetType(pc,gasmtype);CHKERRQ(ierr);}
  /* NOTE(review): the option key below misspells "hierarchical" ("hierachical").
     Renaming it would break existing users' option files, so it is left as-is;
     consider adding a correctly spelled alias upstream. */
  ierr = PetscOptionsBool("-pc_gasm_use_hierachical_partitioning","use hierarchical partitioning",NULL,osm->hierarchicalpartitioning,&osm->hierarchicalpartitioning,&flg);CHKERRQ(ierr);
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
939 
940 /*------------------------------------------------------------------------------------*/
941 
942 /*@
943     PCGASMSetTotalSubdomains - sets the total number of subdomains to use across the
944                                communicator.
945     Logically collective on pc
946 
947     Input Parameters:
948 +   pc  - the preconditioner
949 -   N   - total number of subdomains
950 
951     Level: beginner
952 
953 .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap()
954           PCGASMCreateSubdomains2D()
955 @*/
956 PetscErrorCode  PCGASMSetTotalSubdomains(PC pc,PetscInt N)
957 {
958   PC_GASM        *osm = (PC_GASM*)pc->data;
959   PetscMPIInt    size,rank;
960   PetscErrorCode ierr;
961 
962   PetscFunctionBegin;
963   if (N < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Total number of subdomains must be 1 or more, got N = %D",N);
964   if (pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before calling PCSetUp().");
965 
966   ierr = PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);CHKERRQ(ierr);
967   osm->ois = osm->iis = NULL;
968 
969   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRMPI(ierr);
970   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRMPI(ierr);
971   osm->N    = N;
972   osm->n    = PETSC_DETERMINE;
973   osm->nmax = PETSC_DETERMINE;
974   osm->dm_subdomains = PETSC_FALSE;
975   PetscFunctionReturn(0);
976 }
977 
978 static PetscErrorCode  PCGASMSetSubdomains_GASM(PC pc,PetscInt n,IS iis[],IS ois[])
979 {
980   PC_GASM         *osm = (PC_GASM*)pc->data;
981   PetscErrorCode  ierr;
982   PetscInt        i;
983 
984   PetscFunctionBegin;
985   if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more subdomains, got n = %D",n);
986   if (pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetSubdomains() should be called before calling PCSetUp().");
987 
988   ierr = PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);CHKERRQ(ierr);
989   osm->iis  = osm->ois = NULL;
990   osm->n    = n;
991   osm->N    = PETSC_DETERMINE;
992   osm->nmax = PETSC_DETERMINE;
993   if (ois) {
994     ierr = PetscMalloc1(n,&osm->ois);CHKERRQ(ierr);
995     for (i=0; i<n; i++) {
996       ierr = PetscObjectReference((PetscObject)ois[i]);CHKERRQ(ierr);
997       osm->ois[i] = ois[i];
998     }
999     /*
1000        Since the user set the outer subdomains, even if nontrivial overlap was requested via PCGASMSetOverlap(),
1001        it will be ignored.  To avoid confusion later on (e.g., when viewing the PC), the overlap size is set to -1.
1002     */
1003     osm->overlap = -1;
1004     /* inner subdomains must be provided  */
1005     if (!iis) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"inner indices have to be provided \n");
1006   }/* end if */
1007   if (iis) {
1008     ierr = PetscMalloc1(n,&osm->iis);CHKERRQ(ierr);
1009     for (i=0; i<n; i++) {
1010       ierr = PetscObjectReference((PetscObject)iis[i]);CHKERRQ(ierr);
1011       osm->iis[i] = iis[i];
1012     }
1013     if (!ois) {
1014       osm->ois = NULL;
1015       /* if user does not provide outer indices, we will create the corresponding outer indices using  osm->overlap =1 in PCSetUp_GASM */
1016     }
1017   }
1018   if (PetscDefined(USE_DEBUG)) {
1019     PetscInt        j,rstart,rend,*covered,lsize;
1020     const PetscInt  *indices;
1021     /* check if the inner indices cover and only cover the local portion of the preconditioning matrix */
1022     ierr = MatGetOwnershipRange(pc->pmat,&rstart,&rend);CHKERRQ(ierr);
1023     ierr = PetscCalloc1(rend-rstart,&covered);CHKERRQ(ierr);
1024     /* check if the current processor owns indices from others */
1025     for (i=0; i<n; i++) {
1026       ierr = ISGetIndices(osm->iis[i],&indices);CHKERRQ(ierr);
1027       ierr = ISGetLocalSize(osm->iis[i],&lsize);CHKERRQ(ierr);
1028       for (j=0; j<lsize; j++) {
1029         if (indices[j]<rstart || indices[j]>=rend) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"inner subdomains can not own an index %d from other processors", indices[j]);
1030         else if (covered[indices[j]-rstart]==1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"inner subdomains can not have an overlapping index %d ",indices[j]);
1031         else covered[indices[j]-rstart] = 1;
1032       }
1033     ierr = ISRestoreIndices(osm->iis[i],&indices);CHKERRQ(ierr);
1034     }
1035     /* check if we miss any indices */
1036     for (i=rstart; i<rend; i++) {
1037       if (!covered[i-rstart]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"local entity %d was not covered by inner subdomains",i);
1038     }
1039     ierr = PetscFree(covered);CHKERRQ(ierr);
1040   }
1041   if (iis)  osm->user_subdomains = PETSC_TRUE;
1042   PetscFunctionReturn(0);
1043 }
1044 
1045 static PetscErrorCode  PCGASMSetOverlap_GASM(PC pc,PetscInt ovl)
1046 {
1047   PC_GASM *osm = (PC_GASM*)pc->data;
1048 
1049   PetscFunctionBegin;
1050   if (ovl < 0) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
1051   if (pc->setupcalled && ovl != osm->overlap) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp().");
1052   if (!pc->setupcalled) osm->overlap = ovl;
1053   PetscFunctionReturn(0);
1054 }
1055 
1056 static PetscErrorCode  PCGASMSetType_GASM(PC pc,PCGASMType type)
1057 {
1058   PC_GASM *osm = (PC_GASM*)pc->data;
1059 
1060   PetscFunctionBegin;
1061   osm->type     = type;
1062   osm->type_set = PETSC_TRUE;
1063   PetscFunctionReturn(0);
1064 }
1065 
/* Record whether subdomain index sets should be sorted during setup. */
static PetscErrorCode  PCGASMSetSortIndices_GASM(PC pc,PetscBool doSort)
{
  PC_GASM *osm = (PC_GASM*)pc->data;

  PetscFunctionBegin;
  osm->sort_indices = doSort;
  PetscFunctionReturn(0);
}
1074 
1075 /*
1076    FIXME: This routine might need to be modified now that multiple ranks per subdomain are allowed.
1077         In particular, it would upset the global subdomain number calculation.
1078 */
/*
   Implementation of PCGASMGetSubKSP(): returns the local subdomain count, the
   global index of this rank's first subdomain (collective when requested), and
   the array of subdomain solvers.
*/
static PetscErrorCode  PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (osm->n < 1) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ORDER,"Need to call PCSetUp() on PC (or KSPSetUp() on the outer KSP object) before calling here");

  if (n) *n = osm->n;
  if (first) {
    /* MPI_Scan is an inclusive prefix sum, so subtract our own count to get the
       exclusive prefix (global number of our first subdomain).  This call is
       collective: either all ranks pass a non-NULL first, or none do. */
    ierr    = MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr);
    *first -= osm->n;
  }
  if (ksp) {
    /* Assume that local solves are now different; not necessarily
       true, though!  This flag is used only for PCView_GASM() */
    *ksp                        = osm->ksp;
    osm->same_subdomain_solvers = PETSC_FALSE;
  }
  PetscFunctionReturn(0);
} /* PCGASMGetSubKSP_GASM() */
1100 
1101 /*@C
1102     PCGASMSetSubdomains - Sets the subdomains for this processor
1103     for the additive Schwarz preconditioner.
1104 
1105     Collective on pc
1106 
1107     Input Parameters:
1108 +   pc  - the preconditioner object
1109 .   n   - the number of subdomains for this processor
1110 .   iis - the index sets that define the inner subdomains (or NULL for PETSc to determine subdomains)
1111 -   ois - the index sets that define the outer subdomains (or NULL to use the same as iis, or to construct by expanding iis by the requested overlap)
1112 
1113     Notes:
1114     The IS indices use the parallel, global numbering of the vector entries.
1115     Inner subdomains are those where the correction is applied.
1116     Outer subdomains are those where the residual necessary to obtain the
1117     corrections is obtained (see PCGASMType for the use of inner/outer subdomains).
1118     Both inner and outer subdomains can extend over several processors.
1119     This processor's portion of a subdomain is known as a local subdomain.
1120 
1121     Inner subdomains can not overlap with each other, do not have any entities from remote processors,
1122     and  have to cover the entire local subdomain owned by the current processor. The index sets on each
1123     process should be ordered such that the ith local subdomain is connected to the ith remote subdomain
1124     on another MPI process.
1125 
1126     By default the GASM preconditioner uses 1 (local) subdomain per processor.
1127 
1128     Level: advanced
1129 
1130 .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(),
1131           PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
1132 @*/
1133 PetscErrorCode  PCGASMSetSubdomains(PC pc,PetscInt n,IS iis[],IS ois[])
1134 {
1135   PC_GASM *osm = (PC_GASM*)pc->data;
1136   PetscErrorCode ierr;
1137 
1138   PetscFunctionBegin;
1139   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1140   ierr = PetscTryMethod(pc,"PCGASMSetSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,iis,ois));CHKERRQ(ierr);
1141   osm->dm_subdomains = PETSC_FALSE;
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 /*@
1146     PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
1147     additive Schwarz preconditioner.  Either all or no processors in the
1148     pc communicator must call this routine.
1149 
1150     Logically Collective on pc
1151 
1152     Input Parameters:
1153 +   pc  - the preconditioner context
1154 -   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 0)
1155 
1156     Options Database Key:
1157 .   -pc_gasm_overlap <overlap> - Sets overlap
1158 
1159     Notes:
1160     By default the GASM preconditioner uses 1 subdomain per processor.  To use
    multiple subdomains per processor or "straddling" subdomains that intersect
1162     multiple processors use PCGASMSetSubdomains() (or option -pc_gasm_total_subdomains <n>).
1163 
1164     The overlap defaults to 0, so if one desires that no additional
1165     overlap be computed beyond what may have been set with a call to
1166     PCGASMSetSubdomains(), then ovl must be set to be 0.  In particular, if one does
1167     not explicitly set the subdomains in application code, then all overlap would be computed
1168     internally by PETSc, and using an overlap of 0 would result in an GASM
1169     variant that is equivalent to the block Jacobi preconditioner.
1170 
1171     Note that one can define initial index sets with any overlap via
1172     PCGASMSetSubdomains(); the routine PCGASMSetOverlap() merely allows
1173     PETSc to extend that overlap further, if desired.
1174 
1175     Level: intermediate
1176 
1177 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1178           PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
1179 @*/
1180 PetscErrorCode  PCGASMSetOverlap(PC pc,PetscInt ovl)
1181 {
1182   PetscErrorCode ierr;
1183   PC_GASM *osm = (PC_GASM*)pc->data;
1184 
1185   PetscFunctionBegin;
1186   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1187   PetscValidLogicalCollectiveInt(pc,ovl,2);
1188   ierr = PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));CHKERRQ(ierr);
1189   osm->dm_subdomains = PETSC_FALSE;
1190   PetscFunctionReturn(0);
1191 }
1192 
1193 /*@
1194     PCGASMSetType - Sets the type of restriction and interpolation used
1195     for local problems in the additive Schwarz method.
1196 
1197     Logically Collective on PC
1198 
1199     Input Parameters:
1200 +   pc  - the preconditioner context
1201 -   type - variant of GASM, one of
1202 .vb
1203       PC_GASM_BASIC       - full interpolation and restriction
1204       PC_GASM_RESTRICT    - full restriction, local processor interpolation
1205       PC_GASM_INTERPOLATE - full interpolation, local processor restriction
1206       PC_GASM_NONE        - local processor restriction and interpolation
1207 .ve
1208 
1209     Options Database Key:
1210 .   -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
1211 
1212     Level: intermediate
1213 
1214 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1215           PCGASMCreateSubdomains2D()
1216 @*/
PetscErrorCode  PCGASMSetType(PC pc,PCGASMType type)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveEnum(pc,type,2);
  /* Dispatch to the GASM implementation; silently a no-op for non-GASM PCs. */
  ierr = PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1227 
1228 /*@
1229     PCGASMSetSortIndices - Determines whether subdomain indices are sorted.
1230 
1231     Logically Collective on PC
1232 
1233     Input Parameters:
1234 +   pc  - the preconditioner context
1235 -   doSort - sort the subdomain indices
1236 
1237     Level: intermediate
1238 
1239 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1240           PCGASMCreateSubdomains2D()
1241 @*/
PetscErrorCode  PCGASMSetSortIndices(PC pc,PetscBool doSort)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveBool(pc,doSort,2);
  /* Dispatch to the GASM implementation; silently a no-op for non-GASM PCs. */
  ierr = PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1252 
1253 /*@C
1254    PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on
1255    this processor.
1256 
1257    Collective on PC iff first_local is requested
1258 
1259    Input Parameter:
1260 .  pc - the preconditioner context
1261 
1262    Output Parameters:
1263 +  n_local - the number of blocks on this processor or NULL
1264 .  first_local - the global number of the first block on this processor or NULL,
1265                  all processors must request or all must pass NULL
1266 -  ksp - the array of KSP contexts
1267 
1268    Note:
   After PCGASMGetSubKSP() the array of KSP contexts is not to be freed
1270 
1271    Currently for some matrix implementations only 1 block per processor
1272    is supported.
1273 
1274    You must call KSPSetUp() before calling PCGASMGetSubKSP().
1275 
1276    Level: advanced
1277 
1278 .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap(),
1279           PCGASMCreateSubdomains2D(),
1280 @*/
PetscErrorCode  PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* PetscUseMethod (not Try): errors out if pc is not a PCGASM, since the caller
     expects real output values here. */
  ierr = PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1290 
1291 /* -------------------------------------------------------------------------------------*/
1292 /*MC
1293    PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
1294            its own KSP object.
1295 
1296    Options Database Keys:
1297 +  -pc_gasm_total_subdomains <n>  - Sets total number of local subdomains to be distributed among processors
1298 .  -pc_gasm_view_subdomains       - activates the printing of subdomain indices in PCView(), -ksp_view or -snes_view
1299 .  -pc_gasm_print_subdomains      - activates the printing of subdomain indices in PCSetUp()
1300 .  -pc_gasm_overlap <ovl>         - Sets overlap by which to (automatically) extend local subdomains
1301 -  -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
1302 
1303      IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
1304       will get a different convergence rate due to the default option of -pc_gasm_type restrict. Use
1305       -pc_gasm_type basic to use the standard GASM.
1306 
1307    Notes:
1308     Blocks can be shared by multiple processes.
1309 
1310      To set options on the solvers for each block append -sub_ to all the KSP, and PC
1311         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
1312 
1313      To set the options on the solvers separate for each block call PCGASMGetSubKSP()
1314          and set the options directly on the resulting KSP object (you can access its PC
1315          with KSPGetPC())
1316 
1317    Level: beginner
1318 
1319     References:
1320 +   1. - M Dryja, OB Widlund, An additive variant of the Schwarz alternating method for the case of many subregions
1321      Courant Institute, New York University Technical report
1322 -   2. - Barry Smith, Petter Bjorstad, and William Gropp, Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
1323     Cambridge University Press.
1324 
1325 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
1326            PCBJACOBI,  PCGASMGetSubKSP(), PCGASMSetSubdomains(),
1327            PCSetModifySubMatrices(), PCGASMSetOverlap(), PCGASMSetType()
1328 
1329 M*/
1330 
1331 PETSC_EXTERN PetscErrorCode PCCreate_GASM(PC pc)
1332 {
1333   PetscErrorCode ierr;
1334   PC_GASM        *osm;
1335 
1336   PetscFunctionBegin;
1337   ierr = PetscNewLog(pc,&osm);CHKERRQ(ierr);
1338 
1339   osm->N                        = PETSC_DETERMINE;
1340   osm->n                        = PETSC_DECIDE;
1341   osm->nmax                     = PETSC_DETERMINE;
1342   osm->overlap                  = 0;
1343   osm->ksp                      = NULL;
1344   osm->gorestriction            = NULL;
1345   osm->girestriction            = NULL;
1346   osm->pctoouter                = NULL;
1347   osm->gx                       = NULL;
1348   osm->gy                       = NULL;
1349   osm->x                        = NULL;
1350   osm->y                        = NULL;
1351   osm->pcx                      = NULL;
1352   osm->pcy                      = NULL;
1353   osm->permutationIS            = NULL;
1354   osm->permutationP             = NULL;
1355   osm->pcmat                    = NULL;
1356   osm->ois                      = NULL;
1357   osm->iis                      = NULL;
1358   osm->pmat                     = NULL;
1359   osm->type                     = PC_GASM_RESTRICT;
1360   osm->same_subdomain_solvers   = PETSC_TRUE;
1361   osm->sort_indices             = PETSC_TRUE;
1362   osm->dm_subdomains            = PETSC_FALSE;
1363   osm->hierarchicalpartitioning = PETSC_FALSE;
1364 
1365   pc->data                 = (void*)osm;
1366   pc->ops->apply           = PCApply_GASM;
1367   pc->ops->matapply        = PCMatApply_GASM;
1368   pc->ops->applytranspose  = PCApplyTranspose_GASM;
1369   pc->ops->setup           = PCSetUp_GASM;
1370   pc->ops->reset           = PCReset_GASM;
1371   pc->ops->destroy         = PCDestroy_GASM;
1372   pc->ops->setfromoptions  = PCSetFromOptions_GASM;
1373   pc->ops->setuponblocks   = PCSetUpOnBlocks_GASM;
1374   pc->ops->view            = PCView_GASM;
1375   pc->ops->applyrichardson = NULL;
1376 
1377   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSubdomains_C",PCGASMSetSubdomains_GASM);CHKERRQ(ierr);
1378   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetOverlap_C",PCGASMSetOverlap_GASM);CHKERRQ(ierr);
1379   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetType_C",PCGASMSetType_GASM);CHKERRQ(ierr);
1380   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSortIndices_C",PCGASMSetSortIndices_GASM);CHKERRQ(ierr);
1381   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMGetSubKSP_C",PCGASMGetSubKSP_GASM);CHKERRQ(ierr);
1382   PetscFunctionReturn(0);
1383 }
1384 
1385 PetscErrorCode  PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[])
1386 {
1387   MatPartitioning mpart;
1388   const char      *prefix;
1389   PetscInt        i,j,rstart,rend,bs;
1390   PetscBool       hasop, isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
1391   Mat             Ad     = NULL, adj;
1392   IS              ispart,isnumb,*is;
1393   PetscErrorCode  ierr;
1394 
1395   PetscFunctionBegin;
1396   if (nloc < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local subdomains must > 0, got nloc = %D",nloc);
1397 
1398   /* Get prefix, row distribution, and block size */
1399   ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr);
1400   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1401   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
1402   if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);
1403 
1404   /* Get diagonal block from matrix if possible */
1405   ierr = MatHasOperation(A,MATOP_GET_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1406   if (hasop) {
1407     ierr = MatGetDiagonalBlock(A,&Ad);CHKERRQ(ierr);
1408   }
1409   if (Ad) {
1410     ierr = PetscObjectBaseTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
1411     if (!isbaij) {ierr = PetscObjectBaseTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);}
1412   }
1413   if (Ad && nloc > 1) {
1414     PetscBool  match,done;
1415     /* Try to setup a good matrix partitioning if available */
1416     ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr);
1417     ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
1418     ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
1419     ierr = PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);CHKERRQ(ierr);
1420     if (!match) {
1421       ierr = PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);CHKERRQ(ierr);
1422     }
1423     if (!match) { /* assume a "good" partitioner is available */
1424       PetscInt       na;
1425       const PetscInt *ia,*ja;
1426       ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1427       if (done) {
1428         /* Build adjacency matrix by hand. Unfortunately a call to
1429            MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1430            remove the block-aij structure and we cannot expect
1431            MatPartitioning to split vertices as we need */
1432         PetscInt       i,j,len,nnz,cnt,*iia=NULL,*jja=NULL;
1433         const PetscInt *row;
1434         nnz = 0;
1435         for (i=0; i<na; i++) { /* count number of nonzeros */
1436           len = ia[i+1] - ia[i];
1437           row = ja + ia[i];
1438           for (j=0; j<len; j++) {
1439             if (row[j] == i) { /* don't count diagonal */
1440               len--; break;
1441             }
1442           }
1443           nnz += len;
1444         }
1445         ierr   = PetscMalloc1(na+1,&iia);CHKERRQ(ierr);
1446         ierr   = PetscMalloc1(nnz,&jja);CHKERRQ(ierr);
1447         nnz    = 0;
1448         iia[0] = 0;
1449         for (i=0; i<na; i++) { /* fill adjacency */
1450           cnt = 0;
1451           len = ia[i+1] - ia[i];
1452           row = ja + ia[i];
1453           for (j=0; j<len; j++) {
1454             if (row[j] != i) jja[nnz+cnt++] = row[j]; /* if not diagonal */
1455           }
1456           nnz += cnt;
1457           iia[i+1] = nnz;
1458         }
1459         /* Partitioning of the adjacency matrix */
1460         ierr      = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,NULL,&adj);CHKERRQ(ierr);
1461         ierr      = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr);
1462         ierr      = MatPartitioningSetNParts(mpart,nloc);CHKERRQ(ierr);
1463         ierr      = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr);
1464         ierr      = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr);
1465         ierr      = MatDestroy(&adj);CHKERRQ(ierr);
1466         foundpart = PETSC_TRUE;
1467       }
1468       ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1469     }
1470     ierr = MatPartitioningDestroy(&mpart);CHKERRQ(ierr);
1471   }
1472   ierr = PetscMalloc1(nloc,&is);CHKERRQ(ierr);
1473   if (!foundpart) {
1474 
1475     /* Partitioning by contiguous chunks of rows */
1476 
1477     PetscInt mbs   = (rend-rstart)/bs;
1478     PetscInt start = rstart;
1479     for (i=0; i<nloc; i++) {
1480       PetscInt count = (mbs/nloc + ((mbs % nloc) > i)) * bs;
1481       ierr   = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr);
1482       start += count;
1483     }
1484 
1485   } else {
1486 
1487     /* Partitioning by adjacency of diagonal block  */
1488 
1489     const PetscInt *numbering;
1490     PetscInt       *count,nidx,*indices,*newidx,start=0;
1491     /* Get node count in each partition */
1492     ierr = PetscMalloc1(nloc,&count);CHKERRQ(ierr);
1493     ierr = ISPartitioningCount(ispart,nloc,count);CHKERRQ(ierr);
1494     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1495       for (i=0; i<nloc; i++) count[i] *= bs;
1496     }
1497     /* Build indices from node numbering */
1498     ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr);
1499     ierr = PetscMalloc1(nidx,&indices);CHKERRQ(ierr);
1500     for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
1501     ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr);
1502     ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr);
1503     ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr);
1504     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1505       ierr = PetscMalloc1(nidx*bs,&newidx);CHKERRQ(ierr);
1506       for (i=0; i<nidx; i++) {
1507         for (j=0; j<bs; j++) newidx[i*bs+j] = indices[i]*bs + j;
1508       }
1509       ierr    = PetscFree(indices);CHKERRQ(ierr);
1510       nidx   *= bs;
1511       indices = newidx;
1512     }
1513     /* Shift to get global indices */
1514     for (i=0; i<nidx; i++) indices[i] += rstart;
1515 
1516     /* Build the index sets for each block */
1517     for (i=0; i<nloc; i++) {
1518       ierr   = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);CHKERRQ(ierr);
1519       ierr   = ISSort(is[i]);CHKERRQ(ierr);
1520       start += count[i];
1521     }
1522 
1523     ierr = PetscFree(count);CHKERRQ(ierr);
1524     ierr = PetscFree(indices);CHKERRQ(ierr);
1525     ierr = ISDestroy(&isnumb);CHKERRQ(ierr);
1526     ierr = ISDestroy(&ispart);CHKERRQ(ierr);
1527   }
1528   *iis = is;
1529   PetscFunctionReturn(0);
1530 }
1531 
/* Creates N subdomains that may straddle processor boundaries by coalescing
   rows of A across the communicator; delegates entirely to
   MatSubdomainsCreateCoalesce(), which reports the local count in *n. */
PETSC_INTERN PetscErrorCode  PCGASMCreateStraddlingSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
{
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  ierr = MatSubdomainsCreateCoalesce(A,N,n,iis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1540 
1541 /*@C
1542    PCGASMCreateSubdomains - Creates n index sets defining n nonoverlapping subdomains for the additive
1543    Schwarz preconditioner for a any problem based on its matrix.
1544 
1545    Collective
1546 
1547    Input Parameters:
1548 +  A       - The global matrix operator
1549 -  N       - the number of global subdomains requested
1550 
1551    Output Parameters:
1552 +  n   - the number of subdomains created on this processor
1553 -  iis - the array of index sets defining the local inner subdomains (on which the correction is applied)
1554 
1555    Level: advanced
1556 
1557    Note: When N >= A's communicator size, each subdomain is local -- contained within a single processor.
1558          When N < size, the subdomains are 'straddling' (processor boundaries) and are no longer local.
         The resulting subdomains can be used in PCGASMSetSubdomains(pc,n,iss,NULL).  The overlapping
1560          outer subdomains will be automatically generated from these according to the requested amount of
1561          overlap; this is currently supported only with local subdomains.
1562 
1563 .seealso: PCGASMSetSubdomains(), PCGASMDestroySubdomains()
1564 @*/
1565 PetscErrorCode  PCGASMCreateSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
1566 {
1567   PetscMPIInt     size;
1568   PetscErrorCode  ierr;
1569 
1570   PetscFunctionBegin;
1571   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1572   PetscValidPointer(iis,4);
1573 
1574   if (N < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Number of subdomains must be > 0, N = %D",N);
1575   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
1576   if (N >= size) {
1577     *n = N/size + (N%size);
1578     ierr = PCGASMCreateLocalSubdomains(A,*n,iis);CHKERRQ(ierr);
1579   } else {
1580     ierr = PCGASMCreateStraddlingSubdomains(A,N,n,iis);CHKERRQ(ierr);
1581   }
1582   PetscFunctionReturn(0);
1583 }
1584 
1585 /*@C
1586    PCGASMDestroySubdomains - Destroys the index sets created with
1587    PCGASMCreateSubdomains() or PCGASMCreateSubdomains2D. Should be
1588    called after setting subdomains with PCGASMSetSubdomains().
1589 
1590    Collective
1591 
1592    Input Parameters:
1593 +  n   - the number of index sets
1594 .  iis - the array of inner subdomains,
1595 -  ois - the array of outer subdomains, can be NULL
1596 
1597    Level: intermediate
1598 
1599    Notes:
1600     this is merely a convenience subroutine that walks each list,
1601    destroys each IS on the list, and then frees the list. At the end the
1602    list pointers are set to NULL.
1603 
1604 .seealso: PCGASMCreateSubdomains(), PCGASMSetSubdomains()
1605 @*/
1606 PetscErrorCode  PCGASMDestroySubdomains(PetscInt n,IS **iis,IS **ois)
1607 {
1608   PetscInt       i;
1609   PetscErrorCode ierr;
1610 
1611   PetscFunctionBegin;
1612   if (n <= 0) PetscFunctionReturn(0);
1613   if (ois) {
1614     PetscValidPointer(ois,3);
1615     if (*ois) {
1616       PetscValidPointer(*ois,3);
1617       for (i=0; i<n; i++) {
1618         ierr = ISDestroy(&(*ois)[i]);CHKERRQ(ierr);
1619       }
1620       ierr = PetscFree((*ois));CHKERRQ(ierr);
1621     }
1622   }
1623   if (iis) {
1624     PetscValidPointer(iis,2);
1625     if (*iis) {
1626       PetscValidPointer(*iis,2);
1627       for (i=0; i<n; i++) {
1628         ierr = ISDestroy(&(*iis)[i]);CHKERRQ(ierr);
1629       }
1630       ierr = PetscFree((*iis));CHKERRQ(ierr);
1631     }
1632   }
1633   PetscFunctionReturn(0);
1634 }
1635 
/* Computes the bounding box (*xleft_loc,*ylow_loc)-(*xright_loc,*yhigh_loc) of the intersection of the
   2D subdomain [xleft,xright) x [ylow,yhigh) with this processor's contiguous ownership range
   [first,last) of an M x N grid numbered row-wise, and the number *n of locally owned subdomain
   points.  The _loc output parameters must be passed as pointers. */
#define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \
  {                                                                                                       \
    PetscInt first_row = first/M, last_row = last/M+1;                                                     \
    /*                                                                                                    \
     Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners       \
     of the bounding box of the intersection of the subdomain with the local ownership range (local       \
     subdomain).                                                                                          \
     Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows       \
     of the intersection.                                                                                 \
    */                                                                                                    \
    /* ylow_loc is the grid row containing the first element of the local subdomain */                   \
    *ylow_loc = PetscMax(first_row,ylow);                                                                    \
    /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
    *xleft_loc = *ylow_loc==first_row ? PetscMax(first%M,xleft) : xleft;                                                                            \
    /* yhigh_loc is the grid row above the last local subdomain element */                                                                    \
    *yhigh_loc = PetscMin(last_row,yhigh);                                                                                                     \
    /* xright is the offset of the end of the  local subdomain within its grid row (might actually be outside the local subdomain) */         \
    *xright_loc = *yhigh_loc==last_row ? PetscMin(xright,last%M) : xright;                                                                          \
    /* Now compute the size of the local subdomain n. */ \
    *n = 0;                                               \
    if (*ylow_loc < *yhigh_loc) {                           \
      PetscInt width = xright-xleft;                     \
      *n += width*(*yhigh_loc-*ylow_loc-1);                 \
      *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \
      *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \
    } \
  }
1663 
1664 /*@
1665    PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1666    preconditioner for a two-dimensional problem on a regular grid.
1667 
1668    Collective
1669 
1670    Input Parameters:
1671 +  M, N               - the global number of grid points in the x and y directions
1672 .  Mdomains, Ndomains - the global number of subdomains in the x and y directions
1673 .  dof                - degrees of freedom per node
1674 -  overlap            - overlap in mesh lines
1675 
1676    Output Parameters:
+  nsub - the number of local subdomains created
1678 .  iis  - array of index sets defining inner (nonoverlapping) subdomains
1679 -  ois  - array of index sets defining outer (overlapping, if overlap > 0) subdomains
1680 
1681    Level: advanced
1682 
1683 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(), PCGASMSetOverlap()
1684 @*/
1685 PetscErrorCode  PCGASMCreateSubdomains2D(PC pc,PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap,PetscInt *nsub,IS **iis,IS **ois)
1686 {
1687   PetscErrorCode ierr;
1688   PetscMPIInt    size, rank;
1689   PetscInt       i, j;
1690   PetscInt       maxheight, maxwidth;
1691   PetscInt       xstart, xleft, xright, xleft_loc, xright_loc;
1692   PetscInt       ystart, ylow,  yhigh,  ylow_loc,  yhigh_loc;
1693   PetscInt       x[2][2], y[2][2], n[2];
1694   PetscInt       first, last;
1695   PetscInt       nidx, *idx;
1696   PetscInt       ii,jj,s,q,d;
1697   PetscInt       k,kk;
1698   PetscMPIInt    color;
1699   MPI_Comm       comm, subcomm;
1700   IS             **xis = NULL, **is = ois, **is_local = iis;
1701 
1702   PetscFunctionBegin;
1703   ierr = PetscObjectGetComm((PetscObject)pc, &comm);CHKERRQ(ierr);
1704   ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr);
1705   ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr);
1706   ierr = MatGetOwnershipRange(pc->pmat, &first, &last);CHKERRQ(ierr);
1707   if (first%dof || last%dof) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) "
1708                                       "does not respect the number of degrees of freedom per grid point %D", first, last, dof);
1709 
1710   /* Determine the number of domains with nonzero intersections with the local ownership range. */
1711   s      = 0;
1712   ystart = 0;
1713   for (j=0; j<Ndomains; ++j) {
1714     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1715     if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N);
1716     /* Vertical domain limits with an overlap. */
1717     ylow   = PetscMax(ystart - overlap,0);
1718     yhigh  = PetscMin(ystart + maxheight + overlap,N);
1719     xstart = 0;
1720     for (i=0; i<Mdomains; ++i) {
1721       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1722       if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1723       /* Horizontal domain limits with an overlap. */
1724       xleft  = PetscMax(xstart - overlap,0);
1725       xright = PetscMin(xstart + maxwidth + overlap,M);
1726       /*
1727          Determine whether this subdomain intersects this processor's ownership range of pc->pmat.
1728       */
1729       PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1730       if (nidx) ++s;
1731       xstart += maxwidth;
1732     } /* for (i = 0; i < Mdomains; ++i) */
1733     ystart += maxheight;
1734   } /* for (j = 0; j < Ndomains; ++j) */
1735 
1736   /* Now we can allocate the necessary number of ISs. */
1737   *nsub  = s;
1738   ierr   = PetscMalloc1(*nsub,is);CHKERRQ(ierr);
1739   ierr   = PetscMalloc1(*nsub,is_local);CHKERRQ(ierr);
1740   s      = 0;
1741   ystart = 0;
1742   for (j=0; j<Ndomains; ++j) {
1743     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1744     if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N);
1745     /* Vertical domain limits with an overlap. */
1746     y[0][0] = PetscMax(ystart - overlap,0);
1747     y[0][1] = PetscMin(ystart + maxheight + overlap,N);
1748     /* Vertical domain limits without an overlap. */
1749     y[1][0] = ystart;
1750     y[1][1] = PetscMin(ystart + maxheight,N);
1751     xstart  = 0;
1752     for (i=0; i<Mdomains; ++i) {
1753       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1754       if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1755       /* Horizontal domain limits with an overlap. */
1756       x[0][0] = PetscMax(xstart - overlap,0);
1757       x[0][1] = PetscMin(xstart + maxwidth + overlap,M);
1758       /* Horizontal domain limits without an overlap. */
1759       x[1][0] = xstart;
1760       x[1][1] = PetscMin(xstart+maxwidth,M);
1761       /*
1762          Determine whether this domain intersects this processor's ownership range of pc->pmat.
1763          Do this twice: first for the domains with overlaps, and once without.
1764          During the first pass create the subcommunicators, and use them on the second pass as well.
1765       */
1766       for (q = 0; q < 2; ++q) {
1767         PetscBool split = PETSC_FALSE;
1768         /*
1769           domain limits, (xleft, xright) and (ylow, yheigh) are adjusted
1770           according to whether the domain with an overlap or without is considered.
1771         */
1772         xleft = x[q][0]; xright = x[q][1];
1773         ylow  = y[q][0]; yhigh  = y[q][1];
1774         PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1775         nidx *= dof;
1776         n[q]  = nidx;
1777         /*
1778          Based on the counted number of indices in the local domain *with an overlap*,
1779          construct a subcommunicator of all the processors supporting this domain.
1780          Observe that a domain with an overlap might have nontrivial local support,
1781          while the domain without an overlap might not.  Hence, the decision to participate
1782          in the subcommunicator must be based on the domain with an overlap.
1783          */
1784         if (q == 0) {
1785           if (nidx) color = 1;
1786           else color = MPI_UNDEFINED;
1787           ierr = MPI_Comm_split(comm, color, rank, &subcomm);CHKERRMPI(ierr);
1788           split = PETSC_TRUE;
1789         }
1790         /*
1791          Proceed only if the number of local indices *with an overlap* is nonzero.
1792          */
1793         if (n[0]) {
1794           if (q == 0) xis = is;
1795           if (q == 1) {
1796             /*
1797              The IS for the no-overlap subdomain shares a communicator with the overlapping domain.
1798              Moreover, if the overlap is zero, the two ISs are identical.
1799              */
1800             if (overlap == 0) {
1801               (*is_local)[s] = (*is)[s];
1802               ierr           = PetscObjectReference((PetscObject)(*is)[s]);CHKERRQ(ierr);
1803               continue;
1804             } else {
1805               xis     = is_local;
1806               subcomm = ((PetscObject)(*is)[s])->comm;
1807             }
1808           } /* if (q == 1) */
1809           idx  = NULL;
1810           ierr = PetscMalloc1(nidx,&idx);CHKERRQ(ierr);
1811           if (nidx) {
1812             k = 0;
1813             for (jj=ylow_loc; jj<yhigh_loc; ++jj) {
1814               PetscInt x0 = (jj==ylow_loc) ? xleft_loc : xleft;
1815               PetscInt x1 = (jj==yhigh_loc-1) ? xright_loc : xright;
1816               kk = dof*(M*jj + x0);
1817               for (ii=x0; ii<x1; ++ii) {
1818                 for (d = 0; d < dof; ++d) {
1819                   idx[k++] = kk++;
1820                 }
1821               }
1822             }
1823           }
1824           ierr = ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*xis)+s);CHKERRQ(ierr);
1825           if (split) {
1826             ierr = MPI_Comm_free(&subcomm);CHKERRMPI(ierr);
1827           }
1828         }/* if (n[0]) */
1829       }/* for (q = 0; q < 2; ++q) */
1830       if (n[0]) ++s;
1831       xstart += maxwidth;
1832     } /* for (i = 0; i < Mdomains; ++i) */
1833     ystart += maxheight;
1834   } /* for (j = 0; j < Ndomains; ++j) */
1835   PetscFunctionReturn(0);
1836 }
1837 
1838 /*@C
1839     PCGASMGetSubdomains - Gets the subdomains supported on this processor
1840     for the additive Schwarz preconditioner.
1841 
1842     Not Collective
1843 
1844     Input Parameter:
1845 .   pc - the preconditioner context
1846 
1847     Output Parameters:
1848 +   n   - the number of subdomains for this processor (default value = 1)
1849 .   iis - the index sets that define the inner subdomains (without overlap) supported on this processor (can be NULL)
1850 -   ois - the index sets that define the outer subdomains (with overlap) supported on this processor (can be NULL)
1851 
1852     Notes:
1853     The user is responsible for destroying the ISs and freeing the returned arrays.
1854     The IS numbering is in the parallel, global numbering of the vector.
1855 
1856     Level: advanced
1857 
1858 .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(), PCGASMCreateSubdomains2D(),
1859           PCGASMSetSubdomains(), PCGASMGetSubmatrices()
1860 @*/
1861 PetscErrorCode  PCGASMGetSubdomains(PC pc,PetscInt *n,IS *iis[],IS *ois[])
1862 {
1863   PC_GASM        *osm;
1864   PetscErrorCode ierr;
1865   PetscBool      match;
1866   PetscInt       i;
1867 
1868   PetscFunctionBegin;
1869   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1870   ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1871   if (!match) SETERRQ2(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Incorrect object type: expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
1872   osm = (PC_GASM*)pc->data;
1873   if (n) *n = osm->n;
1874   if (iis) {
1875     ierr = PetscMalloc1(osm->n, iis);CHKERRQ(ierr);
1876   }
1877   if (ois) {
1878     ierr = PetscMalloc1(osm->n, ois);CHKERRQ(ierr);
1879   }
1880   if (iis || ois) {
1881     for (i = 0; i < osm->n; ++i) {
1882       if (iis) (*iis)[i] = osm->iis[i];
1883       if (ois) (*ois)[i] = osm->ois[i];
1884     }
1885   }
1886   PetscFunctionReturn(0);
1887 }
1888 
1889 /*@C
1890     PCGASMGetSubmatrices - Gets the local submatrices (for this processor
1891     only) for the additive Schwarz preconditioner.
1892 
1893     Not Collective
1894 
1895     Input Parameter:
1896 .   pc - the preconditioner context
1897 
1898     Output Parameters:
1899 +   n   - the number of matrices for this processor (default value = 1)
1900 -   mat - the matrices
1901 
1902     Notes:
1903     matrices returned by this routine have the same communicators as the index sets (IS)
1904            used to define subdomains in PCGASMSetSubdomains()
1905     Level: advanced
1906 
1907 .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(),
1908           PCGASMCreateSubdomains2D(), PCGASMSetSubdomains(), PCGASMGetSubdomains()
1909 @*/
1910 PetscErrorCode  PCGASMGetSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1911 {
1912   PC_GASM        *osm;
1913   PetscErrorCode ierr;
1914   PetscBool      match;
1915 
1916   PetscFunctionBegin;
1917   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1918   PetscValidIntPointer(n,2);
1919   if (mat) PetscValidPointer(mat,3);
1920   if (!pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUp() or PCSetUp().");
1921   ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1922   if (!match) SETERRQ2(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
1923   osm = (PC_GASM*)pc->data;
1924   if (n) *n = osm->n;
1925   if (mat) *mat = osm->pmat;
1926   PetscFunctionReturn(0);
1927 }
1928 
1929 /*@
1930     PCGASMSetUseDMSubdomains - Indicates whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
1931     Logically Collective
1932 
1933     Input Parameter:
1934 +   pc  - the preconditioner
1935 -   flg - boolean indicating whether to use subdomains defined by the DM
1936 
1937     Options Database Key:
.   -pc_gasm_dm_subdomains <bool> - use subdomains defined by the DM (ignored once -pc_gasm_overlap or -pc_gasm_total_subdomains is set)
1939 
1940     Level: intermediate
1941 
1942     Notes:
1943     PCGASMSetSubdomains(), PCGASMSetTotalSubdomains() or PCGASMSetOverlap() take precedence over PCGASMSetUseDMSubdomains(),
1944     so setting PCGASMSetSubdomains() with nontrivial subdomain ISs or any of PCGASMSetTotalSubdomains() and PCGASMSetOverlap()
1945     automatically turns the latter off.
1946 
1947 .seealso: PCGASMGetUseDMSubdomains(), PCGASMSetSubdomains(), PCGASMSetOverlap()
1948           PCGASMCreateSubdomains2D()
1949 @*/
1950 PetscErrorCode  PCGASMSetUseDMSubdomains(PC pc,PetscBool flg)
1951 {
1952   PC_GASM        *osm = (PC_GASM*)pc->data;
1953   PetscErrorCode ierr;
1954   PetscBool      match;
1955 
1956   PetscFunctionBegin;
1957   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1958   PetscValidLogicalCollectiveBool(pc,flg,2);
1959   if (pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Not for a setup PC.");
1960   ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1961   if (match) {
1962     if (!osm->user_subdomains && osm->N == PETSC_DETERMINE && osm->overlap < 0) {
1963       osm->dm_subdomains = flg;
1964     }
1965   }
1966   PetscFunctionReturn(0);
1967 }
1968 
1969 /*@
1970     PCGASMGetUseDMSubdomains - Returns flag indicating whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
1971     Not Collective
1972 
1973     Input Parameter:
1974 .   pc  - the preconditioner
1975 
1976     Output Parameter:
1977 .   flg - boolean indicating whether to use subdomains defined by the DM
1978 
1979     Level: intermediate
1980 
1981 .seealso: PCGASMSetUseDMSubdomains(), PCGASMSetOverlap()
1982           PCGASMCreateSubdomains2D()
1983 @*/
1984 PetscErrorCode  PCGASMGetUseDMSubdomains(PC pc,PetscBool* flg)
1985 {
1986   PC_GASM        *osm = (PC_GASM*)pc->data;
1987   PetscErrorCode ierr;
1988   PetscBool      match;
1989 
1990   PetscFunctionBegin;
1991   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1992   PetscValidBoolPointer(flg,2);
1993   ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1994   if (match) {
1995     if (flg) *flg = osm->dm_subdomains;
1996   }
1997   PetscFunctionReturn(0);
1998 }
1999