xref: /petsc/src/ksp/pc/impls/gasm/gasm.c (revision efa12513287cff49a2b9648ae83199dcbfaad71a)
1 /*
2   This file defines an "generalized" additive Schwarz preconditioner for any Mat implementation.
3   In this version each processor may intersect multiple subdomains and any subdomain may
4   intersect multiple processors.  Intersections of subdomains with processors are called *local
5   subdomains*.
6 
7        N    - total number of distinct global subdomains          (set explicitly in PCGASMSetTotalSubdomains() or implicitly PCGASMSetSubdomains() and then calculated in PCSetUp_GASM())
8        n    - actual number of local subdomains on this processor (set in PCGASMSetSubdomains() or calculated in PCGASMSetTotalSubdomains())
9        nmax - maximum number of local subdomains per processor    (calculated in PCSetUp_GASM())
10 */
11 #include <petsc/private/pcimpl.h>     /*I "petscpc.h" I*/
12 #include <petscdm.h>
13 
14 typedef struct {
15   PetscInt    N,n,nmax;
16   PetscInt    overlap;                  /* overlap requested by user */
17   PCGASMType  type;                     /* use reduced interpolation, restriction or both */
18   PetscBool   type_set;                 /* if user set this value (so won't change it for symmetric problems) */
19   PetscBool   same_subdomain_solvers;   /* flag indicating whether all local solvers are same */
20   PetscBool   sort_indices;             /* flag to sort subdomain indices */
21   PetscBool   user_subdomains;          /* whether the user set explicit subdomain index sets -- keep them on PCReset() */
22   PetscBool   dm_subdomains;            /* whether DM is allowed to define subdomains */
23   PetscBool   hierarchicalpartitioning;
24   IS          *ois;                     /* index sets that define the outer (conceptually, overlapping) subdomains */
25   IS          *iis;                     /* index sets that define the inner (conceptually, nonoverlapping) subdomains */
26   KSP         *ksp;                     /* linear solvers for each subdomain */
27   Mat         *pmat;                    /* subdomain block matrices */
28   Vec         gx,gy;                    /* Merged work vectors */
29   Vec         *x,*y;                    /* Split work vectors; storage aliases pieces of storage of the above merged vectors. */
30   VecScatter  gorestriction;            /* merged restriction to disjoint union of outer subdomains */
31   VecScatter  girestriction;            /* merged restriction to disjoint union of inner subdomains */
32   VecScatter  pctoouter;
33   IS          permutationIS;
34   Mat         permutationP;
35   Mat         pcmat;
36   Vec         pcx,pcy;
37 } PC_GASM;
38 
39 static PetscErrorCode  PCGASMComputeGlobalSubdomainNumbering_Private(PC pc,PetscInt **numbering,PetscInt **permutation)
40 {
41   PC_GASM        *osm = (PC_GASM*)pc->data;
42   PetscInt       i;
43   PetscErrorCode ierr;
44 
45   PetscFunctionBegin;
46   /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
47   ierr = PetscMalloc2(osm->n,numbering,osm->n,permutation);CHKERRQ(ierr);
48   ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->iis,NULL,*numbering);CHKERRQ(ierr);
49   for (i = 0; i < osm->n; ++i) (*permutation)[i] = i;
50   ierr = PetscSortIntWithPermutation(osm->n,*numbering,*permutation);CHKERRQ(ierr);
51   PetscFunctionReturn(0);
52 }
53 
54 static PetscErrorCode  PCGASMSubdomainView_Private(PC pc, PetscInt i, PetscViewer viewer)
55 {
56   PC_GASM        *osm = (PC_GASM*)pc->data;
57   PetscInt       j,nidx;
58   const PetscInt *idx;
59   PetscViewer    sviewer;
60   char           *cidx;
61   PetscErrorCode ierr;
62 
63   PetscFunctionBegin;
64   if (i < 0 || i > osm->n) SETERRQ2(PetscObjectComm((PetscObject)viewer), PETSC_ERR_ARG_WRONG, "Invalid subdomain %D: must nonnegative and less than %D", i, osm->n);
65   /* Inner subdomains. */
66   ierr = ISGetLocalSize(osm->iis[i], &nidx);CHKERRQ(ierr);
67   /*
68    No more than 15 characters per index plus a space.
69    PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
70    in case nidx == 0. That will take care of the space for the trailing '\0' as well.
71    For nidx == 0, the whole string 16 '\0'.
72    */
73 #define len  16*(nidx+1)+1
74   ierr = PetscMalloc1(len, &cidx);CHKERRQ(ierr);
75   ierr = PetscViewerStringOpen(PETSC_COMM_SELF, cidx, len, &sviewer);CHKERRQ(ierr);
76 #undef len
77   ierr = ISGetIndices(osm->iis[i], &idx);CHKERRQ(ierr);
78   for (j = 0; j < nidx; ++j) {
79     ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);CHKERRQ(ierr);
80   }
81   ierr = ISRestoreIndices(osm->iis[i],&idx);CHKERRQ(ierr);
82   ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr);
83   ierr = PetscViewerASCIIPrintf(viewer, "Inner subdomain:\n");CHKERRQ(ierr);
84   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
85   ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
86   ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr);
87   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
88   ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
89   ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr);
90   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
91   ierr = PetscFree(cidx);CHKERRQ(ierr);
92   /* Outer subdomains. */
93   ierr = ISGetLocalSize(osm->ois[i], &nidx);CHKERRQ(ierr);
94   /*
95    No more than 15 characters per index plus a space.
96    PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
97    in case nidx == 0. That will take care of the space for the trailing '\0' as well.
98    For nidx == 0, the whole string 16 '\0'.
99    */
100 #define len  16*(nidx+1)+1
101   ierr = PetscMalloc1(len, &cidx);CHKERRQ(ierr);
102   ierr = PetscViewerStringOpen(PETSC_COMM_SELF, cidx, len, &sviewer);CHKERRQ(ierr);
103 #undef len
104   ierr = ISGetIndices(osm->ois[i], &idx);CHKERRQ(ierr);
105   for (j = 0; j < nidx; ++j) {
106     ierr = PetscViewerStringSPrintf(sviewer,"%D ", idx[j]);CHKERRQ(ierr);
107   }
108   ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr);
109   ierr = ISRestoreIndices(osm->ois[i],&idx);CHKERRQ(ierr);
110   ierr = PetscViewerASCIIPrintf(viewer, "Outer subdomain:\n");CHKERRQ(ierr);
111   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
112   ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
113   ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr);
114   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
115   ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
116   ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr);
117   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
118   ierr = PetscFree(cidx);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
/* Prints every subdomain's inner and outer index sets, in global subdomain-numbering
   order, when -pc_gasm_print_subdomains is set.  A non-boolean option value is taken
   as the output file name; otherwise stdout is used.
   Collective on the PC's communicator (contains an MPI_Barrier per global subdomain). */
static PetscErrorCode  PCGASMPrintSubdomains(PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  const char     *prefix;
  char           fname[PETSC_MAX_PATH_LEN+1];
  PetscInt       l, d, count;
  PetscBool      doprint,found;
  PetscViewer    viewer, sviewer = NULL;
  PetscInt       *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
  doprint  = PETSC_FALSE;
  ierr = PetscOptionsGetBool(NULL,prefix,"-pc_gasm_print_subdomains",&doprint,NULL);CHKERRQ(ierr);
  if (!doprint) PetscFunctionReturn(0);
  /* Re-read the same option as a string: if present, its value names the output file. */
  ierr = PetscOptionsGetString(NULL,prefix,"-pc_gasm_print_subdomains",fname,sizeof(fname),&found);CHKERRQ(ierr);
  if (!found) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); };
  ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)pc),fname,&viewer);CHKERRQ(ierr);
  /*
   Make sure the viewer has a name. Otherwise this may cause a deadlock or other weird errors when creating a subcomm viewer:
   the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed collectively on the comm.
  */
  ierr = PetscObjectName((PetscObject)viewer);CHKERRQ(ierr);
  l    = 0;
  ierr = PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);CHKERRQ(ierr);
  for (count = 0; count < osm->N; ++count) {
    /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
    if (l<osm->n) {
      d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
      if (numbering[d] == count) {
        ierr = PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
        ierr = PCGASMSubdomainView_Private(pc,d,sviewer);CHKERRQ(ierr);
        ierr = PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
        ++l;
      }
    }
    /* Keep all ranks in lockstep so output stays in global subdomain order. */
    ierr = MPI_Barrier(PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr);
  }
  ierr = PetscFree2(numbering,permutation);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
165 
/* PCView implementation for GASM: prints the overlap, the subdomain counts, and
   then, one global subdomain at a time, each subdomain's size and solver.
   Only ASCII viewers are supported; other viewer types are silently ignored.
   Collective on the PC's communicator (contains an MPI_Barrier per global subdomain). */
static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  const char     *prefix;
  PetscErrorCode ierr;
  PetscMPIInt    rank, size;
  PetscInt       bsz;
  PetscBool      iascii,view_subdomains=PETSC_FALSE;
  PetscViewer    sviewer;
  PetscInt       count, l;
  /* Default texts, overwritten below when the corresponding quantity is known. */
  char           overlap[256]     = "user-defined overlap";
  char           gsubdomains[256] = "unknown total number of subdomains";
  char           msubdomains[256] = "unknown max number of local subdomains";
  PetscInt       *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank);CHKERRMPI(ierr);

  if (osm->overlap >= 0) {
    ierr = PetscSNPrintf(overlap,sizeof(overlap),"requested amount of overlap = %D",osm->overlap);CHKERRQ(ierr);
  }
  if (osm->N != PETSC_DETERMINE) {
    ierr = PetscSNPrintf(gsubdomains, sizeof(gsubdomains), "total number of subdomains = %D",osm->N);CHKERRQ(ierr);
  }
  if (osm->nmax != PETSC_DETERMINE) {
    ierr = PetscSNPrintf(msubdomains,sizeof(msubdomains),"max number of local subdomains = %D",osm->nmax);CHKERRQ(ierr);
  }

  ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(NULL,prefix,"-pc_gasm_view_subdomains",&view_subdomains,NULL);CHKERRQ(ierr);

  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    /*
     Make sure the viewer has a name. Otherwise this may cause a deadlock when creating a subcomm viewer:
     the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed
     collectively on the comm.
     */
    ierr = PetscObjectName((PetscObject)viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  Restriction/interpolation type: %s\n",PCGASMTypes[osm->type]);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  %s\n",overlap);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  %s\n",gsubdomains);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  %s\n",msubdomains);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d|%d] number of locally-supported subdomains = %D\n",rank,size,osm->n);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    /* Cannot take advantage of osm->same_subdomain_solvers without a global numbering of subdomains. */
    ierr = PetscViewerASCIIPrintf(viewer,"  Subdomain solver info is as follows:\n");CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  - - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
    /* Make sure that everybody waits for the banner to be printed. */
    ierr = MPI_Barrier(PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr);
    /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
    ierr = PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);CHKERRQ(ierr);
    l = 0;
    for (count = 0; count < osm->N; ++count) {
      PetscMPIInt srank, ssize;
      if (l<osm->n) {
        PetscInt d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
        if (numbering[d] == count) {
          ierr = MPI_Comm_size(((PetscObject)osm->ois[d])->comm, &ssize);CHKERRMPI(ierr);
          ierr = MPI_Comm_rank(((PetscObject)osm->ois[d])->comm, &srank);CHKERRMPI(ierr);
          ierr = PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
          ierr = ISGetLocalSize(osm->ois[d],&bsz);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"  [%d|%d] (subcomm [%d|%d]) local subdomain number %D, local size = %D\n",rank,size,srank,ssize,d,bsz);CHKERRQ(ierr);
          ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr);
          if (view_subdomains) {
            ierr = PCGASMSubdomainView_Private(pc,d,sviewer);CHKERRQ(ierr);
          }
          if (!pc->setupcalled) {
            ierr = PetscViewerASCIIPrintf(sviewer, "  Solver not set up yet: PCSetUp() not yet called\n");CHKERRQ(ierr);
          } else {
            ierr = KSPView(osm->ksp[d],sviewer);CHKERRQ(ierr);
          }
          ierr = PetscViewerASCIIPrintf(sviewer,"  - - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
          ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr);
          ierr = PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
          ++l;
        }
      }
      /* Keep all ranks in lockstep so output stays in global subdomain order. */
      ierr = MPI_Barrier(PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr);
    }
    ierr = PetscFree2(numbering,permutation);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    /* this line is needed to match the extra PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
    ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
258 
259 PETSC_INTERN PetscErrorCode  PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[]);
260 
261 PetscErrorCode PCGASMSetHierarchicalPartitioning(PC pc)
262 {
263    PC_GASM              *osm = (PC_GASM*)pc->data;
264    MatPartitioning       part;
265    MPI_Comm              comm;
266    PetscMPIInt           size;
267    PetscInt              nlocalsubdomains,fromrows_localsize;
268    IS                    partitioning,fromrows,isn;
269    Vec                   outervec;
270    PetscErrorCode        ierr;
271 
272    PetscFunctionBegin;
273    ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
274    ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
275    /* we do not need a hierarchical partitioning when
276     * the total number of subdomains is consistent with
277     * the number of MPI tasks.
278     * For the following cases, we do not need to use HP
279     * */
280    if (osm->N==PETSC_DETERMINE || osm->N>=size || osm->N==1) PetscFunctionReturn(0);
281    if (size%osm->N != 0) SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_ARG_INCOMP,"have to specify the total number of subdomains %D to be a factor of the number of processors %d \n",osm->N,size);
282    nlocalsubdomains = size/osm->N;
283    osm->n           = 1;
284    ierr = MatPartitioningCreate(comm,&part);CHKERRQ(ierr);
285    ierr = MatPartitioningSetAdjacency(part,pc->pmat);CHKERRQ(ierr);
286    ierr = MatPartitioningSetType(part,MATPARTITIONINGHIERARCH);CHKERRQ(ierr);
287    ierr = MatPartitioningHierarchicalSetNcoarseparts(part,osm->N);CHKERRQ(ierr);
288    ierr = MatPartitioningHierarchicalSetNfineparts(part,nlocalsubdomains);CHKERRQ(ierr);
289    ierr = MatPartitioningSetFromOptions(part);CHKERRQ(ierr);
290    /* get new processor owner number of each vertex */
291    ierr = MatPartitioningApply(part,&partitioning);CHKERRQ(ierr);
292    ierr = ISBuildTwoSided(partitioning,NULL,&fromrows);CHKERRQ(ierr);
293    ierr = ISPartitioningToNumbering(partitioning,&isn);CHKERRQ(ierr);
294    ierr = ISDestroy(&isn);CHKERRQ(ierr);
295    ierr = ISGetLocalSize(fromrows,&fromrows_localsize);CHKERRQ(ierr);
296    ierr = MatPartitioningDestroy(&part);CHKERRQ(ierr);
297    ierr = MatCreateVecs(pc->pmat,&outervec,NULL);CHKERRQ(ierr);
298    ierr = VecCreateMPI(comm,fromrows_localsize,PETSC_DETERMINE,&(osm->pcx));CHKERRQ(ierr);
299    ierr = VecDuplicate(osm->pcx,&(osm->pcy));CHKERRQ(ierr);
300    ierr = VecScatterCreate(osm->pcx,NULL,outervec,fromrows,&(osm->pctoouter));CHKERRQ(ierr);
301    ierr = MatCreateSubMatrix(pc->pmat,fromrows,fromrows,MAT_INITIAL_MATRIX,&(osm->permutationP));CHKERRQ(ierr);
302    ierr = PetscObjectReference((PetscObject)fromrows);CHKERRQ(ierr);
303    osm->permutationIS = fromrows;
304    osm->pcmat =  pc->pmat;
305    ierr = PetscObjectReference((PetscObject)osm->permutationP);CHKERRQ(ierr);
306    pc->pmat = osm->permutationP;
307    ierr = VecDestroy(&outervec);CHKERRQ(ierr);
308    ierr = ISDestroy(&fromrows);CHKERRQ(ierr);
309    ierr = ISDestroy(&partitioning);CHKERRQ(ierr);
310    osm->n           = PETSC_DETERMINE;
311    PetscFunctionReturn(0);
312 }
313 
314 static PetscErrorCode PCSetUp_GASM(PC pc)
315 {
316   PC_GASM        *osm = (PC_GASM*)pc->data;
317   PetscErrorCode ierr;
318   PetscInt       i,nInnerIndices,nTotalInnerIndices;
319   PetscMPIInt    rank, size;
320   MatReuse       scall = MAT_REUSE_MATRIX;
321   KSP            ksp;
322   PC             subpc;
323   const char     *prefix,*pprefix;
324   Vec            x,y;
325   PetscInt       oni;       /* Number of indices in the i-th local outer subdomain.               */
326   const PetscInt *oidxi;    /* Indices from the i-th subdomain local outer subdomain.             */
327   PetscInt       on;        /* Number of indices in the disjoint union of local outer subdomains. */
328   PetscInt       *oidx;     /* Indices in the disjoint union of local outer subdomains. */
329   IS             gois;      /* Disjoint union the global indices of outer subdomains.             */
330   IS             goid;      /* Identity IS of the size of the disjoint union of outer subdomains. */
331   PetscScalar    *gxarray, *gyarray;
332   PetscInt       gostart;   /* Start of locally-owned indices in the vectors -- osm->gx,osm->gy -- over the disjoint union of outer subdomains. */
333   PetscInt       num_subdomains    = 0;
334   DM             *subdomain_dm     = NULL;
335   char           **subdomain_names = NULL;
336   PetscInt       *numbering;
337 
338 
339   PetscFunctionBegin;
340   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRMPI(ierr);
341   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRMPI(ierr);
342   if (!pc->setupcalled) {
343         /* use a hierarchical partitioning */
344     if (osm->hierarchicalpartitioning){
345       ierr = PCGASMSetHierarchicalPartitioning(pc);CHKERRQ(ierr);
346     }
347     if (osm->n == PETSC_DETERMINE) {
348       if (osm->N != PETSC_DETERMINE) {
349            /* No local subdomains given, but the desired number of total subdomains is known, so construct them accordingly. */
350            ierr = PCGASMCreateSubdomains(pc->pmat,osm->N,&osm->n,&osm->iis);CHKERRQ(ierr);
351       } else if (osm->dm_subdomains && pc->dm) {
352         /* try pc->dm next, if allowed */
353         PetscInt  d;
354         IS       *inner_subdomain_is, *outer_subdomain_is;
355         ierr = DMCreateDomainDecomposition(pc->dm, &num_subdomains, &subdomain_names, &inner_subdomain_is, &outer_subdomain_is, &subdomain_dm);CHKERRQ(ierr);
356         if (num_subdomains) {
357           ierr = PCGASMSetSubdomains(pc, num_subdomains, inner_subdomain_is, outer_subdomain_is);CHKERRQ(ierr);
358         }
359         for (d = 0; d < num_subdomains; ++d) {
360           if (inner_subdomain_is) {ierr = ISDestroy(&inner_subdomain_is[d]);CHKERRQ(ierr);}
361           if (outer_subdomain_is) {ierr = ISDestroy(&outer_subdomain_is[d]);CHKERRQ(ierr);}
362         }
363         ierr = PetscFree(inner_subdomain_is);CHKERRQ(ierr);
364         ierr = PetscFree(outer_subdomain_is);CHKERRQ(ierr);
365       } else {
366         /* still no subdomains; use one per processor */
367         osm->nmax = osm->n = 1;
368         ierr      = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr);
369         osm->N    = size;
370         ierr = PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);CHKERRQ(ierr);
371       }
372     }
373     if (!osm->iis) {
374       /*
375        osm->n was set in PCGASMSetSubdomains(), but the actual subdomains have not been supplied.
376        We create the requisite number of local inner subdomains and then expand them into
377        out subdomains, if necessary.
378        */
379       ierr = PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);CHKERRQ(ierr);
380     }
381     if (!osm->ois) {
382       /*
383             Initially make outer subdomains the same as inner subdomains. If nonzero additional overlap
384             has been requested, copy the inner subdomains over so they can be modified.
385       */
386       ierr = PetscMalloc1(osm->n,&osm->ois);CHKERRQ(ierr);
387       for (i=0; i<osm->n; ++i) {
388         if (osm->overlap > 0 && osm->N>1) { /* With positive overlap, osm->iis[i] will be modified */
389           ierr = ISDuplicate(osm->iis[i],(osm->ois)+i);CHKERRQ(ierr);
390           ierr = ISCopy(osm->iis[i],osm->ois[i]);CHKERRQ(ierr);
391         } else {
392           ierr      = PetscObjectReference((PetscObject)((osm->iis)[i]));CHKERRQ(ierr);
393           osm->ois[i] = osm->iis[i];
394         }
395       }
396       if (osm->overlap>0 && osm->N>1) {
397         /* Extend the "overlapping" regions by a number of steps */
398         ierr = MatIncreaseOverlapSplit(pc->pmat,osm->n,osm->ois,osm->overlap);CHKERRQ(ierr);
399       }
400     }
401 
402     /* Now the subdomains are defined.  Determine their global and max local numbers, if necessary. */
403     if (osm->nmax == PETSC_DETERMINE) {
404       PetscMPIInt inwork,outwork;
405       /* determine global number of subdomains and the max number of local subdomains */
406       inwork     = osm->n;
407       ierr       = MPIU_Allreduce(&inwork,&outwork,1,MPI_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
408       osm->nmax  = outwork;
409     }
410     if (osm->N == PETSC_DETERMINE) {
411       /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
412       ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,&osm->N,NULL);CHKERRQ(ierr);
413     }
414 
415     if (osm->sort_indices) {
416       for (i=0; i<osm->n; i++) {
417         ierr = ISSort(osm->ois[i]);CHKERRQ(ierr);
418         ierr = ISSort(osm->iis[i]);CHKERRQ(ierr);
419       }
420     }
421     ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
422     ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr);
423 
424     /*
425        Merge the ISs, create merged vectors and restrictions.
426      */
427     /* Merge outer subdomain ISs and construct a restriction onto the disjoint union of local outer subdomains. */
428     on = 0;
429     for (i=0; i<osm->n; i++) {
430       ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
431       on  += oni;
432     }
433     ierr = PetscMalloc1(on, &oidx);CHKERRQ(ierr);
434     on   = 0;
435     /* Merge local indices together */
436     for (i=0; i<osm->n; i++) {
437       ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
438       ierr = ISGetIndices(osm->ois[i],&oidxi);CHKERRQ(ierr);
439       ierr = PetscArraycpy(oidx+on,oidxi,oni);CHKERRQ(ierr);
440       ierr = ISRestoreIndices(osm->ois[i],&oidxi);CHKERRQ(ierr);
441       on  += oni;
442     }
443     ierr = ISCreateGeneral(((PetscObject)(pc))->comm,on,oidx,PETSC_OWN_POINTER,&gois);CHKERRQ(ierr);
444     nTotalInnerIndices = 0;
445     for (i=0; i<osm->n; i++){
446       ierr = ISGetLocalSize(osm->iis[i],&nInnerIndices);CHKERRQ(ierr);
447       nTotalInnerIndices += nInnerIndices;
448     }
449     ierr = VecCreateMPI(((PetscObject)(pc))->comm,nTotalInnerIndices,PETSC_DETERMINE,&x);CHKERRQ(ierr);
450     ierr = VecDuplicate(x,&y);CHKERRQ(ierr);
451 
452     ierr = VecCreateMPI(PetscObjectComm((PetscObject)pc),on,PETSC_DECIDE,&osm->gx);CHKERRQ(ierr);
453     ierr = VecDuplicate(osm->gx,&osm->gy);CHKERRQ(ierr);
454     ierr = VecGetOwnershipRange(osm->gx, &gostart, NULL);CHKERRQ(ierr);
455     ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),on,gostart,1, &goid);CHKERRQ(ierr);
456     /* gois might indices not on local */
457     ierr = VecScatterCreate(x,gois,osm->gx,goid, &(osm->gorestriction));CHKERRQ(ierr);
458     ierr = PetscMalloc1(osm->n,&numbering);CHKERRQ(ierr);
459     ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,NULL,numbering);CHKERRQ(ierr);
460     ierr = VecDestroy(&x);CHKERRQ(ierr);
461     ierr = ISDestroy(&gois);CHKERRQ(ierr);
462 
463     /* Merge inner subdomain ISs and construct a restriction onto the disjoint union of local inner subdomains. */
464     {
465       PetscInt        ini;           /* Number of indices the i-th a local inner subdomain. */
466       PetscInt        in;            /* Number of indices in the disjoint union of local inner subdomains. */
467       PetscInt       *iidx;          /* Global indices in the merged local inner subdomain. */
468       PetscInt       *ioidx;         /* Global indices of the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
469       IS              giis;          /* IS for the disjoint union of inner subdomains. */
470       IS              giois;         /* IS for the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
471       PetscScalar    *array;
472       const PetscInt *indices;
473       PetscInt        k;
474       on = 0;
475       for (i=0; i<osm->n; i++) {
476         ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
477         on  += oni;
478       }
479       ierr = PetscMalloc1(on, &iidx);CHKERRQ(ierr);
480       ierr = PetscMalloc1(on, &ioidx);CHKERRQ(ierr);
481       ierr = VecGetArray(y,&array);CHKERRQ(ierr);
482       /* set communicator id to determine where overlap is */
483       in   = 0;
484       for (i=0; i<osm->n; i++) {
485         ierr   = ISGetLocalSize(osm->iis[i],&ini);CHKERRQ(ierr);
486         for (k = 0; k < ini; ++k){
487           array[in+k] = numbering[i];
488         }
489         in += ini;
490       }
491       ierr = VecRestoreArray(y,&array);CHKERRQ(ierr);
492       ierr = VecScatterBegin(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
493       ierr = VecScatterEnd(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
494       ierr = VecGetOwnershipRange(osm->gy,&gostart, NULL);CHKERRQ(ierr);
495       ierr = VecGetArray(osm->gy,&array);CHKERRQ(ierr);
496       on  = 0;
497       in  = 0;
498       for (i=0; i<osm->n; i++) {
499         ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
500         ierr = ISGetIndices(osm->ois[i],&indices);CHKERRQ(ierr);
501         for (k=0; k<oni; k++) {
502           /*  skip overlapping indices to get inner domain */
503           if (PetscRealPart(array[on+k]) != numbering[i]) continue;
504           iidx[in]    = indices[k];
505           ioidx[in++] = gostart+on+k;
506         }
507         ierr   = ISRestoreIndices(osm->ois[i], &indices);CHKERRQ(ierr);
508         on += oni;
509       }
510       ierr = VecRestoreArray(osm->gy,&array);CHKERRQ(ierr);
511       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,iidx,PETSC_OWN_POINTER,&giis);CHKERRQ(ierr);
512       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,ioidx,PETSC_OWN_POINTER,&giois);CHKERRQ(ierr);
513       ierr = VecScatterCreate(y,giis,osm->gy,giois,&osm->girestriction);CHKERRQ(ierr);
514       ierr = VecDestroy(&y);CHKERRQ(ierr);
515       ierr = ISDestroy(&giis);CHKERRQ(ierr);
516       ierr = ISDestroy(&giois);CHKERRQ(ierr);
517     }
518     ierr = ISDestroy(&goid);CHKERRQ(ierr);
519     ierr = PetscFree(numbering);CHKERRQ(ierr);
520 
521     /* Create the subdomain work vectors. */
522     ierr = PetscMalloc1(osm->n,&osm->x);CHKERRQ(ierr);
523     ierr = PetscMalloc1(osm->n,&osm->y);CHKERRQ(ierr);
524     ierr = VecGetArray(osm->gx, &gxarray);CHKERRQ(ierr);
525     ierr = VecGetArray(osm->gy, &gyarray);CHKERRQ(ierr);
526     for (i=0, on=0; i<osm->n; ++i, on += oni) {
527       PetscInt oNi;
528       ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
529       /* on a sub communicator */
530       ierr = ISGetSize(osm->ois[i],&oNi);CHKERRQ(ierr);
531       ierr = VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gxarray+on,&osm->x[i]);CHKERRQ(ierr);
532       ierr = VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gyarray+on,&osm->y[i]);CHKERRQ(ierr);
533     }
534     ierr = VecRestoreArray(osm->gx, &gxarray);CHKERRQ(ierr);
535     ierr = VecRestoreArray(osm->gy, &gyarray);CHKERRQ(ierr);
536     /* Create the subdomain solvers */
537     ierr = PetscMalloc1(osm->n,&osm->ksp);CHKERRQ(ierr);
538     for (i=0; i<osm->n; i++) {
539       char subprefix[PETSC_MAX_PATH_LEN+1];
540       ierr        = KSPCreate(((PetscObject)(osm->ois[i]))->comm,&ksp);CHKERRQ(ierr);
541       ierr        = KSPSetErrorIfNotConverged(ksp,pc->erroriffailure);CHKERRQ(ierr);
542       ierr        = PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp);CHKERRQ(ierr);
543       ierr        = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr);
544       ierr        = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
545       ierr        = KSPGetPC(ksp,&subpc);CHKERRQ(ierr); /* Why do we need this here? */
546       if (subdomain_dm) {
547         ierr = KSPSetDM(ksp,subdomain_dm[i]);CHKERRQ(ierr);
548         ierr = DMDestroy(subdomain_dm+i);CHKERRQ(ierr);
549       }
550       ierr        = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
551       ierr        = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr);
552       if (subdomain_names && subdomain_names[i]) {
553         ierr = PetscSNPrintf(subprefix,PETSC_MAX_PATH_LEN,"sub_%s_",subdomain_names[i]);CHKERRQ(ierr);
554         ierr = KSPAppendOptionsPrefix(ksp,subprefix);CHKERRQ(ierr);
555         ierr = PetscFree(subdomain_names[i]);CHKERRQ(ierr);
556       }
557       ierr        = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr);
558       osm->ksp[i] = ksp;
559     }
560     ierr = PetscFree(subdomain_dm);CHKERRQ(ierr);
561     ierr = PetscFree(subdomain_names);CHKERRQ(ierr);
562     scall = MAT_INITIAL_MATRIX;
563   } else { /* if (pc->setupcalled) */
564     /*
565        Destroy the submatrices from the previous iteration
566     */
567     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
568       ierr  = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
569       scall = MAT_INITIAL_MATRIX;
570     }
571     if (osm->permutationIS){
572       ierr = MatCreateSubMatrix(pc->pmat,osm->permutationIS,osm->permutationIS,scall,&osm->permutationP);CHKERRQ(ierr);
573       ierr = PetscObjectReference((PetscObject)osm->permutationP);CHKERRQ(ierr);
574       osm->pcmat = pc->pmat;
575       pc->pmat   = osm->permutationP;
576     }
577   }
578 
579   /*
580      Extract the submatrices.
581   */
582   if (size > 1) {
583     ierr = MatCreateSubMatricesMPI(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);CHKERRQ(ierr);
584   } else {
585     ierr = MatCreateSubMatrices(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);CHKERRQ(ierr);
586   }
587   if (scall == MAT_INITIAL_MATRIX) {
588     ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr);
589     for (i=0; i<osm->n; i++) {
590       ierr = PetscLogObjectParent((PetscObject)pc,(PetscObject)osm->pmat[i]);CHKERRQ(ierr);
591       ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr);
592     }
593   }
594 
595   /* Return control to the user so that the submatrices can be modified (e.g., to apply
596      different boundary conditions for the submatrices than for the global problem) */
597   ierr = PCModifySubMatrices(pc,osm->n,osm->ois,osm->ois,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr);
598 
599   /*
600      Loop over submatrices putting them into local ksps
601   */
602   for (i=0; i<osm->n; i++) {
603     ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i]);CHKERRQ(ierr);
604     if (!pc->setupcalled) {
605       ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr);
606     }
607   }
608   if (osm->pcmat){
609     ierr = MatDestroy(&pc->pmat);CHKERRQ(ierr);
610     pc->pmat   = osm->pcmat;
611     osm->pcmat = NULL;
612   }
613   PetscFunctionReturn(0);
614 }
615 
616 static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc)
617 {
618   PC_GASM        *osm = (PC_GASM*)pc->data;
619   PetscErrorCode ierr;
620   PetscInt       i;
621 
622   PetscFunctionBegin;
623   for (i=0; i<osm->n; i++) {
624     ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr);
625   }
626   PetscFunctionReturn(0);
627 }
628 
629 static PetscErrorCode PCApply_GASM(PC pc,Vec xin,Vec yout)
630 {
631   PC_GASM        *osm = (PC_GASM*)pc->data;
632   PetscErrorCode ierr;
633   PetscInt       i;
634   Vec            x,y;
635   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
636 
637   PetscFunctionBegin;
638   if (osm->pctoouter) {
639     ierr = VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
640     ierr = VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
641     x = osm->pcx;
642     y = osm->pcy;
643   } else {
644     x = xin;
645     y = yout;
646   }
647   /*
648      support for limiting the restriction or interpolation only to the inner
649      subdomain values (leaving the other values 0).
650   */
651   if (!(osm->type & PC_GASM_RESTRICT)) {
652     /* have to zero the work RHS since scatter may leave some slots empty */
653     ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
654     ierr = VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
655   } else {
656     ierr = VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
657   }
658   ierr = VecZeroEntries(osm->gy);CHKERRQ(ierr);
659   if (!(osm->type & PC_GASM_RESTRICT)) {
660     ierr = VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
661   } else {
662     ierr = VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
663   }
664   /* do the subdomain solves */
665   for (i=0; i<osm->n; ++i) {
666     ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
667     ierr = KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);CHKERRQ(ierr);
668   }
669   /* do we need to zero y? */
670   ierr = VecZeroEntries(y);CHKERRQ(ierr);
671   if (!(osm->type & PC_GASM_INTERPOLATE)) {
672     ierr = VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
673     ierr = VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
674   } else {
675     ierr = VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
676     ierr = VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
677   }
678   if (osm->pctoouter) {
679     ierr = VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
680     ierr = VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
681   }
682   PetscFunctionReturn(0);
683 }
684 
685 static PetscErrorCode PCMatApply_GASM(PC pc,Mat Xin,Mat Yout)
686 {
687   PC_GASM        *osm = (PC_GASM*)pc->data;
688   Mat            X,Y,O=NULL,Z,W;
689   Vec            x,y;
690   PetscInt       i,m,M,N;
691   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
692   PetscErrorCode ierr;
693 
694   PetscFunctionBegin;
695   if (osm->n != 1) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented");
696   ierr = MatGetSize(Xin,NULL,&N);CHKERRQ(ierr);
697   if (osm->pctoouter) {
698     ierr = VecGetLocalSize(osm->pcx,&m);CHKERRQ(ierr);
699     ierr = VecGetSize(osm->pcx,&M);CHKERRQ(ierr);
700     ierr = MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&O);CHKERRQ(ierr);
701     for (i = 0; i < N; ++i) {
702       ierr = MatDenseGetColumnVecRead(Xin,i,&x);
703       ierr = MatDenseGetColumnVecWrite(O,i,&y);
704       ierr = VecScatterBegin(osm->pctoouter,x,y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
705       ierr = VecScatterEnd(osm->pctoouter,x,y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
706       ierr = MatDenseRestoreColumnVecWrite(O,i,&y);
707       ierr = MatDenseRestoreColumnVecRead(Xin,i,&x);
708     }
709     X = Y = O;
710   } else {
711     X = Xin;
712     Y = Yout;
713   }
714   /*
715      support for limiting the restriction or interpolation only to the inner
716      subdomain values (leaving the other values 0).
717   */
718   ierr = VecGetLocalSize(osm->x[0],&m);CHKERRQ(ierr);
719   ierr = VecGetSize(osm->x[0],&M);CHKERRQ(ierr);
720   ierr = MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&Z);CHKERRQ(ierr);
721   for (i = 0; i < N; ++i) {
722     ierr = MatDenseGetColumnVecRead(X,i,&x);
723     ierr = MatDenseGetColumnVecWrite(Z,i,&y);
724     if (!(osm->type & PC_GASM_RESTRICT)) {
725       /* have to zero the work RHS since scatter may leave some slots empty */
726       ierr = VecZeroEntries(y);CHKERRQ(ierr);
727       ierr = VecScatterBegin(osm->girestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr);
728       ierr = VecScatterEnd(osm->girestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr);
729     } else {
730       ierr = VecScatterBegin(osm->gorestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr);
731       ierr = VecScatterEnd(osm->gorestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr);
732     }
733     ierr = MatDenseRestoreColumnVecWrite(Z,i,&y);
734     ierr = MatDenseRestoreColumnVecRead(X,i,&x);
735   }
736   ierr = MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&W);CHKERRQ(ierr);
737   ierr = MatSetOption(Z,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
738   ierr = MatAssemblyBegin(Z,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
739   ierr = MatAssemblyEnd(Z,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
740   /* do the subdomain solve */
741   ierr = KSPMatSolve(osm->ksp[0],Z,W);CHKERRQ(ierr);
742   ierr = KSPCheckSolve(osm->ksp[0],pc,NULL);CHKERRQ(ierr);
743   ierr = MatDestroy(&Z);CHKERRQ(ierr);
744   /* do we need to zero y? */
745   ierr = MatZeroEntries(Y);CHKERRQ(ierr);
746   for (i = 0; i < N; ++i) {
747     ierr = MatDenseGetColumnVecWrite(Y,i,&y);
748     ierr = MatDenseGetColumnVecRead(W,i,&x);
749     if (!(osm->type & PC_GASM_INTERPOLATE)) {
750       ierr = VecScatterBegin(osm->girestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr);
751       ierr = VecScatterEnd(osm->girestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr);
752     } else {
753       ierr = VecScatterBegin(osm->gorestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr);
754       ierr = VecScatterEnd(osm->gorestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr);
755     }
756     ierr = MatDenseRestoreColumnVecRead(W,i,&x);
757     if (osm->pctoouter) {
758       ierr = MatDenseGetColumnVecWrite(Yout,i,&x);
759       ierr = VecScatterBegin(osm->pctoouter,y,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
760       ierr = VecScatterEnd(osm->pctoouter,y,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
761       ierr = MatDenseRestoreColumnVecRead(Yout,i,&x);
762     }
763     ierr = MatDenseRestoreColumnVecWrite(Y,i,&y);
764   }
765   ierr = MatDestroy(&W);CHKERRQ(ierr);
766   ierr = MatDestroy(&O);CHKERRQ(ierr);
767   PetscFunctionReturn(0);
768 }
769 
770 static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec xin,Vec yout)
771 {
772   PC_GASM        *osm = (PC_GASM*)pc->data;
773   PetscErrorCode ierr;
774   PetscInt       i;
775   Vec            x,y;
776   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
777 
778   PetscFunctionBegin;
779   if (osm->pctoouter){
780    ierr = VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
781    ierr = VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
782    x = osm->pcx;
783    y = osm->pcy;
784   }else{
785         x = xin;
786         y = yout;
787   }
788   /*
789      Support for limiting the restriction or interpolation to only local
790      subdomain values (leaving the other values 0).
791 
792      Note: these are reversed from the PCApply_GASM() because we are applying the
793      transpose of the three terms
794   */
795   if (!(osm->type & PC_GASM_INTERPOLATE)) {
796     /* have to zero the work RHS since scatter may leave some slots empty */
797     ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
798     ierr = VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
799   } else {
800     ierr = VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
801   }
802   ierr = VecZeroEntries(osm->gy);CHKERRQ(ierr);
803   if (!(osm->type & PC_GASM_INTERPOLATE)) {
804     ierr = VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
805   } else {
806     ierr = VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
807   }
808   /* do the local solves */
809   for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
810     ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
811     ierr = KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);CHKERRQ(ierr);
812   }
813   ierr = VecZeroEntries(y);CHKERRQ(ierr);
814   if (!(osm->type & PC_GASM_RESTRICT)) {
815     ierr = VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
816     ierr = VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
817   } else {
818     ierr = VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
819     ierr = VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
820   }
821   if (osm->pctoouter){
822    ierr = VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
823    ierr = VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
824   }
825   PetscFunctionReturn(0);
826 }
827 
828 static PetscErrorCode PCReset_GASM(PC pc)
829 {
830   PC_GASM        *osm = (PC_GASM*)pc->data;
831   PetscErrorCode ierr;
832   PetscInt       i;
833 
834   PetscFunctionBegin;
835   if (osm->ksp) {
836     for (i=0; i<osm->n; i++) {
837       ierr = KSPReset(osm->ksp[i]);CHKERRQ(ierr);
838     }
839   }
840   if (osm->pmat) {
841     if (osm->n > 0) {
842       PetscMPIInt size;
843       ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRMPI(ierr);
844       if (size > 1) {
845         /* osm->pmat is created by MatCreateSubMatricesMPI(), cannot use MatDestroySubMatrices() */
846         ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
847       } else {
848         ierr = MatDestroySubMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
849       }
850     }
851   }
852   if (osm->x) {
853     for (i=0; i<osm->n; i++) {
854       ierr = VecDestroy(&osm->x[i]);CHKERRQ(ierr);
855       ierr = VecDestroy(&osm->y[i]);CHKERRQ(ierr);
856     }
857   }
858   ierr = VecDestroy(&osm->gx);CHKERRQ(ierr);
859   ierr = VecDestroy(&osm->gy);CHKERRQ(ierr);
860 
861   ierr = VecScatterDestroy(&osm->gorestriction);CHKERRQ(ierr);
862   ierr = VecScatterDestroy(&osm->girestriction);CHKERRQ(ierr);
863   if (!osm->user_subdomains) {
864     ierr      = PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);CHKERRQ(ierr);
865     osm->N    = PETSC_DETERMINE;
866     osm->nmax = PETSC_DETERMINE;
867   }
868   if (osm->pctoouter){
869         ierr = VecScatterDestroy(&(osm->pctoouter));CHKERRQ(ierr);
870   }
871   if (osm->permutationIS){
872         ierr = ISDestroy(&(osm->permutationIS));CHKERRQ(ierr);
873   }
874   if (osm->pcx){
875         ierr = VecDestroy(&(osm->pcx));CHKERRQ(ierr);
876   }
877   if (osm->pcy){
878         ierr = VecDestroy(&(osm->pcy));CHKERRQ(ierr);
879   }
880   if (osm->permutationP){
881     ierr = MatDestroy(&(osm->permutationP));CHKERRQ(ierr);
882   }
883   if (osm->pcmat){
884         ierr = MatDestroy(&osm->pcmat);CHKERRQ(ierr);
885   }
886   PetscFunctionReturn(0);
887 }
888 
889 static PetscErrorCode PCDestroy_GASM(PC pc)
890 {
891   PC_GASM        *osm = (PC_GASM*)pc->data;
892   PetscErrorCode ierr;
893   PetscInt       i;
894 
895   PetscFunctionBegin;
896   ierr = PCReset_GASM(pc);CHKERRQ(ierr);
897   /* PCReset will not destroy subdomains, if user_subdomains is true. */
898   ierr = PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);CHKERRQ(ierr);
899   if (osm->ksp) {
900     for (i=0; i<osm->n; i++) {
901       ierr = KSPDestroy(&osm->ksp[i]);CHKERRQ(ierr);
902     }
903     ierr = PetscFree(osm->ksp);CHKERRQ(ierr);
904   }
905   ierr = PetscFree(osm->x);CHKERRQ(ierr);
906   ierr = PetscFree(osm->y);CHKERRQ(ierr);
907   ierr = PetscFree(pc->data);CHKERRQ(ierr);
908   PetscFunctionReturn(0);
909 }
910 
/* Process the -pc_gasm_* options database entries for this preconditioner. */
static PetscErrorCode PCSetFromOptions_GASM(PetscOptionItems *PetscOptionsObject,PC pc)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       blocks,ovl;
  PetscBool      flg;
  PCGASMType     gasmtype;

  PetscFunctionBegin;
  ierr = PetscOptionsHead(PetscOptionsObject,"Generalized additive Schwarz options");CHKERRQ(ierr);
  ierr = PetscOptionsBool("-pc_gasm_use_dm_subdomains","If subdomains aren't set, use DMCreateDomainDecomposition() to define subdomains.","PCGASMSetUseDMSubdomains",osm->dm_subdomains,&osm->dm_subdomains,&flg);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-pc_gasm_total_subdomains","Total number of subdomains across communicator","PCGASMSetTotalSubdomains",osm->N,&blocks,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCGASMSetTotalSubdomains(pc,blocks);CHKERRQ(ierr);
  }
  ierr = PetscOptionsInt("-pc_gasm_overlap","Number of overlapping degrees of freedom","PCGASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PCGASMSetOverlap(pc,ovl);CHKERRQ(ierr);
    /* an explicit overlap disables DM-defined subdomains (mirrors PCGASMSetOverlap()) */
    osm->dm_subdomains = PETSC_FALSE;
  }
  flg  = PETSC_FALSE;
  ierr = PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);CHKERRQ(ierr);
  if (flg) {ierr = PCGASMSetType(pc,gasmtype);CHKERRQ(ierr);}
  /* NOTE(review): the option name below contains a typo ("hierachical"); it is kept
     as-is because renaming it would break existing users' option databases */
  ierr = PetscOptionsBool("-pc_gasm_use_hierachical_partitioning","use hierarchical partitioning",NULL,osm->hierarchicalpartitioning,&osm->hierarchicalpartitioning,&flg);CHKERRQ(ierr);
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
938 
939 /*------------------------------------------------------------------------------------*/
940 
941 /*@
942     PCGASMSetTotalSubdomains - sets the total number of subdomains to use across the
943                                communicator.
944     Logically collective on pc
945 
946     Input Parameters:
947 +   pc  - the preconditioner
948 -   N   - total number of subdomains
949 
950 
951     Level: beginner
952 
953 .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap()
954           PCGASMCreateSubdomains2D()
955 @*/
956 PetscErrorCode  PCGASMSetTotalSubdomains(PC pc,PetscInt N)
957 {
958   PC_GASM        *osm = (PC_GASM*)pc->data;
959   PetscMPIInt    size,rank;
960   PetscErrorCode ierr;
961 
962   PetscFunctionBegin;
963   if (N < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Total number of subdomains must be 1 or more, got N = %D",N);
964   if (pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before calling PCSetUp().");
965 
966   ierr = PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);CHKERRQ(ierr);
967   osm->ois = osm->iis = NULL;
968 
969   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRMPI(ierr);
970   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRMPI(ierr);
971   osm->N    = N;
972   osm->n    = PETSC_DETERMINE;
973   osm->nmax = PETSC_DETERMINE;
974   osm->dm_subdomains = PETSC_FALSE;
975   PetscFunctionReturn(0);
976 }
977 
978 static PetscErrorCode  PCGASMSetSubdomains_GASM(PC pc,PetscInt n,IS iis[],IS ois[])
979 {
980   PC_GASM         *osm = (PC_GASM*)pc->data;
981   PetscErrorCode  ierr;
982   PetscInt        i;
983 
984   PetscFunctionBegin;
985   if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more subdomains, got n = %D",n);
986   if (pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetSubdomains() should be called before calling PCSetUp().");
987 
988   ierr = PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);CHKERRQ(ierr);
989   osm->iis  = osm->ois = NULL;
990   osm->n    = n;
991   osm->N    = PETSC_DETERMINE;
992   osm->nmax = PETSC_DETERMINE;
993   if (ois) {
994     ierr = PetscMalloc1(n,&osm->ois);CHKERRQ(ierr);
995     for (i=0; i<n; i++) {
996       ierr = PetscObjectReference((PetscObject)ois[i]);CHKERRQ(ierr);
997       osm->ois[i] = ois[i];
998     }
999     /*
1000        Since the user set the outer subdomains, even if nontrivial overlap was requested via PCGASMSetOverlap(),
1001        it will be ignored.  To avoid confusion later on (e.g., when viewing the PC), the overlap size is set to -1.
1002     */
1003     osm->overlap = -1;
1004     /* inner subdomains must be provided  */
1005     if (!iis) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"inner indices have to be provided \n");
1006   }/* end if */
1007   if (iis) {
1008     ierr = PetscMalloc1(n,&osm->iis);CHKERRQ(ierr);
1009     for (i=0; i<n; i++) {
1010       ierr = PetscObjectReference((PetscObject)iis[i]);CHKERRQ(ierr);
1011       osm->iis[i] = iis[i];
1012     }
1013     if (!ois) {
1014       osm->ois = NULL;
1015       /* if user does not provide outer indices, we will create the corresponding outer indices using  osm->overlap =1 in PCSetUp_GASM */
1016     }
1017   }
1018   if (PetscDefined(USE_DEBUG)) {
1019     PetscInt        j,rstart,rend,*covered,lsize;
1020     const PetscInt  *indices;
1021     /* check if the inner indices cover and only cover the local portion of the preconditioning matrix */
1022     ierr = MatGetOwnershipRange(pc->pmat,&rstart,&rend);CHKERRQ(ierr);
1023     ierr = PetscCalloc1(rend-rstart,&covered);CHKERRQ(ierr);
1024     /* check if the current processor owns indices from others */
1025     for (i=0; i<n; i++) {
1026       ierr = ISGetIndices(osm->iis[i],&indices);CHKERRQ(ierr);
1027       ierr = ISGetLocalSize(osm->iis[i],&lsize);CHKERRQ(ierr);
1028       for (j=0; j<lsize; j++) {
1029         if (indices[j]<rstart || indices[j]>=rend) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"inner subdomains can not own an index %d from other processors", indices[j]);
1030         else if (covered[indices[j]-rstart]==1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"inner subdomains can not have an overlapping index %d ",indices[j]);
1031         else covered[indices[j]-rstart] = 1;
1032       }
1033     ierr = ISRestoreIndices(osm->iis[i],&indices);CHKERRQ(ierr);
1034     }
1035     /* check if we miss any indices */
1036     for (i=rstart; i<rend; i++) {
1037       if (!covered[i-rstart]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"local entity %d was not covered by inner subdomains",i);
1038     }
1039     ierr = PetscFree(covered);CHKERRQ(ierr);
1040   }
1041   if (iis)  osm->user_subdomains = PETSC_TRUE;
1042   PetscFunctionReturn(0);
1043 }
1044 
1045 static PetscErrorCode  PCGASMSetOverlap_GASM(PC pc,PetscInt ovl)
1046 {
1047   PC_GASM *osm = (PC_GASM*)pc->data;
1048 
1049   PetscFunctionBegin;
1050   if (ovl < 0) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
1051   if (pc->setupcalled && ovl != osm->overlap) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp().");
1052   if (!pc->setupcalled) osm->overlap = ovl;
1053   PetscFunctionReturn(0);
1054 }
1055 
/* Implementation of PCGASMSetType(): record the restriction/interpolation variant */
static PetscErrorCode  PCGASMSetType_GASM(PC pc,PCGASMType type)
{
  PC_GASM *osm = (PC_GASM*)pc->data;

  PetscFunctionBegin;
  osm->type     = type;
  osm->type_set = PETSC_TRUE;   /* user set this explicitly, so it won't be changed for symmetric problems */
  PetscFunctionReturn(0);
}
1065 
/* Implementation of PCGASMSetSortIndices(): record whether subdomain indices should be sorted */
static PetscErrorCode  PCGASMSetSortIndices_GASM(PC pc,PetscBool doSort)
{
  PC_GASM *osm = (PC_GASM*)pc->data;

  PetscFunctionBegin;
  osm->sort_indices = doSort;
  PetscFunctionReturn(0);
}
1074 
1075 /*
1076    FIXME: This routine might need to be modified now that multiple ranks per subdomain are allowed.
1077         In particular, it would upset the global subdomain number calculation.
1078 */
1079 static PetscErrorCode  PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp)
1080 {
1081   PC_GASM        *osm = (PC_GASM*)pc->data;
1082   PetscErrorCode ierr;
1083 
1084   PetscFunctionBegin;
1085   if (osm->n < 1) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ORDER,"Need to call PCSetUp() on PC (or KSPSetUp() on the outer KSP object) before calling here");
1086 
1087   if (n) *n = osm->n;
1088   if (first) {
1089     ierr    = MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1090     *first -= osm->n;
1091   }
1092   if (ksp) {
1093     /* Assume that local solves are now different; not necessarily
1094        true, though!  This flag is used only for PCView_GASM() */
1095     *ksp                        = osm->ksp;
1096     osm->same_subdomain_solvers = PETSC_FALSE;
1097   }
1098   PetscFunctionReturn(0);
1099 } /* PCGASMGetSubKSP_GASM() */
1100 
1101 /*@C
1102     PCGASMSetSubdomains - Sets the subdomains for this processor
1103     for the additive Schwarz preconditioner.
1104 
1105     Collective on pc
1106 
1107     Input Parameters:
1108 +   pc  - the preconditioner object
1109 .   n   - the number of subdomains for this processor
1110 .   iis - the index sets that define the inner subdomains (or NULL for PETSc to determine subdomains)
1111 -   ois - the index sets that define the outer subdomains (or NULL to use the same as iis, or to construct by expanding iis by the requested overlap)
1112 
1113     Notes:
1114     The IS indices use the parallel, global numbering of the vector entries.
1115     Inner subdomains are those where the correction is applied.
1116     Outer subdomains are those where the residual necessary to obtain the
1117     corrections is obtained (see PCGASMType for the use of inner/outer subdomains).
1118     Both inner and outer subdomains can extend over several processors.
1119     This processor's portion of a subdomain is known as a local subdomain.
1120 
1121     Inner subdomains can not overlap with each other, do not have any entities from remote processors,
1122     and  have to cover the entire local subdomain owned by the current processor. The index sets on each
1123     process should be ordered such that the ith local subdomain is connected to the ith remote subdomain
1124     on another MPI process.
1125 
1126     By default the GASM preconditioner uses 1 (local) subdomain per processor.
1127 
1128 
1129     Level: advanced
1130 
1131 .seealso: PCGASMSetNumSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
1132           PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
1133 @*/
1134 PetscErrorCode  PCGASMSetSubdomains(PC pc,PetscInt n,IS iis[],IS ois[])
1135 {
1136   PC_GASM *osm = (PC_GASM*)pc->data;
1137   PetscErrorCode ierr;
1138 
1139   PetscFunctionBegin;
1140   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1141   ierr = PetscTryMethod(pc,"PCGASMSetSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,iis,ois));CHKERRQ(ierr);
1142   osm->dm_subdomains = PETSC_FALSE;
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 /*@
1147     PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
1148     additive Schwarz preconditioner.  Either all or no processors in the
1149     pc communicator must call this routine.
1150 
1151     Logically Collective on pc
1152 
1153     Input Parameters:
1154 +   pc  - the preconditioner context
1155 -   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 0)
1156 
1157     Options Database Key:
1158 .   -pc_gasm_overlap <overlap> - Sets overlap
1159 
1160     Notes:
1161     By default the GASM preconditioner uses 1 subdomain per processor.  To use
    multiple subdomains per processor or "straddling" subdomains that intersect
1163     multiple processors use PCGASMSetSubdomains() (or option -pc_gasm_total_subdomains <n>).
1164 
1165     The overlap defaults to 0, so if one desires that no additional
1166     overlap be computed beyond what may have been set with a call to
1167     PCGASMSetSubdomains(), then ovl must be set to be 0.  In particular, if one does
1168     not explicitly set the subdomains in application code, then all overlap would be computed
1169     internally by PETSc, and using an overlap of 0 would result in an GASM
1170     variant that is equivalent to the block Jacobi preconditioner.
1171 
1172     Note that one can define initial index sets with any overlap via
1173     PCGASMSetSubdomains(); the routine PCGASMSetOverlap() merely allows
1174     PETSc to extend that overlap further, if desired.
1175 
1176     Level: intermediate
1177 
1178 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1179           PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
1180 @*/
1181 PetscErrorCode  PCGASMSetOverlap(PC pc,PetscInt ovl)
1182 {
1183   PetscErrorCode ierr;
1184   PC_GASM *osm = (PC_GASM*)pc->data;
1185 
1186   PetscFunctionBegin;
1187   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1188   PetscValidLogicalCollectiveInt(pc,ovl,2);
1189   ierr = PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));CHKERRQ(ierr);
1190   osm->dm_subdomains = PETSC_FALSE;
1191   PetscFunctionReturn(0);
1192 }
1193 
1194 /*@
1195     PCGASMSetType - Sets the type of restriction and interpolation used
1196     for local problems in the additive Schwarz method.
1197 
1198     Logically Collective on PC
1199 
1200     Input Parameters:
1201 +   pc  - the preconditioner context
1202 -   type - variant of GASM, one of
1203 .vb
1204       PC_GASM_BASIC       - full interpolation and restriction
1205       PC_GASM_RESTRICT    - full restriction, local processor interpolation
1206       PC_GASM_INTERPOLATE - full interpolation, local processor restriction
1207       PC_GASM_NONE        - local processor restriction and interpolation
1208 .ve
1209 
1210     Options Database Key:
1211 .   -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
1212 
1213     Level: intermediate
1214 
1215 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1216           PCGASMCreateSubdomains2D()
1217 @*/
PetscErrorCode  PCGASMSetType(PC pc,PCGASMType type)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveEnum(pc,type,2);
  /* dispatch to PCGASMSetType_GASM() if registered; PetscTryMethod is a no-op for other PC types */
  ierr = PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1228 
1229 /*@
1230     PCGASMSetSortIndices - Determines whether subdomain indices are sorted.
1231 
1232     Logically Collective on PC
1233 
1234     Input Parameters:
1235 +   pc  - the preconditioner context
1236 -   doSort - sort the subdomain indices
1237 
1238     Level: intermediate
1239 
1240 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1241           PCGASMCreateSubdomains2D()
1242 @*/
PetscErrorCode  PCGASMSetSortIndices(PC pc,PetscBool doSort)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveBool(pc,doSort,2);
  /* dispatch to PCGASMSetSortIndices_GASM() if registered; PetscTryMethod is a no-op for other PC types */
  ierr = PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1253 
1254 /*@C
1255    PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on
1256    this processor.
1257 
1258    Collective on PC iff first_local is requested
1259 
1260    Input Parameter:
1261 .  pc - the preconditioner context
1262 
1263    Output Parameters:
1264 +  n_local - the number of blocks on this processor or NULL
1265 .  first_local - the global number of the first block on this processor or NULL,
1266                  all processors must request or all must pass NULL
1267 -  ksp - the array of KSP contexts
1268 
1269    Note:
   After PCGASMGetSubKSP() the array of KSP contexts is not to be freed
1271 
1272    Currently for some matrix implementations only 1 block per processor
1273    is supported.
1274 
1275    You must call KSPSetUp() before calling PCGASMGetSubKSP().
1276 
1277    Level: advanced
1278 
1279 .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap(),
1280           PCGASMCreateSubdomains2D(),
1281 @*/
PetscErrorCode  PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* PetscUseMethod (unlike PetscTryMethod) errors if the PC is not of a type providing PCGASMGetSubKSP_C */
  ierr = PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1291 
1292 /* -------------------------------------------------------------------------------------*/
1293 /*MC
1294    PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
1295            its own KSP object.
1296 
1297    Options Database Keys:
1298 +  -pc_gasm_total_subdomains <n>  - Sets total number of local subdomains to be distributed among processors
1299 .  -pc_gasm_view_subdomains       - activates the printing of subdomain indices in PCView(), -ksp_view or -snes_view
1300 .  -pc_gasm_print_subdomains      - activates the printing of subdomain indices in PCSetUp()
1301 .  -pc_gasm_overlap <ovl>         - Sets overlap by which to (automatically) extend local subdomains
1302 -  -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
1303 
1304      IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
1305       will get a different convergence rate due to the default option of -pc_gasm_type restrict. Use
1306       -pc_gasm_type basic to use the standard GASM.
1307 
1308    Notes:
1309     Blocks can be shared by multiple processes.
1310 
1311      To set options on the solvers for each block append -sub_ to all the KSP, and PC
1312         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
1313 
1314      To set the options on the solvers separate for each block call PCGASMGetSubKSP()
1315          and set the options directly on the resulting KSP object (you can access its PC
1316          with KSPGetPC())
1317 
1318 
1319    Level: beginner
1320 
1321     References:
1322 +   1. - M Dryja, OB Widlund, An additive variant of the Schwarz alternating method for the case of many subregions
1323      Courant Institute, New York University Technical report
1324 -   2. - Barry Smith, Petter Bjorstad, and William Gropp, Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
1325     Cambridge University Press.
1326 
1327 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
1328            PCBJACOBI,  PCGASMGetSubKSP(), PCGASMSetSubdomains(),
1329            PCSetModifySubMatrices(), PCGASMSetOverlap(), PCGASMSetType()
1330 
1331 M*/
1332 
1333 PETSC_EXTERN PetscErrorCode PCCreate_GASM(PC pc)
1334 {
1335   PetscErrorCode ierr;
1336   PC_GASM        *osm;
1337 
1338   PetscFunctionBegin;
1339   ierr = PetscNewLog(pc,&osm);CHKERRQ(ierr);
1340 
1341   osm->N                        = PETSC_DETERMINE;
1342   osm->n                        = PETSC_DECIDE;
1343   osm->nmax                     = PETSC_DETERMINE;
1344   osm->overlap                  = 0;
1345   osm->ksp                      = NULL;
1346   osm->gorestriction            = NULL;
1347   osm->girestriction            = NULL;
1348   osm->pctoouter                = NULL;
1349   osm->gx                       = NULL;
1350   osm->gy                       = NULL;
1351   osm->x                        = NULL;
1352   osm->y                        = NULL;
1353   osm->pcx                      = NULL;
1354   osm->pcy                      = NULL;
1355   osm->permutationIS            = NULL;
1356   osm->permutationP             = NULL;
1357   osm->pcmat                    = NULL;
1358   osm->ois                      = NULL;
1359   osm->iis                      = NULL;
1360   osm->pmat                     = NULL;
1361   osm->type                     = PC_GASM_RESTRICT;
1362   osm->same_subdomain_solvers   = PETSC_TRUE;
1363   osm->sort_indices             = PETSC_TRUE;
1364   osm->dm_subdomains            = PETSC_FALSE;
1365   osm->hierarchicalpartitioning = PETSC_FALSE;
1366 
1367   pc->data                 = (void*)osm;
1368   pc->ops->apply           = PCApply_GASM;
1369   pc->ops->matapply        = PCMatApply_GASM;
1370   pc->ops->applytranspose  = PCApplyTranspose_GASM;
1371   pc->ops->setup           = PCSetUp_GASM;
1372   pc->ops->reset           = PCReset_GASM;
1373   pc->ops->destroy         = PCDestroy_GASM;
1374   pc->ops->setfromoptions  = PCSetFromOptions_GASM;
1375   pc->ops->setuponblocks   = PCSetUpOnBlocks_GASM;
1376   pc->ops->view            = PCView_GASM;
1377   pc->ops->applyrichardson = NULL;
1378 
1379   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSubdomains_C",PCGASMSetSubdomains_GASM);CHKERRQ(ierr);
1380   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetOverlap_C",PCGASMSetOverlap_GASM);CHKERRQ(ierr);
1381   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetType_C",PCGASMSetType_GASM);CHKERRQ(ierr);
1382   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSortIndices_C",PCGASMSetSortIndices_GASM);CHKERRQ(ierr);
1383   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMGetSubKSP_C",PCGASMGetSubKSP_GASM);CHKERRQ(ierr);
1384   PetscFunctionReturn(0);
1385 }
1386 
/*
   PCGASMCreateLocalSubdomains - Creates nloc nonoverlapping index sets covering this
   process's ownership range of A.  If a non-trivial MatPartitioning type was selected
   through the options database and the sequential diagonal block of A is available,
   the local rows are split by partitioning the adjacency graph of that block;
   otherwise the ownership range is split into contiguous, block-size-aligned chunks.
   The returned array *iis and the ISs in it are owned by the caller.
*/
PetscErrorCode  PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[])
{
  MatPartitioning mpart;
  const char      *prefix;
  PetscInt        i,j,rstart,rend,bs;
  PetscBool       hasop, isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
  Mat             Ad     = NULL, adj;
  IS              ispart,isnumb,*is;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  if (nloc < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local subdomains must > 0, got nloc = %D",nloc);

  /* Get prefix, row distribution, and block size */
  ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  /* The local row range must be aligned with the block size so blocks are never split */
  if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);

  /* Get diagonal block from matrix if possible */
  ierr = MatHasOperation(A,MATOP_GET_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
  if (hasop) {
    ierr = MatGetDiagonalBlock(A,&Ad);CHKERRQ(ierr);
  }
  if (Ad) {
    /* Detect (s)baij storage: the row IJ must then be requested in compressed (block) form */
    ierr = PetscObjectBaseTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
    if (!isbaij) {ierr = PetscObjectBaseTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);}
  }
  if (Ad && nloc > 1) {
    PetscBool  match,done;
    /* Try to setup a good matrix partitioning if available */
    ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr);
    ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
    ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
    /* CURRENT and SQUARE are trivial partitioners -- only proceed with a real one */
    ierr = PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);CHKERRQ(ierr);
    if (!match) {
      ierr = PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);CHKERRQ(ierr);
    }
    if (!match) { /* assume a "good" partitioner is available */
      PetscInt       na;
      const PetscInt *ia,*ja;
      ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
      if (done) {
        /* Build adjacency matrix by hand. Unfortunately a call to
           MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
           remove the block-aij structure and we cannot expect
           MatPartitioning to split vertices as we need */
        PetscInt       i,j,len,nnz,cnt,*iia=NULL,*jja=NULL;
        const PetscInt *row;
        nnz = 0;
        for (i=0; i<na; i++) { /* count number of nonzeros */
          len = ia[i+1] - ia[i];
          row = ja + ia[i];
          for (j=0; j<len; j++) {
            if (row[j] == i) { /* don't count diagonal */
              len--; break;
            }
          }
          nnz += len;
        }
        ierr   = PetscMalloc1(na+1,&iia);CHKERRQ(ierr);
        ierr   = PetscMalloc1(nnz,&jja);CHKERRQ(ierr);
        nnz    = 0;
        iia[0] = 0;
        for (i=0; i<na; i++) { /* fill adjacency */
          cnt = 0;
          len = ia[i+1] - ia[i];
          row = ja + ia[i];
          for (j=0; j<len; j++) {
            if (row[j] != i) jja[nnz+cnt++] = row[j]; /* if not diagonal */
          }
          nnz += cnt;
          iia[i+1] = nnz;
        }
        /* Partitioning of the adjacency matrix */
        /* NOTE(review): iia/jja are not freed here -- presumably MatCreateMPIAdj takes
           ownership of the arrays; confirm against its documentation */
        ierr      = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,NULL,&adj);CHKERRQ(ierr);
        ierr      = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr);
        ierr      = MatPartitioningSetNParts(mpart,nloc);CHKERRQ(ierr);
        ierr      = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr);
        ierr      = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr);
        ierr      = MatDestroy(&adj);CHKERRQ(ierr);
        foundpart = PETSC_TRUE;
      }
      ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
    }
    ierr = MatPartitioningDestroy(&mpart);CHKERRQ(ierr);
  }
  ierr = PetscMalloc1(nloc,&is);CHKERRQ(ierr);
  if (!foundpart) {

    /* Partitioning by contiguous chunks of rows */

    PetscInt mbs   = (rend-rstart)/bs;       /* number of local blocks */
    PetscInt start = rstart;
    for (i=0; i<nloc; i++) {
      /* distribute the remainder mbs%nloc one block at a time over the first subdomains */
      PetscInt count = (mbs/nloc + ((mbs % nloc) > i)) * bs;
      ierr   = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr);
      start += count;
    }

  } else {

    /* Partitioning by adjacency of diagonal block  */

    const PetscInt *numbering;
    PetscInt       *count,nidx,*indices,*newidx,start=0;
    /* Get node count in each partition */
    ierr = PetscMalloc1(nloc,&count);CHKERRQ(ierr);
    ierr = ISPartitioningCount(ispart,nloc,count);CHKERRQ(ierr);
    if (isbaij && bs > 1) { /* adjust for the block-aij case */
      for (i=0; i<nloc; i++) count[i] *= bs;
    }
    /* Build indices from node numbering */
    ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr);
    ierr = PetscMalloc1(nidx,&indices);CHKERRQ(ierr);
    for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
    ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr);
    /* After sorting, indices[] groups the local nodes by subdomain */
    ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr);
    ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr);
    if (isbaij && bs > 1) { /* adjust for the block-aij case */
      /* expand each block index into its bs constituent point indices */
      ierr = PetscMalloc1(nidx*bs,&newidx);CHKERRQ(ierr);
      for (i=0; i<nidx; i++) {
        for (j=0; j<bs; j++) newidx[i*bs+j] = indices[i]*bs + j;
      }
      ierr    = PetscFree(indices);CHKERRQ(ierr);
      nidx   *= bs;
      indices = newidx;
    }
    /* Shift to get global indices */
    for (i=0; i<nidx; i++) indices[i] += rstart;

    /* Build the index sets for each block */
    for (i=0; i<nloc; i++) {
      ierr   = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);CHKERRQ(ierr);
      ierr   = ISSort(is[i]);CHKERRQ(ierr);
      start += count[i];
    }

    ierr = PetscFree(count);CHKERRQ(ierr);
    ierr = PetscFree(indices);CHKERRQ(ierr);
    ierr = ISDestroy(&isnumb);CHKERRQ(ierr);
    ierr = ISDestroy(&ispart);CHKERRQ(ierr);
  }
  *iis = is;
  PetscFunctionReturn(0);
}
1533 
/*
   PCGASMCreateStraddlingSubdomains - Thin wrapper: delegates creation of N subdomains
   that may straddle process boundaries to MatSubdomainsCreateCoalesce(), which also
   returns in *n the number of resulting local subdomains.
*/
PETSC_INTERN PetscErrorCode  PCGASMCreateStraddlingSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
{
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  ierr = MatSubdomainsCreateCoalesce(A,N,n,iis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1542 
1543 /*@C
1544    PCGASMCreateSubdomains - Creates n index sets defining n nonoverlapping subdomains for the additive
   Schwarz preconditioner for any problem based on its matrix.
1546 
1547    Collective
1548 
1549    Input Parameters:
1550 +  A       - The global matrix operator
1551 -  N       - the number of global subdomains requested
1552 
1553    Output Parameters:
1554 +  n   - the number of subdomains created on this processor
1555 -  iis - the array of index sets defining the local inner subdomains (on which the correction is applied)
1556 
1557    Level: advanced
1558 
1559    Note: When N >= A's communicator size, each subdomain is local -- contained within a single processor.
1560          When N < size, the subdomains are 'straddling' (processor boundaries) and are no longer local.
1561          The resulting subdomains can be use in PCGASMSetSubdomains(pc,n,iss,NULL).  The overlapping
1562          outer subdomains will be automatically generated from these according to the requested amount of
1563          overlap; this is currently supported only with local subdomains.
1564 
1565 
1566 .seealso: PCGASMSetSubdomains(), PCGASMDestroySubdomains()
1567 @*/
1568 PetscErrorCode  PCGASMCreateSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
1569 {
1570   PetscMPIInt     size;
1571   PetscErrorCode  ierr;
1572 
1573   PetscFunctionBegin;
1574   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1575   PetscValidPointer(iis,4);
1576 
1577   if (N < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Number of subdomains must be > 0, N = %D",N);
1578   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
1579   if (N >= size) {
1580     *n = N/size + (N%size);
1581     ierr = PCGASMCreateLocalSubdomains(A,*n,iis);CHKERRQ(ierr);
1582   } else {
1583     ierr = PCGASMCreateStraddlingSubdomains(A,N,n,iis);CHKERRQ(ierr);
1584   }
1585   PetscFunctionReturn(0);
1586 }
1587 
1588 /*@C
1589    PCGASMDestroySubdomains - Destroys the index sets created with
1590    PCGASMCreateSubdomains() or PCGASMCreateSubdomains2D. Should be
1591    called after setting subdomains with PCGASMSetSubdomains().
1592 
1593    Collective
1594 
1595    Input Parameters:
1596 +  n   - the number of index sets
1597 .  iis - the array of inner subdomains,
1598 -  ois - the array of outer subdomains, can be NULL
1599 
1600    Level: intermediate
1601 
1602    Notes:
1603     this is merely a convenience subroutine that walks each list,
1604    destroys each IS on the list, and then frees the list. At the end the
1605    list pointers are set to NULL.
1606 
1607 .seealso: PCGASMCreateSubdomains(), PCGASMSetSubdomains()
1608 @*/
1609 PetscErrorCode  PCGASMDestroySubdomains(PetscInt n,IS **iis,IS **ois)
1610 {
1611   PetscInt       i;
1612   PetscErrorCode ierr;
1613 
1614   PetscFunctionBegin;
1615   if (n <= 0) PetscFunctionReturn(0);
1616   if (ois) {
1617     PetscValidPointer(ois,3);
1618     if (*ois) {
1619       PetscValidPointer(*ois,3);
1620       for (i=0; i<n; i++) {
1621         ierr = ISDestroy(&(*ois)[i]);CHKERRQ(ierr);
1622       }
1623       ierr = PetscFree((*ois));CHKERRQ(ierr);
1624     }
1625   }
1626   if (iis) {
1627     PetscValidPointer(iis,2);
1628     if (*iis) {
1629       PetscValidPointer(*iis,2);
1630       for (i=0; i<n; i++) {
1631         ierr = ISDestroy(&(*iis)[i]);CHKERRQ(ierr);
1632       }
1633       ierr = PetscFree((*iis));CHKERRQ(ierr);
1634     }
1635   }
1636   PetscFunctionReturn(0);
1637 }
1638 
/*
  PCGASMLocalSubdomainBounds2D - statement macro computing the intersection of the 2D
  subdomain [xleft,xright) x [ylow,yhigh) on an M x N grid (rows of M points stored
  contiguously) with this process's ownership range [first,last) of matrix rows.
  Outputs, through the pointer arguments, the bounding-box corners (*xleft_loc,*ylow_loc)
  and (*xright_loc,*yhigh_loc) of the intersection and the number *n of grid points in it.
  NOTE(review): being a macro, its arguments may be evaluated more than once --
  pass only side-effect-free expressions.
*/
#define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \
  {                                                                                                       \
    PetscInt first_row = first/M, last_row = last/M+1;                                                     \
    /*                                                                                                    \
     Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners       \
     of the bounding box of the intersection of the subdomain with the local ownership range (local       \
     subdomain).                                                                                          \
     Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows       \
     of the intersection.                                                                                 \
    */                                                                                                    \
    /* ylow_loc is the grid row containing the first element of the local sumbdomain */                   \
    *ylow_loc = PetscMax(first_row,ylow);                                                                    \
    /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
    *xleft_loc = *ylow_loc==first_row ? PetscMax(first%M,xleft) : xleft;                                                                            \
    /* yhigh_loc is the grid row above the last local subdomain element */                                                                    \
    *yhigh_loc = PetscMin(last_row,yhigh);                                                                                                     \
    /* xright is the offset of the end of the  local subdomain within its grid row (might actually be outside the local subdomain) */         \
    *xright_loc = *yhigh_loc==last_row ? PetscMin(xright,last%M) : xright;                                                                          \
    /* Now compute the size of the local subdomain n. */ \
    *n = 0;                                               \
    if (*ylow_loc < *yhigh_loc) {                           \
      PetscInt width = xright-xleft;                     \
      *n += width*(*yhigh_loc-*ylow_loc-1);                 \
      *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \
      *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \
    } \
  }
1666 
1667 /*@
1668    PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1669    preconditioner for a two-dimensional problem on a regular grid.
1670 
1671    Collective
1672 
1673    Input Parameters:
1674 +  M, N               - the global number of grid points in the x and y directions
1675 .  Mdomains, Ndomains - the global number of subdomains in the x and y directions
1676 .  dof                - degrees of freedom per node
1677 -  overlap            - overlap in mesh lines
1678 
1679    Output Parameters:
1680 +  Nsub - the number of local subdomains created
1681 .  iis  - array of index sets defining inner (nonoverlapping) subdomains
1682 -  ois  - array of index sets defining outer (overlapping, if overlap > 0) subdomains
1683 
1684 
1685    Level: advanced
1686 
1687 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(), PCGASMSetOverlap()
1688 @*/
PetscErrorCode  PCGASMCreateSubdomains2D(PC pc,PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap,PetscInt *nsub,IS **iis,IS **ois)
{
  PetscErrorCode ierr;
  PetscMPIInt    size, rank;
  PetscInt       i, j;
  PetscInt       maxheight, maxwidth;
  PetscInt       xstart, xleft, xright, xleft_loc, xright_loc;
  PetscInt       ystart, ylow,  yhigh,  ylow_loc,  yhigh_loc;
  PetscInt       x[2][2], y[2][2], n[2];     /* [0] = with overlap, [1] = without */
  PetscInt       first, last;                /* local ownership range of pc->pmat rows */
  PetscInt       nidx, *idx;
  PetscInt       ii,jj,s,q,d;
  PetscInt       k,kk;
  PetscMPIInt    color;
  MPI_Comm       comm, subcomm;
  IS             **xis = NULL, **is = ois, **is_local = iis;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)pc, &comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr);
  ierr = MatGetOwnershipRange(pc->pmat, &first, &last);CHKERRQ(ierr);
  if (first%dof || last%dof) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) "
                                      "does not respect the number of degrees of freedom per grid point %D", first, last, dof);

  /* First pass: determine the number of domains with nonzero intersections with the local ownership range. */
  s      = 0;
  ystart = 0;
  for (j=0; j<Ndomains; ++j) {
    maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
    if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N);
    /* Vertical domain limits with an overlap. */
    ylow   = PetscMax(ystart - overlap,0);
    yhigh  = PetscMin(ystart + maxheight + overlap,N);
    xstart = 0;
    for (i=0; i<Mdomains; ++i) {
      maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
      if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
      /* Horizontal domain limits with an overlap. */
      xleft  = PetscMax(xstart - overlap,0);
      xright = PetscMin(xstart + maxwidth + overlap,M);
      /*
         Determine whether this subdomain intersects this processor's ownership range of pc->pmat.
      */
      PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
      if (nidx) ++s;
      xstart += maxwidth;
    } /* for (i = 0; i < Mdomains; ++i) */
    ystart += maxheight;
  } /* for (j = 0; j < Ndomains; ++j) */

  /* Now we can allocate the necessary number of ISs. */
  *nsub  = s;
  ierr   = PetscMalloc1(*nsub,is);CHKERRQ(ierr);
  ierr   = PetscMalloc1(*nsub,is_local);CHKERRQ(ierr);
  /* Second pass: revisit the same domains and actually build the index sets. */
  s      = 0;
  ystart = 0;
  for (j=0; j<Ndomains; ++j) {
    maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
    if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N);
    /* Vertical domain limits with an overlap. */
    y[0][0] = PetscMax(ystart - overlap,0);
    y[0][1] = PetscMin(ystart + maxheight + overlap,N);
    /* Vertical domain limits without an overlap. */
    y[1][0] = ystart;
    y[1][1] = PetscMin(ystart + maxheight,N);
    xstart  = 0;
    for (i=0; i<Mdomains; ++i) {
      maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
      if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
      /* Horizontal domain limits with an overlap. */
      x[0][0] = PetscMax(xstart - overlap,0);
      x[0][1] = PetscMin(xstart + maxwidth + overlap,M);
      /* Horizontal domain limits without an overlap. */
      x[1][0] = xstart;
      x[1][1] = PetscMin(xstart+maxwidth,M);
      /*
         Determine whether this domain intersects this processor's ownership range of pc->pmat.
         Do this twice: first for the domains with overlaps, and once without.
         During the first pass create the subcommunicators, and use them on the second pass as well.
      */
      for (q = 0; q < 2; ++q) {
        PetscBool split = PETSC_FALSE;   /* whether this iteration created subcomm (and must free it) */
        /*
          domain limits, (xleft, xright) and (ylow, yheigh) are adjusted
          according to whether the domain with an overlap or without is considered.
        */
        xleft = x[q][0]; xright = x[q][1];
        ylow  = y[q][0]; yhigh  = y[q][1];
        PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
        nidx *= dof;
        n[q]  = nidx;
        /*
         Based on the counted number of indices in the local domain *with an overlap*,
         construct a subcommunicator of all the processors supporting this domain.
         Observe that a domain with an overlap might have nontrivial local support,
         while the domain without an overlap might not.  Hence, the decision to participate
         in the subcommunicator must be based on the domain with an overlap.
         */
        if (q == 0) {
          if (nidx) color = 1;
          else color = MPI_UNDEFINED;
          ierr = MPI_Comm_split(comm, color, rank, &subcomm);CHKERRMPI(ierr);
          split = PETSC_TRUE;
        }
        /*
         Proceed only if the number of local indices *with an overlap* is nonzero.
         */
        if (n[0]) {
          if (q == 0) xis = is;
          if (q == 1) {
            /*
             The IS for the no-overlap subdomain shares a communicator with the overlapping domain.
             Moreover, if the overlap is zero, the two ISs are identical.
             */
            if (overlap == 0) {
              (*is_local)[s] = (*is)[s];
              ierr           = PetscObjectReference((PetscObject)(*is)[s]);CHKERRQ(ierr);
              continue;
            } else {
              xis     = is_local;
              subcomm = ((PetscObject)(*is)[s])->comm;
            }
          } /* if (q == 1) */
          idx  = NULL;
          ierr = PetscMalloc1(nidx,&idx);CHKERRQ(ierr);
          if (nidx) {
            /* Enumerate the grid points of the local intersection row by row,
               expanding each point into its dof consecutive indices */
            k = 0;
            for (jj=ylow_loc; jj<yhigh_loc; ++jj) {
              PetscInt x0 = (jj==ylow_loc) ? xleft_loc : xleft;
              PetscInt x1 = (jj==yhigh_loc-1) ? xright_loc : xright;
              kk = dof*(M*jj + x0);
              for (ii=x0; ii<x1; ++ii) {
                for (d = 0; d < dof; ++d) {
                  idx[k++] = kk++;
                }
              }
            }
          }
          ierr = ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*xis)+s);CHKERRQ(ierr);
          if (split) {
            ierr = MPI_Comm_free(&subcomm);CHKERRMPI(ierr);
          }
        }/* if (n[0]) */
      }/* for (q = 0; q < 2; ++q) */
      if (n[0]) ++s;
      xstart += maxwidth;
    } /* for (i = 0; i < Mdomains; ++i) */
    ystart += maxheight;
  } /* for (j = 0; j < Ndomains; ++j) */
  PetscFunctionReturn(0);
}
1841 
1842 /*@C
1843     PCGASMGetSubdomains - Gets the subdomains supported on this processor
1844     for the additive Schwarz preconditioner.
1845 
1846     Not Collective
1847 
1848     Input Parameter:
1849 .   pc - the preconditioner context
1850 
1851     Output Parameters:
1852 +   n   - the number of subdomains for this processor (default value = 1)
1853 .   iis - the index sets that define the inner subdomains (without overlap) supported on this processor (can be NULL)
1854 -   ois - the index sets that define the outer subdomains (with overlap) supported on this processor (can be NULL)
1855 
1856 
1857     Notes:
1858     The user is responsible for destroying the ISs and freeing the returned arrays.
1859     The IS numbering is in the parallel, global numbering of the vector.
1860 
1861     Level: advanced
1862 
1863 .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(), PCGASMCreateSubdomains2D(),
1864           PCGASMSetSubdomains(), PCGASMGetSubmatrices()
1865 @*/
1866 PetscErrorCode  PCGASMGetSubdomains(PC pc,PetscInt *n,IS *iis[],IS *ois[])
1867 {
1868   PC_GASM        *osm;
1869   PetscErrorCode ierr;
1870   PetscBool      match;
1871   PetscInt       i;
1872 
1873   PetscFunctionBegin;
1874   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1875   ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1876   if (!match) SETERRQ2(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Incorrect object type: expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
1877   osm = (PC_GASM*)pc->data;
1878   if (n) *n = osm->n;
1879   if (iis) {
1880     ierr = PetscMalloc1(osm->n, iis);CHKERRQ(ierr);
1881   }
1882   if (ois) {
1883     ierr = PetscMalloc1(osm->n, ois);CHKERRQ(ierr);
1884   }
1885   if (iis || ois) {
1886     for (i = 0; i < osm->n; ++i) {
1887       if (iis) (*iis)[i] = osm->iis[i];
1888       if (ois) (*ois)[i] = osm->ois[i];
1889     }
1890   }
1891   PetscFunctionReturn(0);
1892 }
1893 
1894 /*@C
1895     PCGASMGetSubmatrices - Gets the local submatrices (for this processor
1896     only) for the additive Schwarz preconditioner.
1897 
1898     Not Collective
1899 
1900     Input Parameter:
1901 .   pc - the preconditioner context
1902 
1903     Output Parameters:
1904 +   n   - the number of matrices for this processor (default value = 1)
1905 -   mat - the matrices
1906 
1907     Notes:
1908     matrices returned by this routine have the same communicators as the index sets (IS)
1909            used to define subdomains in PCGASMSetSubdomains()
1910     Level: advanced
1911 
1912 .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(),
1913           PCGASMCreateSubdomains2D(), PCGASMSetSubdomains(), PCGASMGetSubdomains()
1914 @*/
1915 PetscErrorCode  PCGASMGetSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1916 {
1917   PC_GASM        *osm;
1918   PetscErrorCode ierr;
1919   PetscBool      match;
1920 
1921   PetscFunctionBegin;
1922   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1923   PetscValidIntPointer(n,2);
1924   if (mat) PetscValidPointer(mat,3);
1925   if (!pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUp() or PCSetUp().");
1926   ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1927   if (!match) SETERRQ2(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
1928   osm = (PC_GASM*)pc->data;
1929   if (n) *n = osm->n;
1930   if (mat) *mat = osm->pmat;
1931   PetscFunctionReturn(0);
1932 }
1933 
1934 /*@
1935     PCGASMSetUseDMSubdomains - Indicates whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
1936     Logically Collective
1937 
1938     Input Parameter:
1939 +   pc  - the preconditioner
1940 -   flg - boolean indicating whether to use subdomains defined by the DM
1941 
1942     Options Database Key:
.   -pc_gasm_dm_subdomains <bool> - use subdomains defined by the DM (overridden by -pc_gasm_overlap or -pc_gasm_total_subdomains)
1944 
1945     Level: intermediate
1946 
1947     Notes:
1948     PCGASMSetSubdomains(), PCGASMSetTotalSubdomains() or PCGASMSetOverlap() take precedence over PCGASMSetUseDMSubdomains(),
1949     so setting PCGASMSetSubdomains() with nontrivial subdomain ISs or any of PCGASMSetTotalSubdomains() and PCGASMSetOverlap()
1950     automatically turns the latter off.
1951 
1952 .seealso: PCGASMGetUseDMSubdomains(), PCGASMSetSubdomains(), PCGASMSetOverlap()
1953           PCGASMCreateSubdomains2D()
1954 @*/
1955 PetscErrorCode  PCGASMSetUseDMSubdomains(PC pc,PetscBool flg)
1956 {
1957   PC_GASM        *osm = (PC_GASM*)pc->data;
1958   PetscErrorCode ierr;
1959   PetscBool      match;
1960 
1961   PetscFunctionBegin;
1962   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1963   PetscValidLogicalCollectiveBool(pc,flg,2);
1964   if (pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Not for a setup PC.");
1965   ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1966   if (match) {
1967     if (!osm->user_subdomains && osm->N == PETSC_DETERMINE && osm->overlap < 0) {
1968       osm->dm_subdomains = flg;
1969     }
1970   }
1971   PetscFunctionReturn(0);
1972 }
1973 
1974 /*@
1975     PCGASMGetUseDMSubdomains - Returns flag indicating whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
1976     Not Collective
1977 
1978     Input Parameter:
1979 .   pc  - the preconditioner
1980 
1981     Output Parameter:
1982 .   flg - boolean indicating whether to use subdomains defined by the DM
1983 
1984     Level: intermediate
1985 
1986 .seealso: PCGASMSetUseDMSubdomains(), PCGASMSetOverlap()
1987           PCGASMCreateSubdomains2D()
1988 @*/
1989 PetscErrorCode  PCGASMGetUseDMSubdomains(PC pc,PetscBool* flg)
1990 {
1991   PC_GASM        *osm = (PC_GASM*)pc->data;
1992   PetscErrorCode ierr;
1993   PetscBool      match;
1994 
1995   PetscFunctionBegin;
1996   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1997   PetscValidBoolPointer(flg,2);
1998   ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1999   if (match) {
2000     if (flg) *flg = osm->dm_subdomains;
2001   }
2002   PetscFunctionReturn(0);
2003 }
2004