xref: /petsc/src/ksp/pc/impls/gasm/gasm.c (revision df0b854cade274ed9a7621633a20ef3035ae0f9f)
1 /*
2   This file defines an "generalized" additive Schwarz preconditioner for any Mat implementation.
3   In this version each processor may intersect multiple subdomains and any subdomain may
4   intersect multiple processors.  Intersections of subdomains with processors are called *local
5   subdomains*.
6 
7        N    - total number of distinct global subdomains          (set explicitly in PCGASMSetTotalSubdomains() or implicitly PCGASMSetSubdomains() and then calculated in PCSetUp_GASM())
8        n    - actual number of local subdomains on this processor (set in PCGASMSetSubdomains() or calculated in PCGASMSetTotalSubdomains())
9        nmax - maximum number of local subdomains per processor    (calculated in PCSetUp_GASM())
10 */
11 #include <petsc/private/pcimpl.h>     /*I "petscpc.h" I*/
12 #include <petscdm.h>
13 
14 typedef struct {
15   PetscInt    N,n,nmax;
16   PetscInt    overlap;                  /* overlap requested by user */
17   PCGASMType  type;                     /* use reduced interpolation, restriction or both */
18   PetscBool   type_set;                 /* if user set this value (so won't change it for symmetric problems) */
19   PetscBool   same_subdomain_solvers;   /* flag indicating whether all local solvers are same */
20   PetscBool   sort_indices;             /* flag to sort subdomain indices */
21   PetscBool   user_subdomains;          /* whether the user set explicit subdomain index sets -- keep them on PCReset() */
22   PetscBool   dm_subdomains;            /* whether DM is allowed to define subdomains */
23   PetscBool   hierarchicalpartitioning;
24   IS          *ois;                     /* index sets that define the outer (conceptually, overlapping) subdomains */
25   IS          *iis;                     /* index sets that define the inner (conceptually, nonoverlapping) subdomains */
26   KSP         *ksp;                     /* linear solvers for each subdomain */
27   Mat         *pmat;                    /* subdomain block matrices */
28   Vec         gx,gy;                    /* Merged work vectors */
29   Vec         *x,*y;                    /* Split work vectors; storage aliases pieces of storage of the above merged vectors. */
30   VecScatter  gorestriction;            /* merged restriction to disjoint union of outer subdomains */
31   VecScatter  girestriction;            /* merged restriction to disjoint union of inner subdomains */
32   VecScatter  pctoouter;
33   IS          permutationIS;
34   Mat         permutationP;
35   Mat         pcmat;
36   Vec         pcx,pcy;
37 } PC_GASM;
38 
39 #undef __FUNCT__
40 #define __FUNCT__ "PCGASMComputeGlobalSubdomainNumbering_Private"
41 static PetscErrorCode  PCGASMComputeGlobalSubdomainNumbering_Private(PC pc,PetscInt **numbering,PetscInt **permutation)
42 {
43   PC_GASM        *osm = (PC_GASM*)pc->data;
44   PetscInt       i;
45   PetscErrorCode ierr;
46 
47   PetscFunctionBegin;
48   /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
49   ierr = PetscMalloc2(osm->n,numbering,osm->n,permutation);CHKERRQ(ierr);
50   ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->iis,NULL,*numbering);CHKERRQ(ierr);
51   for (i = 0; i < osm->n; ++i) (*permutation)[i] = i;
52   ierr = PetscSortIntWithPermutation(osm->n,*numbering,*permutation);CHKERRQ(ierr);
53   PetscFunctionReturn(0);
54 }
55 
56 #undef __FUNCT__
57 #define __FUNCT__ "PCGASMSubdomainView_Private"
58 static PetscErrorCode  PCGASMSubdomainView_Private(PC pc, PetscInt i, PetscViewer viewer)
59 {
60   PC_GASM        *osm = (PC_GASM*)pc->data;
61   PetscInt       j,nidx;
62   const PetscInt *idx;
63   PetscViewer    sviewer;
64   char           *cidx;
65   PetscErrorCode ierr;
66 
67   PetscFunctionBegin;
68   if (i < 0 || i > osm->n) SETERRQ2(PetscObjectComm((PetscObject)viewer), PETSC_ERR_ARG_WRONG, "Invalid subdomain %D: must nonnegative and less than %D", i, osm->n);
69   /* Inner subdomains. */
70   ierr = ISGetLocalSize(osm->iis[i], &nidx);CHKERRQ(ierr);
71   /*
72    No more than 15 characters per index plus a space.
73    PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
74    in case nidx == 0. That will take care of the space for the trailing '\0' as well.
75    For nidx == 0, the whole string 16 '\0'.
76    */
77   ierr = PetscMalloc1(16*(nidx+1)+1, &cidx);CHKERRQ(ierr);
78   ierr = PetscViewerStringOpen(PETSC_COMM_SELF, cidx, 16*(nidx+1)+1, &sviewer);CHKERRQ(ierr);
79   ierr = ISGetIndices(osm->iis[i], &idx);CHKERRQ(ierr);
80   for (j = 0; j < nidx; ++j) {
81     ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);CHKERRQ(ierr);
82   }
83   ierr = ISRestoreIndices(osm->iis[i],&idx);CHKERRQ(ierr);
84   ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr);
85   ierr = PetscViewerASCIIPrintf(viewer, "Inner subdomain:\n");CHKERRQ(ierr);
86   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
87   ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
88   ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr);
89   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
90   ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
91   ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr);
92   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
93   ierr = PetscFree(cidx);CHKERRQ(ierr);
94   /* Outer subdomains. */
95   ierr = ISGetLocalSize(osm->ois[i], &nidx);CHKERRQ(ierr);
96   /*
97    No more than 15 characters per index plus a space.
98    PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
99    in case nidx == 0. That will take care of the space for the trailing '\0' as well.
100    For nidx == 0, the whole string 16 '\0'.
101    */
102   ierr = PetscMalloc1(16*(nidx+1)+1, &cidx);CHKERRQ(ierr);
103   ierr = PetscViewerStringOpen(PETSC_COMM_SELF, cidx, 16*(nidx+1)+1, &sviewer);CHKERRQ(ierr);
104   ierr = ISGetIndices(osm->ois[i], &idx);CHKERRQ(ierr);
105   for (j = 0; j < nidx; ++j) {
106     ierr = PetscViewerStringSPrintf(sviewer,"%D ", idx[j]);CHKERRQ(ierr);
107   }
108   ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr);
109   ierr = ISRestoreIndices(osm->ois[i],&idx);CHKERRQ(ierr);
110   ierr = PetscViewerASCIIPrintf(viewer, "Outer subdomain:\n");CHKERRQ(ierr);
111   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
112   ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
113   ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr);
114   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
115   ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
116   ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr);
117   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
118   ierr = PetscFree(cidx);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
122 #undef __FUNCT__
123 #define __FUNCT__ "PCGASMPrintSubdomains"
124 static PetscErrorCode  PCGASMPrintSubdomains(PC pc)
125 {
126   PC_GASM        *osm = (PC_GASM*)pc->data;
127   const char     *prefix;
128   char           fname[PETSC_MAX_PATH_LEN+1];
129   PetscInt       l, d, count;
130   PetscBool      doprint,found;
131   PetscViewer    viewer, sviewer = NULL;
132   PetscInt       *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */
133   PetscErrorCode ierr;
134 
135   PetscFunctionBegin;
136   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
137   doprint  = PETSC_FALSE;
138   ierr = PetscOptionsGetBool(NULL,prefix,"-pc_gasm_print_subdomains",&doprint,NULL);CHKERRQ(ierr);
139   if (!doprint) PetscFunctionReturn(0);
140   ierr = PetscOptionsGetString(NULL,prefix,"-pc_gasm_print_subdomains",fname,PETSC_MAX_PATH_LEN,&found);CHKERRQ(ierr);
141   if (!found) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); };
142   ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)pc),fname,&viewer);CHKERRQ(ierr);
143   /*
144    Make sure the viewer has a name. Otherwise this may cause a deadlock or other weird errors when creating a subcomm viewer:
145    the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed collectively on the comm.
146   */
147   ierr = PetscObjectName((PetscObject)viewer);CHKERRQ(ierr);
148   l    = 0;
149   ierr = PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);CHKERRQ(ierr);
150   for (count = 0; count < osm->N; ++count) {
151     /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
152     if (l<osm->n) {
153       d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
154       if (numbering[d] == count) {
155         ierr = PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
156         ierr = PCGASMSubdomainView_Private(pc,d,sviewer);CHKERRQ(ierr);
157         ierr = PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
158         ++l;
159       }
160     }
161     ierr = MPI_Barrier(PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
162   }
163   ierr = PetscFree2(numbering,permutation);CHKERRQ(ierr);
164   ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
165   PetscFunctionReturn(0);
166 }
167 
168 
169 #undef __FUNCT__
170 #define __FUNCT__ "PCView_GASM"
171 static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer)
172 {
173   PC_GASM        *osm = (PC_GASM*)pc->data;
174   const char     *prefix;
175   PetscErrorCode ierr;
176   PetscMPIInt    rank, size;
177   PetscInt       bsz;
178   PetscBool      iascii,view_subdomains=PETSC_FALSE;
179   PetscViewer    sviewer;
180   PetscInt       count, l;
181   char           overlap[256]     = "user-defined overlap";
182   char           gsubdomains[256] = "unknown total number of subdomains";
183   char           msubdomains[256] = "unknown max number of local subdomains";
184   PetscInt       *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */
185 
186   PetscFunctionBegin;
187   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size);CHKERRQ(ierr);
188   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank);CHKERRQ(ierr);
189 
190   if (osm->overlap >= 0) {
191     ierr = PetscSNPrintf(overlap,sizeof(overlap),"requested amount of overlap = %D",osm->overlap);CHKERRQ(ierr);
192   }
193   if (osm->N != PETSC_DETERMINE) {
194     ierr = PetscSNPrintf(gsubdomains, sizeof(gsubdomains), "total number of subdomains = %D",osm->N);CHKERRQ(ierr);
195   }
196   if (osm->nmax != PETSC_DETERMINE) {
197     ierr = PetscSNPrintf(msubdomains,sizeof(msubdomains),"max number of local subdomains = %D",osm->nmax);CHKERRQ(ierr);
198   }
199 
200   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
201   ierr = PetscOptionsGetBool(NULL,prefix,"-pc_gasm_view_subdomains",&view_subdomains,NULL);CHKERRQ(ierr);
202 
203   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
204   if (iascii) {
205     /*
206      Make sure the viewer has a name. Otherwise this may cause a deadlock when creating a subcomm viewer:
207      the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed
208      collectively on the comm.
209      */
210     ierr = PetscObjectName((PetscObject)viewer);CHKERRQ(ierr);
211     ierr = PetscViewerASCIIPrintf(viewer,"Generalized additive Schwarz:\n");CHKERRQ(ierr);
212     ierr = PetscViewerASCIIPrintf(viewer,"Restriction/interpolation type: %s\n",PCGASMTypes[osm->type]);CHKERRQ(ierr);
213     ierr = PetscViewerASCIIPrintf(viewer,"%s\n",overlap);CHKERRQ(ierr);
214     ierr = PetscViewerASCIIPrintf(viewer,"%s\n",gsubdomains);CHKERRQ(ierr);
215     ierr = PetscViewerASCIIPrintf(viewer,"%s\n",msubdomains);CHKERRQ(ierr);
216     ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
217     ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d|%d] number of locally-supported subdomains = %D\n",rank,size,osm->n);CHKERRQ(ierr);
218     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
219     ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
220     /* Cannot take advantage of osm->same_subdomain_solvers without a global numbering of subdomains. */
221     ierr = PetscViewerASCIIPrintf(viewer,"Subdomain solver info is as follows:\n");CHKERRQ(ierr);
222     ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
223     ierr = PetscViewerASCIIPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
224     /* Make sure that everybody waits for the banner to be printed. */
225     ierr = MPI_Barrier(PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
226     /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
227     ierr = PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);CHKERRQ(ierr);
228     l = 0;
229     for (count = 0; count < osm->N; ++count) {
230       PetscMPIInt srank, ssize;
231       if (l<osm->n) {
232         PetscInt d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
233         if (numbering[d] == count) {
234           ierr = MPI_Comm_size(((PetscObject)osm->ois[d])->comm, &ssize);CHKERRQ(ierr);
235           ierr = MPI_Comm_rank(((PetscObject)osm->ois[d])->comm, &srank);CHKERRQ(ierr);
236           ierr = PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
237           ierr = ISGetLocalSize(osm->ois[d],&bsz);CHKERRQ(ierr);
238           ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"[%d|%d] (subcomm [%d|%d]) local subdomain number %D, local size = %D\n",rank,size,srank,ssize,d,bsz);CHKERRQ(ierr);
239           ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr);
240           if (view_subdomains) {
241             ierr = PCGASMSubdomainView_Private(pc,d,sviewer);CHKERRQ(ierr);
242           }
243           if (!pc->setupcalled) {
244             ierr = PetscViewerASCIIPrintf(sviewer, "Solver not set up yet: PCSetUp() not yet called\n");CHKERRQ(ierr);
245           } else {
246             ierr = KSPView(osm->ksp[d],sviewer);CHKERRQ(ierr);
247           }
248           ierr = PetscViewerASCIIPrintf(sviewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
249           ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr);
250           ierr = PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
251           ++l;
252         }
253       }
254       ierr = MPI_Barrier(PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
255     }
256     ierr = PetscFree2(numbering,permutation);CHKERRQ(ierr);
257     ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
258   }
259   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
260   ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
261   PetscFunctionReturn(0);
262 }
263 
264 PETSC_INTERN PetscErrorCode  PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[]);
265 
266 
267 #undef __FUNCT__
268 #define __FUNCT__ "PCGASMSetHierarchicalPartitioning"
269 
270 PetscErrorCode PCGASMSetHierarchicalPartitioning(PC pc)
271 {
272    PC_GASM              *osm = (PC_GASM*)pc->data;
273    MatPartitioning       part;
274    MPI_Comm              comm;
275    PetscMPIInt           size;
276    PetscInt              nlocalsubdomains,fromrows_localsize;
277    IS                    partitioning,fromrows,isn;
278    Vec                   outervec;
279    PetscErrorCode        ierr;
280 
281    PetscFunctionBegin;
282    ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
283    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
284    /* we do not need a hierarchical partitioning when
285     * the total number of subdomains is consistent with
286     * the number of MPI tasks.
287     * For the following cases, we do not need to use HP
288     * */
289    if(osm->N==PETSC_DETERMINE || osm->N>=size || osm->N==1) PetscFunctionReturn(0);
290    if(size%osm->N != 0) SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_ARG_INCOMP,"have to specify the total number of subdomains %D to be a factor of the number of processors %d \n",osm->N,size);
291    nlocalsubdomains = size/osm->N;
292    osm->n           = 1;
293    ierr = MatPartitioningCreate(comm,&part);CHKERRQ(ierr);
294    ierr = MatPartitioningSetAdjacency(part,pc->pmat);CHKERRQ(ierr);
295    ierr = MatPartitioningSetType(part,MATPARTITIONINGHIERARCH);CHKERRQ(ierr);
296    ierr = MatPartitioningHierarchicalSetNcoarseparts(part,osm->N);CHKERRQ(ierr);
297    ierr = MatPartitioningHierarchicalSetNfineparts(part,nlocalsubdomains);CHKERRQ(ierr);
298    ierr = MatPartitioningSetFromOptions(part);CHKERRQ(ierr);
299    /* get new processor owner number of each vertex */
300    ierr = MatPartitioningApply(part,&partitioning);CHKERRQ(ierr);
301    ierr = ISBuildTwoSided(partitioning,NULL,&fromrows);CHKERRQ(ierr);
302    ierr = ISPartitioningToNumbering(partitioning,&isn);CHKERRQ(ierr);
303    ierr = ISDestroy(&isn);CHKERRQ(ierr);
304    ierr = ISGetLocalSize(fromrows,&fromrows_localsize);CHKERRQ(ierr);
305    ierr = MatPartitioningDestroy(&part);CHKERRQ(ierr);
306    ierr = MatCreateVecs(pc->pmat,&outervec,NULL);CHKERRQ(ierr);
307    ierr = VecCreateMPI(comm,fromrows_localsize,PETSC_DETERMINE,&(osm->pcx));CHKERRQ(ierr);
308    ierr = VecDuplicate(osm->pcx,&(osm->pcy));CHKERRQ(ierr);
309    ierr = VecScatterCreate(osm->pcx,NULL,outervec,fromrows,&(osm->pctoouter));CHKERRQ(ierr);
310    ierr = MatGetSubMatrix(pc->pmat,fromrows,fromrows,MAT_INITIAL_MATRIX,&(osm->permutationP));CHKERRQ(ierr);
311    ierr = PetscObjectReference((PetscObject)fromrows);CHKERRQ(ierr);
312    osm->permutationIS = fromrows;
313    osm->pcmat =  pc->pmat;
314    ierr = PetscObjectReference((PetscObject)osm->permutationP);CHKERRQ(ierr);
315    pc->pmat = osm->permutationP;
316    ierr = VecDestroy(&outervec);CHKERRQ(ierr);
317    ierr = ISDestroy(&fromrows);CHKERRQ(ierr);
318    ierr = ISDestroy(&partitioning);CHKERRQ(ierr);
319    osm->n           = PETSC_DETERMINE;
320    PetscFunctionReturn(0);
321 }
322 
323 
324 
325 #undef __FUNCT__
326 #define __FUNCT__ "PCSetUp_GASM"
327 static PetscErrorCode PCSetUp_GASM(PC pc)
328 {
329   PC_GASM        *osm = (PC_GASM*)pc->data;
330   PetscErrorCode ierr;
331   PetscBool      symset,flg;
332   PetscInt       i,nInnerIndices,nTotalInnerIndices;
333   PetscMPIInt    rank, size;
334   MatReuse       scall = MAT_REUSE_MATRIX;
335   KSP            ksp;
336   PC             subpc;
337   const char     *prefix,*pprefix;
338   Vec            x,y;
339   PetscInt       oni;       /* Number of indices in the i-th local outer subdomain.               */
340   const PetscInt *oidxi;    /* Indices from the i-th subdomain local outer subdomain.             */
341   PetscInt       on;        /* Number of indices in the disjoint union of local outer subdomains. */
342   PetscInt       *oidx;     /* Indices in the disjoint union of local outer subdomains. */
343   IS             gois;      /* Disjoint union the global indices of outer subdomains.             */
344   IS             goid;      /* Identity IS of the size of the disjoint union of outer subdomains. */
345   PetscScalar    *gxarray, *gyarray;
346   PetscInt       gostart;   /* Start of locally-owned indices in the vectors -- osm->gx,osm->gy -- over the disjoint union of outer subdomains. */
347   PetscInt       num_subdomains    = 0;
348   DM             *subdomain_dm     = NULL;
349   char           **subdomain_names = NULL;
350   PetscInt       *numbering;
351 
352 
353   PetscFunctionBegin;
354   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr);
355   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
356   if (!pc->setupcalled) {
357 	/* use a hierarchical partitioning */
358     if(osm->hierarchicalpartitioning){
359       ierr = PCGASMSetHierarchicalPartitioning(pc);CHKERRQ(ierr);
360     }
361     if (!osm->type_set) {
362       ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
363       if (symset && flg) osm->type = PC_GASM_BASIC;
364     }
365 
366     if (osm->n == PETSC_DETERMINE) {
367       if (osm->N != PETSC_DETERMINE) {
368 	   /* No local subdomains given, but the desired number of total subdomains is known, so construct them accordingly. */
369 	   ierr = PCGASMCreateSubdomains(pc->pmat,osm->N,&osm->n,&osm->iis);CHKERRQ(ierr);
370       } else if (osm->dm_subdomains && pc->dm) {
371 	/* try pc->dm next, if allowed */
372 	PetscInt  d;
373 	IS       *inner_subdomain_is, *outer_subdomain_is;
374 	ierr = DMCreateDomainDecomposition(pc->dm, &num_subdomains, &subdomain_names, &inner_subdomain_is, &outer_subdomain_is, &subdomain_dm);CHKERRQ(ierr);
375 	if (num_subdomains) {
376 	  ierr = PCGASMSetSubdomains(pc, num_subdomains, inner_subdomain_is, outer_subdomain_is);CHKERRQ(ierr);
377 	}
378 	for (d = 0; d < num_subdomains; ++d) {
379 	  if (inner_subdomain_is) {ierr = ISDestroy(&inner_subdomain_is[d]);CHKERRQ(ierr);}
380 	  if (outer_subdomain_is) {ierr = ISDestroy(&outer_subdomain_is[d]);CHKERRQ(ierr);}
381 	}
382 	ierr = PetscFree(inner_subdomain_is);CHKERRQ(ierr);
383 	ierr = PetscFree(outer_subdomain_is);CHKERRQ(ierr);
384       } else {
385 	/* still no subdomains; use one per processor */
386 	osm->nmax = osm->n = 1;
387 	ierr      = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr);
388 	osm->N    = size;
389 	ierr = PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);CHKERRQ(ierr);
390       }
391     }
392     if (!osm->iis) {
393       /*
394        osm->n was set in PCGASMSetSubdomains(), but the actual subdomains have not been supplied.
395        We create the requisite number of local inner subdomains and then expand them into
396        out subdomains, if necessary.
397        */
398       ierr = PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);CHKERRQ(ierr);
399     }
400     if (!osm->ois) {
401       /*
402 	    Initially make outer subdomains the same as inner subdomains. If nonzero additional overlap
403 	    has been requested, copy the inner subdomains over so they can be modified.
404       */
405       ierr = PetscMalloc1(osm->n,&osm->ois);CHKERRQ(ierr);
406       for (i=0; i<osm->n; ++i) {
407 	if (osm->overlap > 0 && osm->N>1) { /* With positive overlap, osm->iis[i] will be modified */
408 	  ierr = ISDuplicate(osm->iis[i],(osm->ois)+i);CHKERRQ(ierr);
409 	  ierr = ISCopy(osm->iis[i],osm->ois[i]);CHKERRQ(ierr);
410 	} else {
411 	  ierr      = PetscObjectReference((PetscObject)((osm->iis)[i]));CHKERRQ(ierr);
412 	  osm->ois[i] = osm->iis[i];
413 	}
414       }
415       if (osm->overlap>0 && osm->N>1) {
416 	   /* Extend the "overlapping" regions by a number of steps */
417 	   ierr = MatIncreaseOverlapSplit(pc->pmat,osm->n,osm->ois,osm->overlap);CHKERRQ(ierr);
418       }
419     }
420 
421     /* Now the subdomains are defined.  Determine their global and max local numbers, if necessary. */
422     if (osm->nmax == PETSC_DETERMINE) {
423       PetscMPIInt inwork,outwork;
424       /* determine global number of subdomains and the max number of local subdomains */
425       inwork = osm->n;
426       ierr       = MPIU_Allreduce(&inwork,&outwork,1,MPI_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
427       osm->nmax  = outwork;
428     }
429     if (osm->N == PETSC_DETERMINE) {
430       /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
431       ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,&osm->N,NULL);CHKERRQ(ierr);
432     }
433 
434 
435     if (osm->sort_indices) {
436       for (i=0; i<osm->n; i++) {
437         ierr = ISSort(osm->ois[i]);CHKERRQ(ierr);
438         ierr = ISSort(osm->iis[i]);CHKERRQ(ierr);
439       }
440     }
441     ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
442     ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr);
443 
444     /*
445        Merge the ISs, create merged vectors and restrictions.
446      */
447     /* Merge outer subdomain ISs and construct a restriction onto the disjoint union of local outer subdomains. */
448     on = 0;
449     for (i=0; i<osm->n; i++) {
450       ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
451       on  += oni;
452     }
453     ierr = PetscMalloc1(on, &oidx);CHKERRQ(ierr);
454     on   = 0;
455     /* Merge local indices together */
456     for (i=0; i<osm->n; i++) {
457       ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
458       ierr = ISGetIndices(osm->ois[i],&oidxi);CHKERRQ(ierr);
459       ierr = PetscMemcpy(oidx+on,oidxi,sizeof(PetscInt)*oni);CHKERRQ(ierr);
460       ierr = ISRestoreIndices(osm->ois[i],&oidxi);CHKERRQ(ierr);
461       on  += oni;
462     }
463     ierr = ISCreateGeneral(((PetscObject)(pc))->comm,on,oidx,PETSC_OWN_POINTER,&gois);CHKERRQ(ierr);
464     nTotalInnerIndices = 0;
465     for(i=0; i<osm->n; i++){
466       ierr = ISGetLocalSize(osm->iis[i],&nInnerIndices);CHKERRQ(ierr);
467       nTotalInnerIndices += nInnerIndices;
468     }
469     ierr = VecCreateMPI(((PetscObject)(pc))->comm,nTotalInnerIndices,PETSC_DETERMINE,&x);CHKERRQ(ierr);
470     ierr = VecDuplicate(x,&y);CHKERRQ(ierr);
471 
472     ierr = VecCreateMPI(PetscObjectComm((PetscObject)pc),on,PETSC_DECIDE,&osm->gx);CHKERRQ(ierr);
473     ierr = VecDuplicate(osm->gx,&osm->gy);CHKERRQ(ierr);
474     ierr = VecGetOwnershipRange(osm->gx, &gostart, NULL);CHKERRQ(ierr);
475     ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),on,gostart,1, &goid);CHKERRQ(ierr);
476     /* gois might indices not on local */
477     ierr = VecScatterCreate(x,gois,osm->gx,goid, &(osm->gorestriction));CHKERRQ(ierr);
478     ierr = PetscMalloc1(osm->n,&numbering);CHKERRQ(ierr);
479     ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,NULL,numbering);CHKERRQ(ierr);
480     ierr = VecDestroy(&x);CHKERRQ(ierr);
481     ierr = ISDestroy(&gois);CHKERRQ(ierr);
482 
483     /* Merge inner subdomain ISs and construct a restriction onto the disjoint union of local inner subdomains. */
484     {
485       PetscInt        ini;           /* Number of indices the i-th a local inner subdomain. */
486       PetscInt        in;            /* Number of indices in the disjoint uniont of local inner subdomains. */
487       PetscInt       *iidx;          /* Global indices in the merged local inner subdomain. */
488       PetscInt       *ioidx;         /* Global indices of the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
489       IS              giis;          /* IS for the disjoint union of inner subdomains. */
490       IS              giois;         /* IS for the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
491       PetscScalar    *array;
492       const PetscInt *indices;
493       PetscInt        k;
494       on = 0;
495       for (i=0; i<osm->n; i++) {
496         ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
497         on  += oni;
498       }
499       ierr = PetscMalloc1(on, &iidx);CHKERRQ(ierr);
500       ierr = PetscMalloc1(on, &ioidx);CHKERRQ(ierr);
501       ierr = VecGetArray(y,&array);CHKERRQ(ierr);
502       /* set communicator id to determine where overlap is */
503       in   = 0;
504       for (i=0; i<osm->n; i++) {
505         ierr   = ISGetLocalSize(osm->iis[i],&ini);CHKERRQ(ierr);
506         for (k = 0; k < ini; ++k){
507           array[in+k] = numbering[i];
508         }
509         in += ini;
510       }
511       ierr = VecRestoreArray(y,&array);CHKERRQ(ierr);
512       ierr = VecScatterBegin(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
513       ierr = VecScatterEnd(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
514       ierr = VecGetOwnershipRange(osm->gy,&gostart, NULL);CHKERRQ(ierr);
515       ierr = VecGetArray(osm->gy,&array);CHKERRQ(ierr);
516       on  = 0;
517       in  = 0;
518       for (i=0; i<osm->n; i++) {
519     	ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
520     	ierr = ISGetIndices(osm->ois[i],&indices);CHKERRQ(ierr);
521     	for (k=0; k<oni; k++) {
522           /*  skip overlapping indices to get inner domain */
523           if(PetscRealPart(array[on+k]) != numbering[i]) continue;
524           iidx[in]    = indices[k];
525           ioidx[in++] = gostart+on+k;
526     	}
527     	ierr   = ISRestoreIndices(osm->ois[i], &indices);CHKERRQ(ierr);
528     	on += oni;
529       }
530       ierr = VecRestoreArray(osm->gy,&array);CHKERRQ(ierr);
531       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,iidx,PETSC_OWN_POINTER,&giis);CHKERRQ(ierr);
532       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,ioidx,PETSC_OWN_POINTER,&giois);CHKERRQ(ierr);
533       ierr = VecScatterCreate(y,giis,osm->gy,giois,&osm->girestriction);CHKERRQ(ierr);
534       ierr = VecDestroy(&y);CHKERRQ(ierr);
535       ierr = ISDestroy(&giis);CHKERRQ(ierr);
536       ierr = ISDestroy(&giois);CHKERRQ(ierr);
537     }
538     ierr = ISDestroy(&goid);CHKERRQ(ierr);
539     ierr = PetscFree(numbering);CHKERRQ(ierr);
540 
541     /* Create the subdomain work vectors. */
542     ierr = PetscMalloc1(osm->n,&osm->x);CHKERRQ(ierr);
543     ierr = PetscMalloc1(osm->n,&osm->y);CHKERRQ(ierr);
544     ierr = VecGetArray(osm->gx, &gxarray);CHKERRQ(ierr);
545     ierr = VecGetArray(osm->gy, &gyarray);CHKERRQ(ierr);
546     for (i=0, on=0; i<osm->n; ++i, on += oni) {
547       PetscInt oNi;
548       ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
549       /* on a sub communicator */
550       ierr = ISGetSize(osm->ois[i],&oNi);CHKERRQ(ierr);
551       ierr = VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gxarray+on,&osm->x[i]);CHKERRQ(ierr);
552       ierr = VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gyarray+on,&osm->y[i]);CHKERRQ(ierr);
553     }
554     ierr = VecRestoreArray(osm->gx, &gxarray);CHKERRQ(ierr);
555     ierr = VecRestoreArray(osm->gy, &gyarray);CHKERRQ(ierr);
556     /* Create the subdomain solvers */
557     ierr = PetscMalloc1(osm->n,&osm->ksp);CHKERRQ(ierr);
558     for (i=0; i<osm->n; i++) {
559       char subprefix[PETSC_MAX_PATH_LEN+1];
560       ierr        = KSPCreate(((PetscObject)(osm->ois[i]))->comm,&ksp);CHKERRQ(ierr);
561       ierr        = PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp);CHKERRQ(ierr);
562       ierr        = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr);
563       ierr        = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
564       ierr        = KSPGetPC(ksp,&subpc);CHKERRQ(ierr); /* Why do we need this here? */
565       if (subdomain_dm) {
566 	    ierr = KSPSetDM(ksp,subdomain_dm[i]);CHKERRQ(ierr);
567 	    ierr = DMDestroy(subdomain_dm+i);CHKERRQ(ierr);
568       }
569       ierr        = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
570       ierr        = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr);
571       if (subdomain_names && subdomain_names[i]) {
572 	     ierr = PetscSNPrintf(subprefix,PETSC_MAX_PATH_LEN,"sub_%s_",subdomain_names[i]);CHKERRQ(ierr);
573 	     ierr = KSPAppendOptionsPrefix(ksp,subprefix);CHKERRQ(ierr);
574 	     ierr = PetscFree(subdomain_names[i]);CHKERRQ(ierr);
575       }
576       ierr        = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr);
577       osm->ksp[i] = ksp;
578     }
579     ierr = PetscFree(subdomain_dm);CHKERRQ(ierr);
580     ierr = PetscFree(subdomain_names);CHKERRQ(ierr);
581     scall = MAT_INITIAL_MATRIX;
582 
583   } else { /* if (pc->setupcalled) */
584     /*
585        Destroy the submatrices from the previous iteration
586     */
587     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
588       ierr  = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
589       scall = MAT_INITIAL_MATRIX;
590     }
591     if(osm->permutationIS){
592       ierr = MatGetSubMatrix(pc->pmat,osm->permutationIS,osm->permutationIS,scall,&osm->permutationP);CHKERRQ(ierr);
593       ierr = PetscObjectReference((PetscObject)osm->permutationP);CHKERRQ(ierr);
594       osm->pcmat = pc->pmat;
595       pc->pmat   = osm->permutationP;
596     }
597 
598   }
599 
600 
601   /*
602      Extract out the submatrices.
603   */
604   if (size > 1) {
605     ierr = MatGetSubMatricesMPI(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);CHKERRQ(ierr);
606   } else {
607     ierr = MatGetSubMatrices(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);CHKERRQ(ierr);
608   }
609   if (scall == MAT_INITIAL_MATRIX) {
610     ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr);
611     for (i=0; i<osm->n; i++) {
612       ierr = PetscLogObjectParent((PetscObject)pc,(PetscObject)osm->pmat[i]);CHKERRQ(ierr);
613       ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr);
614     }
615   }
616 
617   /* Return control to the user so that the submatrices can be modified (e.g., to apply
618      different boundary conditions for the submatrices than for the global problem) */
619   ierr = PCModifySubMatrices(pc,osm->n,osm->ois,osm->ois,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr);
620 
621   /*
622      Loop over submatrices putting them into local ksps
623   */
624   for (i=0; i<osm->n; i++) {
625     ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i]);CHKERRQ(ierr);
626     if (!pc->setupcalled) {
627       ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr);
628     }
629   }
630   if(osm->pcmat){
631     ierr = MatDestroy(&pc->pmat);CHKERRQ(ierr);
632     pc->pmat   = osm->pcmat;
633     osm->pcmat = 0;
634   }
635   PetscFunctionReturn(0);
636 }
637 
638 #undef __FUNCT__
639 #define __FUNCT__ "PCSetUpOnBlocks_GASM"
640 static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc)
641 {
642   PC_GASM        *osm = (PC_GASM*)pc->data;
643   PetscErrorCode ierr;
644   PetscInt       i;
645 
646   PetscFunctionBegin;
647   for (i=0; i<osm->n; i++) {
648     ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr);
649   }
650   PetscFunctionReturn(0);
651 }
652 
653 #undef __FUNCT__
654 #define __FUNCT__ "PCApply_GASM"
655 static PetscErrorCode PCApply_GASM(PC pc,Vec xin,Vec yout)
656 {
657   PC_GASM        *osm = (PC_GASM*)pc->data;
658   PetscErrorCode ierr;
659   PetscInt       i;
660   Vec            x,y;
661   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
662 
663   PetscFunctionBegin;
664   if(osm->pctoouter){
665     ierr = VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
666     ierr = VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
667     x = osm->pcx;
668     y = osm->pcy;
669   }else{
670 	x = xin;
671 	y = yout;
672   }
673   /*
674      Support for limiting the restriction or interpolation only to the inner
675      subdomain values (leaving the other values 0).
676   */
677   if (!(osm->type & PC_GASM_RESTRICT)) {
678     /* have to zero the work RHS since scatter may leave some slots empty */
679     ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
680     ierr = VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
681   } else {
682     ierr = VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
683   }
684   ierr = VecZeroEntries(osm->gy);CHKERRQ(ierr);
685   if (!(osm->type & PC_GASM_RESTRICT)) {
686     ierr = VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
687   } else {
688     ierr = VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
689   }
690   /* do the subdomain solves */
691   for (i=0; i<osm->n; ++i) {
692     ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
693   }
694   /* Do we need to zero y ?? */
695   ierr = VecZeroEntries(y);CHKERRQ(ierr);
696   if (!(osm->type & PC_GASM_INTERPOLATE)) {
697     ierr = VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
698     ierr = VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
699   } else {
700     ierr = VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
701     ierr = VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
702   }
703   if(osm->pctoouter){
704     ierr = VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
705     ierr = VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
706   }
707   PetscFunctionReturn(0);
708 }
709 
710 #undef __FUNCT__
711 #define __FUNCT__ "PCApplyTranspose_GASM"
712 static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec xin,Vec yout)
713 {
714   PC_GASM        *osm = (PC_GASM*)pc->data;
715   PetscErrorCode ierr;
716   PetscInt       i;
717   Vec            x,y;
718   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
719 
720   PetscFunctionBegin;
721   if(osm->pctoouter){
722    ierr = VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
723    ierr = VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
724    x = osm->pcx;
725    y = osm->pcy;
726   }else{
727 	x = xin;
728 	y = yout;
729   }
730   /*
731      Support for limiting the restriction or interpolation to only local
732      subdomain values (leaving the other values 0).
733 
734      Note: these are reversed from the PCApply_GASM() because we are applying the
735      transpose of the three terms
736   */
737   if (!(osm->type & PC_GASM_INTERPOLATE)) {
738     /* have to zero the work RHS since scatter may leave some slots empty */
739     ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
740     ierr = VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
741   } else {
742     ierr = VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
743   }
744   ierr = VecZeroEntries(osm->gy);CHKERRQ(ierr);
745   if (!(osm->type & PC_GASM_INTERPOLATE)) {
746     ierr = VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
747   } else {
748     ierr = VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
749   }
750   /* do the local solves */
751   for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
752     ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
753   }
754   ierr = VecZeroEntries(y);CHKERRQ(ierr);
755   if (!(osm->type & PC_GASM_RESTRICT)) {
756     ierr = VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
757     ierr = VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
758   } else {
759     ierr = VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
760     ierr = VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
761   }
762   if(osm->pctoouter){
763    ierr = VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
764    ierr = VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
765   }
766   PetscFunctionReturn(0);
767 }
768 
769 #undef __FUNCT__
770 #define __FUNCT__ "PCReset_GASM"
771 static PetscErrorCode PCReset_GASM(PC pc)
772 {
773   PC_GASM        *osm = (PC_GASM*)pc->data;
774   PetscErrorCode ierr;
775   PetscInt       i;
776 
777   PetscFunctionBegin;
778   if (osm->ksp) {
779     for (i=0; i<osm->n; i++) {
780       ierr = KSPReset(osm->ksp[i]);CHKERRQ(ierr);
781     }
782   }
783   if (osm->pmat) {
784     if (osm->n > 0) {
785       ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
786     }
787   }
788   if (osm->x) {
789     for (i=0; i<osm->n; i++) {
790       ierr = VecDestroy(&osm->x[i]);CHKERRQ(ierr);
791       ierr = VecDestroy(&osm->y[i]);CHKERRQ(ierr);
792     }
793   }
794   ierr = VecDestroy(&osm->gx);CHKERRQ(ierr);
795   ierr = VecDestroy(&osm->gy);CHKERRQ(ierr);
796 
797   ierr = VecScatterDestroy(&osm->gorestriction);CHKERRQ(ierr);
798   ierr = VecScatterDestroy(&osm->girestriction);CHKERRQ(ierr);
799   if (!osm->user_subdomains) {
800     ierr      = PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);CHKERRQ(ierr);
801     osm->N    = PETSC_DETERMINE;
802     osm->nmax = PETSC_DETERMINE;
803   }
804   if(osm->pctoouter){
805 	ierr = VecScatterDestroy(&(osm->pctoouter));CHKERRQ(ierr);
806   }
807   if(osm->permutationIS){
808 	ierr = ISDestroy(&(osm->permutationIS));CHKERRQ(ierr);
809   }
810   if(osm->pcx){
811 	ierr = VecDestroy(&(osm->pcx));CHKERRQ(ierr);
812   }
813   if(osm->pcy){
814 	ierr = VecDestroy(&(osm->pcy));CHKERRQ(ierr);
815   }
816   if(osm->permutationP){
817     ierr = MatDestroy(&(osm->permutationP));CHKERRQ(ierr);
818   }
819   if(osm->pcmat){
820 	ierr = MatDestroy(&osm->pcmat);CHKERRQ(ierr);
821   }
822   PetscFunctionReturn(0);
823 }
824 
825 #undef __FUNCT__
826 #define __FUNCT__ "PCDestroy_GASM"
827 static PetscErrorCode PCDestroy_GASM(PC pc)
828 {
829   PC_GASM        *osm = (PC_GASM*)pc->data;
830   PetscErrorCode ierr;
831   PetscInt       i;
832 
833   PetscFunctionBegin;
834   ierr = PCReset_GASM(pc);CHKERRQ(ierr);
835   /* PCReset will not destroy subdomains, if user_subdomains is true. */
836   ierr = PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);CHKERRQ(ierr);
837   if (osm->ksp) {
838     for (i=0; i<osm->n; i++) {
839       ierr = KSPDestroy(&osm->ksp[i]);CHKERRQ(ierr);
840     }
841     ierr = PetscFree(osm->ksp);CHKERRQ(ierr);
842   }
843   ierr = PetscFree(osm->x);CHKERRQ(ierr);
844   ierr = PetscFree(osm->y);CHKERRQ(ierr);
845   ierr = PetscFree(pc->data);CHKERRQ(ierr);
846   PetscFunctionReturn(0);
847 }
848 
849 #undef __FUNCT__
850 #define __FUNCT__ "PCSetFromOptions_GASM"
851 static PetscErrorCode PCSetFromOptions_GASM(PetscOptionItems *PetscOptionsObject,PC pc)
852 {
853   PC_GASM        *osm = (PC_GASM*)pc->data;
854   PetscErrorCode ierr;
855   PetscInt       blocks,ovl;
856   PetscBool      symset,flg;
857   PCGASMType     gasmtype;
858 
859   PetscFunctionBegin;
860   /* set the type to symmetric if matrix is symmetric */
861   if (!osm->type_set && pc->pmat) {
862     ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
863     if (symset && flg) osm->type = PC_GASM_BASIC;
864   }
865   ierr = PetscOptionsHead(PetscOptionsObject,"Generalized additive Schwarz options");CHKERRQ(ierr);
866   ierr = PetscOptionsBool("-pc_gasm_use_dm_subdomains","If subdomains aren't set, use DMCreateDomainDecomposition() to define subdomains.","PCGASMSetUseDMSubdomains",osm->dm_subdomains,&osm->dm_subdomains,&flg);CHKERRQ(ierr);
867   ierr = PetscOptionsInt("-pc_gasm_total_subdomains","Total number of subdomains across communicator","PCGASMSetTotalSubdomains",osm->N,&blocks,&flg);CHKERRQ(ierr);
868   if (flg) {
869     ierr = PCGASMSetTotalSubdomains(pc,blocks);CHKERRQ(ierr);
870   }
871   ierr = PetscOptionsInt("-pc_gasm_overlap","Number of overlapping degrees of freedom","PCGASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr);
872   if (flg) {
873     ierr = PCGASMSetOverlap(pc,ovl);CHKERRQ(ierr);
874     osm->dm_subdomains = PETSC_FALSE;
875   }
876   flg  = PETSC_FALSE;
877   ierr = PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);CHKERRQ(ierr);
878   if (flg) {ierr = PCGASMSetType(pc,gasmtype);CHKERRQ(ierr);}
879   ierr = PetscOptionsBool("-pc_gasm_use_hierachical_partitioning","use hierarchical partitioning",NULL,osm->hierarchicalpartitioning,&osm->hierarchicalpartitioning,&flg);CHKERRQ(ierr);
880   ierr = PetscOptionsTail();CHKERRQ(ierr);
881   PetscFunctionReturn(0);
882 }
883 
884 /*------------------------------------------------------------------------------------*/
885 
886 #undef __FUNCT__
887 #define __FUNCT__ "PCGASMSetTotalSubdomains"
888 /*@
889     PCGASMSetTotalSubdomains - sets the total number of subdomains to use across the
890                                communicator.
891     Logically collective on pc
892 
893     Input Parameters:
894 +   pc  - the preconditioner
895 -   N   - total number of subdomains
896 
897 
898     Level: beginner
899 
900 .keywords: PC, ASM, DM, set, subdomains, additive Schwarz
901 
902 .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap()
903           PCGASMCreateSubdomains2D()
904 @*/
905 PetscErrorCode  PCGASMSetTotalSubdomains(PC pc,PetscInt N)
906 {
907   PC_GASM        *osm = (PC_GASM*)pc->data;
908   PetscMPIInt    size,rank;
909   PetscErrorCode ierr;
910 
911   PetscFunctionBegin;
912   if (N < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Total number of subdomains must be 1 or more, got N = %D",N);
913   if (pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before calling PCSetUp().");
914 
915   ierr = PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);CHKERRQ(ierr);
916   osm->ois = osm->iis = NULL;
917 
918   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr);
919   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
920   osm->N    = N;
921   osm->n    = PETSC_DETERMINE;
922   osm->nmax = PETSC_DETERMINE;
923   osm->dm_subdomains = PETSC_FALSE;
924   PetscFunctionReturn(0);
925 }
926 
927 
928 #undef __FUNCT__
929 #define __FUNCT__ "PCGASMSetSubdomains_GASM"
930 static PetscErrorCode  PCGASMSetSubdomains_GASM(PC pc,PetscInt n,IS iis[],IS ois[])
931 {
932   PC_GASM         *osm = (PC_GASM*)pc->data;
933   PetscErrorCode  ierr;
934   PetscInt        i;
935 
936   PetscFunctionBegin;
937   if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more subdomains, got n = %D",n);
938   if (pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetSubdomains() should be called before calling PCSetUp().");
939 
940   ierr = PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);CHKERRQ(ierr);
941   osm->iis  = osm->ois = NULL;
942   osm->n    = n;
943   osm->N    = PETSC_DETERMINE;
944   osm->nmax = PETSC_DETERMINE;
945   if (ois) {
946     ierr = PetscMalloc1(n,&osm->ois);CHKERRQ(ierr);
947     for (i=0; i<n; i++) {
948       ierr = PetscObjectReference((PetscObject)ois[i]);CHKERRQ(ierr);
949       osm->ois[i] = ois[i];
950     }
951     /*
952        Since the user set the outer subdomains, even if nontrivial overlap was requested via PCGASMSetOverlap(),
953        it will be ignored.  To avoid confusion later on (e.g., when viewing the PC), the overlap size is set to -1.
954     */
955     osm->overlap = -1;
956     /* inner subdomains must be provided  */
957     if (!iis) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"inner indices have to be provided \n");
958   }/* end if */
959   if (iis) {
960     ierr = PetscMalloc1(n,&osm->iis);CHKERRQ(ierr);
961     for (i=0; i<n; i++) {
962       ierr = PetscObjectReference((PetscObject)iis[i]);CHKERRQ(ierr);
963       osm->iis[i] = iis[i];
964     }
965     if (!ois) {
966       osm->ois = PETSC_NULL;
967       /* if user does not provide outer indices, we will create the corresponding outer indices using  osm->overlap =1 in PCSetUp_GASM */
968     }
969   }
970 #if defined(PETSC_USE_DEBUG)
971   {
972     PetscInt        j,rstart,rend,*covered,lsize;
973     const PetscInt  *indices;
974     /* check if the inner indices cover and only cover the local portion of the preconditioning matrix */
975     ierr = MatGetOwnershipRange(pc->pmat,&rstart,&rend);CHKERRQ(ierr);
976     ierr = PetscCalloc1(rend-rstart,&covered);CHKERRQ(ierr);
977     /* check if the current processor owns indices from others */
978     for (i=0; i<n; i++) {
979       ierr = ISGetIndices(osm->iis[i],&indices);CHKERRQ(ierr);
980       ierr = ISGetLocalSize(osm->iis[i],&lsize);CHKERRQ(ierr);
981       for (j=0; j<lsize; j++) {
982         if (indices[j]<rstart || indices[j]>=rend) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"inner subdomains can not own an index %d from other processors", indices[j]);
983         else if (covered[indices[j]-rstart]==1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"inner subdomains can not have an overlapping index %d ",indices[j]);
984         else covered[indices[j]-rstart] = 1;
985       }
986     ierr = ISRestoreIndices(osm->iis[i],&indices);CHKERRQ(ierr);
987     }
988     /* check if we miss any indices */
989     for (i=rstart; i<rend; i++) {
990       if (!covered[i-rstart]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"local entity %d was not covered by inner subdomains",i);
991     }
992     ierr = PetscFree(covered);CHKERRQ(ierr);
993   }
994 #endif
995   if (iis)  osm->user_subdomains = PETSC_TRUE;
996   PetscFunctionReturn(0);
997 }
998 
999 
1000 #undef __FUNCT__
1001 #define __FUNCT__ "PCGASMSetOverlap_GASM"
1002 static PetscErrorCode  PCGASMSetOverlap_GASM(PC pc,PetscInt ovl)
1003 {
1004   PC_GASM *osm = (PC_GASM*)pc->data;
1005 
1006   PetscFunctionBegin;
1007   if (ovl < 0) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
1008   if (pc->setupcalled && ovl != osm->overlap) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp().");
1009   if (!pc->setupcalled) osm->overlap = ovl;
1010   PetscFunctionReturn(0);
1011 }
1012 
1013 #undef __FUNCT__
1014 #define __FUNCT__ "PCGASMSetType_GASM"
1015 static PetscErrorCode  PCGASMSetType_GASM(PC pc,PCGASMType type)
1016 {
1017   PC_GASM *osm = (PC_GASM*)pc->data;
1018 
1019   PetscFunctionBegin;
1020   osm->type     = type;
1021   osm->type_set = PETSC_TRUE;
1022   PetscFunctionReturn(0);
1023 }
1024 
1025 #undef __FUNCT__
1026 #define __FUNCT__ "PCGASMSetSortIndices_GASM"
1027 static PetscErrorCode  PCGASMSetSortIndices_GASM(PC pc,PetscBool doSort)
1028 {
1029   PC_GASM *osm = (PC_GASM*)pc->data;
1030 
1031   PetscFunctionBegin;
1032   osm->sort_indices = doSort;
1033   PetscFunctionReturn(0);
1034 }
1035 
1036 #undef __FUNCT__
1037 #define __FUNCT__ "PCGASMGetSubKSP_GASM"
1038 /*
1039    FIXME: This routine might need to be modified now that multiple ranks per subdomain are allowed.
1040         In particular, it would upset the global subdomain number calculation.
1041 */
1042 static PetscErrorCode  PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp)
1043 {
1044   PC_GASM        *osm = (PC_GASM*)pc->data;
1045   PetscErrorCode ierr;
1046 
1047   PetscFunctionBegin;
1048   if (osm->n < 1) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ORDER,"Need to call PCSetUP() on PC (or KSPSetUp() on the outer KSP object) before calling here");
1049 
1050   if (n) *n = osm->n;
1051   if (first) {
1052     ierr    = MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1053     *first -= osm->n;
1054   }
1055   if (ksp) {
1056     /* Assume that local solves are now different; not necessarily
1057        true, though!  This flag is used only for PCView_GASM() */
1058     *ksp                        = osm->ksp;
1059     osm->same_subdomain_solvers = PETSC_FALSE;
1060   }
1061   PetscFunctionReturn(0);
1062 } /* PCGASMGetSubKSP_GASM() */
1063 
1064 #undef __FUNCT__
1065 #define __FUNCT__ "PCGASMSetSubdomains"
1066 /*@C
1067     PCGASMSetSubdomains - Sets the subdomains for this processor
1068     for the additive Schwarz preconditioner.
1069 
1070     Collective on pc
1071 
1072     Input Parameters:
1073 +   pc  - the preconditioner object
1074 .   n   - the number of subdomains for this processor
1075 .   iis - the index sets that define the inner subdomains (or NULL for PETSc to determine subdomains)
1076 -   ois - the index sets that define the outer subdomains (or NULL to use the same as iis, or to construct by expanding iis by the requested overlap)
1077 
1078     Notes:
1079     The IS indices use the parallel, global numbering of the vector entries.
1080     Inner subdomains are those where the correction is applied.
1081     Outer subdomains are those where the residual necessary to obtain the
1082     corrections is obtained (see PCGASMType for the use of inner/outer subdomains).
1083     Both inner and outer subdomains can extend over several processors.
1084     This processor's portion of a subdomain is known as a local subdomain.
1085 
1086     Inner subdomains can not overlap with each other, do not have any entities from remote processors,
1087     and  have to cover the entire local subdomain owned by the current processor. The index sets on each
1088     process should be ordered such that the ith local subdomain is connected to the ith remote subdomain
1089     on another MPI process.
1090 
1091     By default the GASM preconditioner uses 1 (local) subdomain per processor.
1092 
1093 
1094     Level: advanced
1095 
1096 .keywords: PC, GASM, set, subdomains, additive Schwarz
1097 
1098 .seealso: PCGASMSetNumSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
1099           PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
1100 @*/
1101 PetscErrorCode  PCGASMSetSubdomains(PC pc,PetscInt n,IS iis[],IS ois[])
1102 {
1103   PC_GASM *osm = (PC_GASM*)pc->data;
1104   PetscErrorCode ierr;
1105 
1106   PetscFunctionBegin;
1107   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1108   ierr = PetscTryMethod(pc,"PCGASMSetSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,iis,ois));CHKERRQ(ierr);
1109   osm->dm_subdomains = PETSC_FALSE;
1110   PetscFunctionReturn(0);
1111 }
1112 
1113 
1114 #undef __FUNCT__
1115 #define __FUNCT__ "PCGASMSetOverlap"
1116 /*@
1117     PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
1118     additive Schwarz preconditioner.  Either all or no processors in the
1119     pc communicator must call this routine.
1120 
1121     Logically Collective on pc
1122 
1123     Input Parameters:
1124 +   pc  - the preconditioner context
1125 -   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 0)
1126 
1127     Options Database Key:
1128 .   -pc_gasm_overlap <overlap> - Sets overlap
1129 
1130     Notes:
1131     By default the GASM preconditioner uses 1 subdomain per processor.  To use
1132     multiple subdomain per perocessor or "straddling" subdomains that intersect
1133     multiple processors use PCGASMSetSubdomains() (or option -pc_gasm_total_subdomains <n>).
1134 
1135     The overlap defaults to 0, so if one desires that no additional
1136     overlap be computed beyond what may have been set with a call to
1137     PCGASMSetSubdomains(), then ovl must be set to be 0.  In particular, if one does
1138     not explicitly set the subdomains in application code, then all overlap would be computed
1139     internally by PETSc, and using an overlap of 0 would result in an GASM
1140     variant that is equivalent to the block Jacobi preconditioner.
1141 
1142     Note that one can define initial index sets with any overlap via
1143     PCGASMSetSubdomains(); the routine PCGASMSetOverlap() merely allows
1144     PETSc to extend that overlap further, if desired.
1145 
1146     Level: intermediate
1147 
1148 .keywords: PC, GASM, set, overlap
1149 
1150 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1151           PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
1152 @*/
1153 PetscErrorCode  PCGASMSetOverlap(PC pc,PetscInt ovl)
1154 {
1155   PetscErrorCode ierr;
1156   PC_GASM *osm = (PC_GASM*)pc->data;
1157 
1158   PetscFunctionBegin;
1159   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1160   PetscValidLogicalCollectiveInt(pc,ovl,2);
1161   ierr = PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));CHKERRQ(ierr);
1162   osm->dm_subdomains = PETSC_FALSE;
1163   PetscFunctionReturn(0);
1164 }
1165 
1166 #undef __FUNCT__
1167 #define __FUNCT__ "PCGASMSetType"
1168 /*@
1169     PCGASMSetType - Sets the type of restriction and interpolation used
1170     for local problems in the additive Schwarz method.
1171 
1172     Logically Collective on PC
1173 
1174     Input Parameters:
1175 +   pc  - the preconditioner context
1176 -   type - variant of GASM, one of
1177 .vb
1178       PC_GASM_BASIC       - full interpolation and restriction
1179       PC_GASM_RESTRICT    - full restriction, local processor interpolation
1180       PC_GASM_INTERPOLATE - full interpolation, local processor restriction
1181       PC_GASM_NONE        - local processor restriction and interpolation
1182 .ve
1183 
1184     Options Database Key:
1185 .   -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
1186 
1187     Level: intermediate
1188 
1189 .keywords: PC, GASM, set, type
1190 
1191 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1192           PCGASMCreateSubdomains2D()
1193 @*/
1194 PetscErrorCode  PCGASMSetType(PC pc,PCGASMType type)
1195 {
1196   PetscErrorCode ierr;
1197 
1198   PetscFunctionBegin;
1199   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1200   PetscValidLogicalCollectiveEnum(pc,type,2);
1201   ierr = PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));CHKERRQ(ierr);
1202   PetscFunctionReturn(0);
1203 }
1204 
1205 #undef __FUNCT__
1206 #define __FUNCT__ "PCGASMSetSortIndices"
1207 /*@
1208     PCGASMSetSortIndices - Determines whether subdomain indices are sorted.
1209 
1210     Logically Collective on PC
1211 
1212     Input Parameters:
1213 +   pc  - the preconditioner context
1214 -   doSort - sort the subdomain indices
1215 
1216     Level: intermediate
1217 
1218 .keywords: PC, GASM, set, type
1219 
1220 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1221           PCGASMCreateSubdomains2D()
1222 @*/
1223 PetscErrorCode  PCGASMSetSortIndices(PC pc,PetscBool doSort)
1224 {
1225   PetscErrorCode ierr;
1226 
1227   PetscFunctionBegin;
1228   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1229   PetscValidLogicalCollectiveBool(pc,doSort,2);
1230   ierr = PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));CHKERRQ(ierr);
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 #undef __FUNCT__
1235 #define __FUNCT__ "PCGASMGetSubKSP"
1236 /*@C
1237    PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on
1238    this processor.
1239 
1240    Collective on PC iff first_local is requested
1241 
1242    Input Parameter:
1243 .  pc - the preconditioner context
1244 
1245    Output Parameters:
1246 +  n_local - the number of blocks on this processor or NULL
1247 .  first_local - the global number of the first block on this processor or NULL,
1248                  all processors must request or all must pass NULL
1249 -  ksp - the array of KSP contexts
1250 
1251    Note:
1252    After PCGASMGetSubKSP() the array of KSPes is not to be freed
1253 
1254    Currently for some matrix implementations only 1 block per processor
1255    is supported.
1256 
1257    You must call KSPSetUp() before calling PCGASMGetSubKSP().
1258 
1259    Level: advanced
1260 
1261 .keywords: PC, GASM, additive Schwarz, get, sub, KSP, context
1262 
1263 .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap(),
1264           PCGASMCreateSubdomains2D(),
1265 @*/
1266 PetscErrorCode  PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
1267 {
1268   PetscErrorCode ierr;
1269 
1270   PetscFunctionBegin;
1271   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1272   ierr = PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));CHKERRQ(ierr);
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 /* -------------------------------------------------------------------------------------*/
1277 /*MC
1278    PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
1279            its own KSP object.
1280 
1281    Options Database Keys:
1282 +  -pc_gasm_total_subdomains <n>  - Sets total number of local subdomains to be distributed among processors
1283 .  -pc_gasm_view_subdomains       - activates the printing of subdomain indices in PCView(), -ksp_view or -snes_view
1284 .  -pc_gasm_print_subdomains      - activates the printing of subdomain indices in PCSetUp()
1285 .  -pc_gasm_overlap <ovl>         - Sets overlap by which to (automatically) extend local subdomains
1286 -  -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
1287 
1288      IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
1289       will get a different convergence rate due to the default option of -pc_gasm_type restrict. Use
1290       -pc_gasm_type basic to use the standard GASM.
1291 
1292    Notes: Each processor can have one or more blocks, but a block cannot be shared by more
1293      than one processor. Defaults to one block per processor.
1294 
1295      To set options on the solvers for each block append -sub_ to all the KSP, and PC
1296         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
1297 
1298      To set the options on the solvers separate for each block call PCGASMGetSubKSP()
1299          and set the options directly on the resulting KSP object (you can access its PC
1300          with KSPGetPC())
1301 
1302 
1303    Level: beginner
1304 
1305    Concepts: additive Schwarz method
1306 
1307     References:
1308 +   1. - M Dryja, OB Widlund, An additive variant of the Schwarz alternating method for the case of many subregions
1309      Courant Institute, New York University Technical report
1310 -   2. - Barry Smith, Petter Bjorstad, and William Gropp, Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
1311     Cambridge University Press.
1312 
1313 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
1314            PCBJACOBI,  PCGASMGetSubKSP(), PCGASMSetSubdomains(),
1315            PCSetModifySubmatrices(), PCGASMSetOverlap(), PCGASMSetType()
1316 
1317 M*/
1318 
1319 #undef __FUNCT__
1320 #define __FUNCT__ "PCCreate_GASM"
1321 PETSC_EXTERN PetscErrorCode PCCreate_GASM(PC pc)
1322 {
1323   PetscErrorCode ierr;
1324   PC_GASM        *osm;
1325 
1326   PetscFunctionBegin;
1327   ierr = PetscNewLog(pc,&osm);CHKERRQ(ierr);
1328 
1329   osm->N                        = PETSC_DETERMINE;
1330   osm->n                        = PETSC_DECIDE;
1331   osm->nmax                     = PETSC_DETERMINE;
1332   osm->overlap                  = 0;
1333   osm->ksp                      = 0;
1334   osm->gorestriction            = 0;
1335   osm->girestriction            = 0;
1336   osm->pctoouter                = 0;
1337   osm->gx                       = 0;
1338   osm->gy                       = 0;
1339   osm->x                        = 0;
1340   osm->y                        = 0;
1341   osm->pcx                      = 0;
1342   osm->pcy                      = 0;
1343   osm->permutationIS            = 0;
1344   osm->permutationP             = 0;
1345   osm->pcmat                    = 0;
1346   osm->ois                      = 0;
1347   osm->iis                      = 0;
1348   osm->pmat                     = 0;
1349   osm->type                     = PC_GASM_RESTRICT;
1350   osm->same_subdomain_solvers   = PETSC_TRUE;
1351   osm->sort_indices             = PETSC_TRUE;
1352   osm->dm_subdomains            = PETSC_FALSE;
1353   osm->hierarchicalpartitioning = PETSC_FALSE;
1354 
1355   pc->data                 = (void*)osm;
1356   pc->ops->apply           = PCApply_GASM;
1357   pc->ops->applytranspose  = PCApplyTranspose_GASM;
1358   pc->ops->setup           = PCSetUp_GASM;
1359   pc->ops->reset           = PCReset_GASM;
1360   pc->ops->destroy         = PCDestroy_GASM;
1361   pc->ops->setfromoptions  = PCSetFromOptions_GASM;
1362   pc->ops->setuponblocks   = PCSetUpOnBlocks_GASM;
1363   pc->ops->view            = PCView_GASM;
1364   pc->ops->applyrichardson = 0;
1365 
1366   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSubdomains_C",PCGASMSetSubdomains_GASM);CHKERRQ(ierr);
1367   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetOverlap_C",PCGASMSetOverlap_GASM);CHKERRQ(ierr);
1368   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetType_C",PCGASMSetType_GASM);CHKERRQ(ierr);
1369   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSortIndices_C",PCGASMSetSortIndices_GASM);CHKERRQ(ierr);
1370   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMGetSubKSP_C",PCGASMGetSubKSP_GASM);CHKERRQ(ierr);
1371   PetscFunctionReturn(0);
1372 }
1373 
1374 
1375 #undef __FUNCT__
1376 #define __FUNCT__ "PCGASMCreateLocalSubdomains"
1377 PetscErrorCode  PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[])
1378 {
1379   MatPartitioning mpart;
1380   const char      *prefix;
1381   PetscErrorCode  (*f)(Mat,MatReuse,Mat*);
1382   PetscMPIInt     size;
1383   PetscInt        i,j,rstart,rend,bs;
1384   PetscBool       isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
1385   Mat             Ad     = NULL, adj;
1386   IS              ispart,isnumb,*is;
1387   PetscErrorCode  ierr;
1388 
1389   PetscFunctionBegin;
1390   if (nloc < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local subdomains must > 0, got nloc = %D",nloc);
1391 
1392   /* Get prefix, row distribution, and block size */
1393   ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr);
1394   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1395   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
1396   if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);
1397 
1398   /* Get diagonal block from matrix if possible */
1399   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1400   ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",&f);CHKERRQ(ierr);
1401   if (f) {
1402     ierr = MatGetDiagonalBlock(A,&Ad);CHKERRQ(ierr);
1403   } else if (size == 1) {
1404     Ad = A;
1405   }
1406   if (Ad) {
1407     ierr = PetscObjectTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
1408     if (!isbaij) {ierr = PetscObjectTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);}
1409   }
1410   if (Ad && nloc > 1) {
1411     PetscBool  match,done;
1412     /* Try to setup a good matrix partitioning if available */
1413     ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr);
1414     ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
1415     ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
1416     ierr = PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);CHKERRQ(ierr);
1417     if (!match) {
1418       ierr = PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);CHKERRQ(ierr);
1419     }
1420     if (!match) { /* assume a "good" partitioner is available */
1421       PetscInt       na;
1422       const PetscInt *ia,*ja;
1423       ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1424       if (done) {
1425         /* Build adjacency matrix by hand. Unfortunately a call to
1426            MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1427            remove the block-aij structure and we cannot expect
1428            MatPartitioning to split vertices as we need */
1429         PetscInt       i,j,len,nnz,cnt,*iia=0,*jja=0;
1430         const PetscInt *row;
1431         nnz = 0;
1432         for (i=0; i<na; i++) { /* count number of nonzeros */
1433           len = ia[i+1] - ia[i];
1434           row = ja + ia[i];
1435           for (j=0; j<len; j++) {
1436             if (row[j] == i) { /* don't count diagonal */
1437               len--; break;
1438             }
1439           }
1440           nnz += len;
1441         }
1442         ierr   = PetscMalloc1(na+1,&iia);CHKERRQ(ierr);
1443         ierr   = PetscMalloc1(nnz,&jja);CHKERRQ(ierr);
1444         nnz    = 0;
1445         iia[0] = 0;
1446         for (i=0; i<na; i++) { /* fill adjacency */
1447           cnt = 0;
1448           len = ia[i+1] - ia[i];
1449           row = ja + ia[i];
1450           for (j=0; j<len; j++) {
1451             if (row[j] != i) jja[nnz+cnt++] = row[j]; /* if not diagonal */
1452           }
1453           nnz += cnt;
1454           iia[i+1] = nnz;
1455         }
1456         /* Partitioning of the adjacency matrix */
1457         ierr      = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,NULL,&adj);CHKERRQ(ierr);
1458         ierr      = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr);
1459         ierr      = MatPartitioningSetNParts(mpart,nloc);CHKERRQ(ierr);
1460         ierr      = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr);
1461         ierr      = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr);
1462         ierr      = MatDestroy(&adj);CHKERRQ(ierr);
1463         foundpart = PETSC_TRUE;
1464       }
1465       ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1466     }
1467     ierr = MatPartitioningDestroy(&mpart);CHKERRQ(ierr);
1468   }
1469   ierr = PetscMalloc1(nloc,&is);CHKERRQ(ierr);
1470   if (!foundpart) {
1471 
1472     /* Partitioning by contiguous chunks of rows */
1473 
1474     PetscInt mbs   = (rend-rstart)/bs;
1475     PetscInt start = rstart;
1476     for (i=0; i<nloc; i++) {
1477       PetscInt count = (mbs/nloc + ((mbs % nloc) > i)) * bs;
1478       ierr   = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr);
1479       start += count;
1480     }
1481 
1482   } else {
1483 
1484     /* Partitioning by adjacency of diagonal block  */
1485 
1486     const PetscInt *numbering;
1487     PetscInt       *count,nidx,*indices,*newidx,start=0;
1488     /* Get node count in each partition */
1489     ierr = PetscMalloc1(nloc,&count);CHKERRQ(ierr);
1490     ierr = ISPartitioningCount(ispart,nloc,count);CHKERRQ(ierr);
1491     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1492       for (i=0; i<nloc; i++) count[i] *= bs;
1493     }
1494     /* Build indices from node numbering */
1495     ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr);
1496     ierr = PetscMalloc1(nidx,&indices);CHKERRQ(ierr);
1497     for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
1498     ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr);
1499     ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr);
1500     ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr);
1501     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1502       ierr = PetscMalloc1(nidx*bs,&newidx);CHKERRQ(ierr);
1503       for (i=0; i<nidx; i++) {
1504         for (j=0; j<bs; j++) newidx[i*bs+j] = indices[i]*bs + j;
1505       }
1506       ierr    = PetscFree(indices);CHKERRQ(ierr);
1507       nidx   *= bs;
1508       indices = newidx;
1509     }
1510     /* Shift to get global indices */
1511     for (i=0; i<nidx; i++) indices[i] += rstart;
1512 
1513     /* Build the index sets for each block */
1514     for (i=0; i<nloc; i++) {
1515       ierr   = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);CHKERRQ(ierr);
1516       ierr   = ISSort(is[i]);CHKERRQ(ierr);
1517       start += count[i];
1518     }
1519 
1520     ierr = PetscFree(count);CHKERRQ(ierr);
1521     ierr = PetscFree(indices);CHKERRQ(ierr);
1522     ierr = ISDestroy(&isnumb);CHKERRQ(ierr);
1523     ierr = ISDestroy(&ispart);CHKERRQ(ierr);
1524   }
1525   *iis = is;
1526   PetscFunctionReturn(0);
1527 }
1528 
1529 #undef __FUNCT__
1530 #define __FUNCT__ "PCGASMCreateStraddlingSubdomains"
1531 PETSC_INTERN PetscErrorCode  PCGASMCreateStraddlingSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
1532 {
1533   PetscErrorCode  ierr;
1534 
1535   PetscFunctionBegin;
1536   ierr = MatSubdomainsCreateCoalesce(A,N,n,iis);CHKERRQ(ierr);
1537   PetscFunctionReturn(0);
1538 }
1539 
1540 
1541 
1542 #undef __FUNCT__
1543 #define __FUNCT__ "PCGASMCreateSubdomains"
1544 /*@C
1545    PCGASMCreateSubdomains - Creates n index sets defining n nonoverlapping subdomains for the additive
1546    Schwarz preconditioner for a any problem based on its matrix.
1547 
1548    Collective
1549 
1550    Input Parameters:
1551 +  A       - The global matrix operator
1552 -  N       - the number of global subdomains requested
1553 
1554    Output Parameters:
1555 +  n   - the number of subdomains created on this processor
1556 -  iis - the array of index sets defining the local inner subdomains (on which the correction is applied)
1557 
1558    Level: advanced
1559 
1560    Note: When N >= A's communicator size, each subdomain is local -- contained within a single processor.
1561          When N < size, the subdomains are 'straddling' (processor boundaries) and are no longer local.
1562          The resulting subdomains can be use in PCGASMSetSubdomains(pc,n,iss,NULL).  The overlapping
1563 	 outer subdomains will be automatically generated from these according to the requested amount of
1564 	 overlap; this is currently supported only with local subdomains.
1565 
1566 
1567 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid
1568 
1569 .seealso: PCGASMSetSubdomains(), PCGASMDestroySubdomains()
1570 @*/
1571 PetscErrorCode  PCGASMCreateSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
1572 {
1573   PetscMPIInt     size;
1574   PetscErrorCode  ierr;
1575 
1576   PetscFunctionBegin;
1577   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1578   PetscValidPointer(iis,4);
1579 
1580   if (N < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Number of subdomains must be > 0, N = %D",N);
1581   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1582   if (N >= size) {
1583     *n = N/size + (N%size);
1584     ierr = PCGASMCreateLocalSubdomains(A,*n,iis);CHKERRQ(ierr);
1585   } else {
1586     ierr = PCGASMCreateStraddlingSubdomains(A,N,n,iis);CHKERRQ(ierr);
1587   }
1588   PetscFunctionReturn(0);
1589 }
1590 
1591 #undef __FUNCT__
1592 #define __FUNCT__ "PCGASMDestroySubdomains"
1593 /*@C
1594    PCGASMDestroySubdomains - Destroys the index sets created with
1595    PCGASMCreateSubdomains() or PCGASMCreateSubdomains2D. Should be
1596    called after setting subdomains with PCGASMSetSubdomains().
1597 
1598    Collective
1599 
1600    Input Parameters:
1601 +  n   - the number of index sets
1602 .  iis - the array of inner subdomains,
1603 -  ois - the array of outer subdomains, can be NULL
1604 
1605    Level: intermediate
1606 
1607    Notes: this is merely a convenience subroutine that walks each list,
1608    destroys each IS on the list, and then frees the list. At the end the
1609    list pointers are set to NULL.
1610 
1611 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid
1612 
1613 .seealso: PCGASMCreateSubdomains(), PCGASMSetSubdomains()
1614 @*/
1615 PetscErrorCode  PCGASMDestroySubdomains(PetscInt n,IS **iis,IS **ois)
1616 {
1617   PetscInt       i;
1618   PetscErrorCode ierr;
1619 
1620   PetscFunctionBegin;
1621   if (n <= 0) PetscFunctionReturn(0);
1622   if (ois) {
1623     PetscValidPointer(ois,3);
1624     if (*ois) {
1625       PetscValidPointer(*ois,3);
1626       for (i=0; i<n; i++) {
1627         ierr = ISDestroy(&(*ois)[i]);CHKERRQ(ierr);
1628       }
1629       ierr = PetscFree((*ois));CHKERRQ(ierr);
1630     }
1631   }
1632   if (iis) {
1633     PetscValidPointer(iis,2);
1634     if (*iis) {
1635       PetscValidPointer(*iis,2);
1636       for (i=0; i<n; i++) {
1637         ierr = ISDestroy(&(*iis)[i]);CHKERRQ(ierr);
1638       }
1639       ierr = PetscFree((*iis));CHKERRQ(ierr);
1640     }
1641   }
1642   PetscFunctionReturn(0);
1643 }
1644 
1645 
1646 #define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \
1647   {                                                                                                       \
1648     PetscInt first_row = first/M, last_row = last/M+1;                                                     \
1649     /*                                                                                                    \
1650      Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners       \
1651      of the bounding box of the intersection of the subdomain with the local ownership range (local       \
1652      subdomain).                                                                                          \
1653      Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows       \
1654      of the intersection.                                                                                 \
1655     */                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             \
1656     /* ylow_loc is the grid row containing the first element of the local sumbdomain */                   \
1657     *ylow_loc = PetscMax(first_row,ylow);                                                                    \
1658     /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
1659     *xleft_loc = *ylow_loc==first_row ? PetscMax(first%M,xleft) : xleft;                                                                            \
1660     /* yhigh_loc is the grid row above the last local subdomain element */                                                                    \
1661     *yhigh_loc = PetscMin(last_row,yhigh);                                                                                                     \
1662     /* xright is the offset of the end of the  local subdomain within its grid row (might actually be outside the local subdomain) */         \
1663     *xright_loc = *yhigh_loc==last_row ? PetscMin(xright,last%M) : xright;                                                                          \
1664     /* Now compute the size of the local subdomain n. */ \
1665     *n = 0;                                               \
1666     if (*ylow_loc < *yhigh_loc) {                           \
1667       PetscInt width = xright-xleft;                     \
1668       *n += width*(*yhigh_loc-*ylow_loc-1);                 \
1669       *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \
1670       *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \
1671     } \
1672   }
1673 
1674 
1675 
1676 #undef __FUNCT__
1677 #define __FUNCT__ "PCGASMCreateSubdomains2D"
1678 /*@
1679    PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1680    preconditioner for a two-dimensional problem on a regular grid.
1681 
1682    Collective
1683 
1684    Input Parameters:
1685 +  M, N               - the global number of grid points in the x and y directions
1686 .  Mdomains, Ndomains - the global number of subdomains in the x and y directions
1687 .  dof                - degrees of freedom per node
1688 -  overlap            - overlap in mesh lines
1689 
1690    Output Parameters:
1691 +  Nsub - the number of local subdomains created
1692 .  iis  - array of index sets defining inner (nonoverlapping) subdomains
1693 -  ois  - array of index sets defining outer (overlapping, if overlap > 0) subdomains
1694 
1695 
1696    Level: advanced
1697 
1698 .keywords: PC, GASM, additive Schwarz, create, subdomains, 2D, regular grid
1699 
1700 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(), PCGASMSetOverlap()
1701 @*/
1702 PetscErrorCode  PCGASMCreateSubdomains2D(PC pc,PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap,PetscInt *nsub,IS **iis,IS **ois)
1703 {
1704   PetscErrorCode ierr;
1705   PetscMPIInt    size, rank;
1706   PetscInt       i, j;
1707   PetscInt       maxheight, maxwidth;
1708   PetscInt       xstart, xleft, xright, xleft_loc, xright_loc;
1709   PetscInt       ystart, ylow,  yhigh,  ylow_loc,  yhigh_loc;
1710   PetscInt       x[2][2], y[2][2], n[2];
1711   PetscInt       first, last;
1712   PetscInt       nidx, *idx;
1713   PetscInt       ii,jj,s,q,d;
1714   PetscInt       k,kk;
1715   PetscMPIInt    color;
1716   MPI_Comm       comm, subcomm;
1717   IS             **xis = 0, **is = ois, **is_local = iis;
1718 
1719   PetscFunctionBegin;
1720   ierr = PetscObjectGetComm((PetscObject)pc, &comm);CHKERRQ(ierr);
1721   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1722   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1723   ierr = MatGetOwnershipRange(pc->pmat, &first, &last);CHKERRQ(ierr);
1724   if (first%dof || last%dof) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) "
1725                                       "does not respect the number of degrees of freedom per grid point %D", first, last, dof);
1726 
1727   /* Determine the number of domains with nonzero intersections with the local ownership range. */
1728   s      = 0;
1729   ystart = 0;
1730   for (j=0; j<Ndomains; ++j) {
1731     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1732     if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N);
1733     /* Vertical domain limits with an overlap. */
1734     ylow   = PetscMax(ystart - overlap,0);
1735     yhigh  = PetscMin(ystart + maxheight + overlap,N);
1736     xstart = 0;
1737     for (i=0; i<Mdomains; ++i) {
1738       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1739       if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1740       /* Horizontal domain limits with an overlap. */
1741       xleft  = PetscMax(xstart - overlap,0);
1742       xright = PetscMin(xstart + maxwidth + overlap,M);
1743       /*
1744          Determine whether this subdomain intersects this processor's ownership range of pc->pmat.
1745       */
1746       PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1747       if (nidx) ++s;
1748       xstart += maxwidth;
1749     } /* for (i = 0; i < Mdomains; ++i) */
1750     ystart += maxheight;
1751   } /* for (j = 0; j < Ndomains; ++j) */
1752 
1753   /* Now we can allocate the necessary number of ISs. */
1754   *nsub  = s;
1755   ierr   = PetscMalloc1(*nsub,is);CHKERRQ(ierr);
1756   ierr   = PetscMalloc1(*nsub,is_local);CHKERRQ(ierr);
1757   s      = 0;
1758   ystart = 0;
1759   for (j=0; j<Ndomains; ++j) {
1760     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1761     if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N);
1762     /* Vertical domain limits with an overlap. */
1763     y[0][0] = PetscMax(ystart - overlap,0);
1764     y[0][1] = PetscMin(ystart + maxheight + overlap,N);
1765     /* Vertical domain limits without an overlap. */
1766     y[1][0] = ystart;
1767     y[1][1] = PetscMin(ystart + maxheight,N);
1768     xstart  = 0;
1769     for (i=0; i<Mdomains; ++i) {
1770       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1771       if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1772       /* Horizontal domain limits with an overlap. */
1773       x[0][0] = PetscMax(xstart - overlap,0);
1774       x[0][1] = PetscMin(xstart + maxwidth + overlap,M);
1775       /* Horizontal domain limits without an overlap. */
1776       x[1][0] = xstart;
1777       x[1][1] = PetscMin(xstart+maxwidth,M);
1778       /*
1779          Determine whether this domain intersects this processor's ownership range of pc->pmat.
1780          Do this twice: first for the domains with overlaps, and once without.
1781          During the first pass create the subcommunicators, and use them on the second pass as well.
1782       */
1783       for (q = 0; q < 2; ++q) {
1784         PetscBool split = PETSC_FALSE;
1785         /*
1786           domain limits, (xleft, xright) and (ylow, yheigh) are adjusted
1787           according to whether the domain with an overlap or without is considered.
1788         */
1789         xleft = x[q][0]; xright = x[q][1];
1790         ylow  = y[q][0]; yhigh  = y[q][1];
1791         PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1792         nidx *= dof;
1793         n[q]  = nidx;
1794         /*
1795          Based on the counted number of indices in the local domain *with an overlap*,
1796          construct a subcommunicator of all the processors supporting this domain.
1797          Observe that a domain with an overlap might have nontrivial local support,
1798          while the domain without an overlap might not.  Hence, the decision to participate
1799          in the subcommunicator must be based on the domain with an overlap.
1800          */
1801         if (q == 0) {
1802           if (nidx) color = 1;
1803           else color = MPI_UNDEFINED;
1804           ierr = MPI_Comm_split(comm, color, rank, &subcomm);CHKERRQ(ierr);
1805           split = PETSC_TRUE;
1806         }
1807         /*
1808          Proceed only if the number of local indices *with an overlap* is nonzero.
1809          */
1810         if (n[0]) {
1811           if (q == 0) xis = is;
1812           if (q == 1) {
1813             /*
1814              The IS for the no-overlap subdomain shares a communicator with the overlapping domain.
1815              Moreover, if the overlap is zero, the two ISs are identical.
1816              */
1817             if (overlap == 0) {
1818               (*is_local)[s] = (*is)[s];
1819               ierr           = PetscObjectReference((PetscObject)(*is)[s]);CHKERRQ(ierr);
1820               continue;
1821             } else {
1822               xis     = is_local;
1823               subcomm = ((PetscObject)(*is)[s])->comm;
1824             }
1825           } /* if (q == 1) */
1826           idx  = NULL;
1827           ierr = PetscMalloc1(nidx,&idx);CHKERRQ(ierr);
1828           if (nidx) {
1829             k = 0;
1830             for (jj=ylow_loc; jj<yhigh_loc; ++jj) {
1831               PetscInt x0 = (jj==ylow_loc) ? xleft_loc : xleft;
1832               PetscInt x1 = (jj==yhigh_loc-1) ? xright_loc : xright;
1833               kk = dof*(M*jj + x0);
1834               for (ii=x0; ii<x1; ++ii) {
1835                 for (d = 0; d < dof; ++d) {
1836                   idx[k++] = kk++;
1837                 }
1838               }
1839             }
1840           }
1841           ierr = ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*xis)+s);CHKERRQ(ierr);
1842           if (split) {
1843             ierr = MPI_Comm_free(&subcomm);CHKERRQ(ierr);
1844           }
1845         }/* if (n[0]) */
1846       }/* for (q = 0; q < 2; ++q) */
1847       if (n[0]) ++s;
1848       xstart += maxwidth;
1849     } /* for (i = 0; i < Mdomains; ++i) */
1850     ystart += maxheight;
1851   } /* for (j = 0; j < Ndomains; ++j) */
1852   PetscFunctionReturn(0);
1853 }
1854 
1855 #undef __FUNCT__
1856 #define __FUNCT__ "PCGASMGetSubdomains"
1857 /*@C
1858     PCGASMGetSubdomains - Gets the subdomains supported on this processor
1859     for the additive Schwarz preconditioner.
1860 
1861     Not Collective
1862 
1863     Input Parameter:
1864 .   pc - the preconditioner context
1865 
1866     Output Parameters:
1867 +   n   - the number of subdomains for this processor (default value = 1)
1868 .   iis - the index sets that define the inner subdomains (without overlap) supported on this processor (can be NULL)
1869 -   ois - the index sets that define the outer subdomains (with overlap) supported on this processor (can be NULL)
1870 
1871 
1872     Notes:
1873     The user is responsible for destroying the ISs and freeing the returned arrays.
1874     The IS numbering is in the parallel, global numbering of the vector.
1875 
1876     Level: advanced
1877 
1878 .keywords: PC, GASM, get, subdomains, additive Schwarz
1879 
1880 .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(), PCGASMCreateSubdomains2D(),
1881           PCGASMSetSubdomains(), PCGASMGetSubmatrices()
1882 @*/
1883 PetscErrorCode  PCGASMGetSubdomains(PC pc,PetscInt *n,IS *iis[],IS *ois[])
1884 {
1885   PC_GASM        *osm;
1886   PetscErrorCode ierr;
1887   PetscBool      match;
1888   PetscInt       i;
1889 
1890   PetscFunctionBegin;
1891   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1892   ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1893   if (!match) SETERRQ2(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Incorrect object type: expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
1894   osm = (PC_GASM*)pc->data;
1895   if (n) *n = osm->n;
1896   if (iis) {
1897     ierr = PetscMalloc1(osm->n, iis);CHKERRQ(ierr);
1898   }
1899   if (ois) {
1900     ierr = PetscMalloc1(osm->n, ois);CHKERRQ(ierr);
1901   }
1902   if (iis || ois) {
1903     for (i = 0; i < osm->n; ++i) {
1904       if (iis) (*iis)[i] = osm->iis[i];
1905       if (ois) (*ois)[i] = osm->ois[i];
1906     }
1907   }
1908   PetscFunctionReturn(0);
1909 }
1910 
1911 #undef __FUNCT__
1912 #define __FUNCT__ "PCGASMGetSubmatrices"
1913 /*@C
1914     PCGASMGetSubmatrices - Gets the local submatrices (for this processor
1915     only) for the additive Schwarz preconditioner.
1916 
1917     Not Collective
1918 
1919     Input Parameter:
1920 .   pc - the preconditioner context
1921 
1922     Output Parameters:
1923 +   n   - the number of matrices for this processor (default value = 1)
1924 -   mat - the matrices
1925 
1926     Notes: matrices returned by this routine have the same communicators as the index sets (IS)
1927            used to define subdomains in PCGASMSetSubdomains()
1928     Level: advanced
1929 
1930 .keywords: PC, GASM, set, local, subdomains, additive Schwarz, block Jacobi
1931 
1932 .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(),
1933           PCGASMCreateSubdomains2D(), PCGASMSetSubdomains(), PCGASMGetSubdomains()
1934 @*/
1935 PetscErrorCode  PCGASMGetSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1936 {
1937   PC_GASM        *osm;
1938   PetscErrorCode ierr;
1939   PetscBool      match;
1940 
1941   PetscFunctionBegin;
1942   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1943   PetscValidIntPointer(n,2);
1944   if (mat) PetscValidPointer(mat,3);
1945   if (!pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUP() or PCSetUp().");
1946   ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1947   if (!match) SETERRQ2(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
1948   osm = (PC_GASM*)pc->data;
1949   if (n) *n = osm->n;
1950   if (mat) *mat = osm->pmat;
1951   PetscFunctionReturn(0);
1952 }
1953 
1954 #undef __FUNCT__
1955 #define __FUNCT__ "PCGASMSetUseDMSubdomains"
1956 /*@
1957     PCGASMSetUseDMSubdomains - Indicates whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
1958     Logically Collective
1959 
1960     Input Parameter:
1961 +   pc  - the preconditioner
1962 -   flg - boolean indicating whether to use subdomains defined by the DM
1963 
1964     Options Database Key:
1965 .   -pc_gasm_dm_subdomains -pc_gasm_overlap -pc_gasm_total_subdomains
1966 
1967     Level: intermediate
1968 
1969     Notes:
1970     PCGASMSetSubdomains(), PCGASMSetTotalSubdomains() or PCGASMSetOverlap() take precedence over PCGASMSetUseDMSubdomains(),
1971     so setting PCGASMSetSubdomains() with nontrivial subdomain ISs or any of PCGASMSetTotalSubdomains() and PCGASMSetOverlap()
1972     automatically turns the latter off.
1973 
1974 .keywords: PC, ASM, DM, set, subdomains, additive Schwarz
1975 
1976 .seealso: PCGASMGetUseDMSubdomains(), PCGASMSetSubdomains(), PCGASMSetOverlap()
1977           PCGASMCreateSubdomains2D()
1978 @*/
1979 PetscErrorCode  PCGASMSetUseDMSubdomains(PC pc,PetscBool flg)
1980 {
1981   PC_GASM        *osm = (PC_GASM*)pc->data;
1982   PetscErrorCode ierr;
1983   PetscBool      match;
1984 
1985   PetscFunctionBegin;
1986   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1987   PetscValidLogicalCollectiveBool(pc,flg,2);
1988   if (pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Not for a setup PC.");
1989   ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1990   if (match) {
1991     if (!osm->user_subdomains && osm->N == PETSC_DETERMINE && osm->overlap < 0) {
1992       osm->dm_subdomains = flg;
1993     }
1994   }
1995   PetscFunctionReturn(0);
1996 }
1997 
1998 #undef __FUNCT__
1999 #define __FUNCT__ "PCGASMGetUseDMSubdomains"
2000 /*@
2001     PCGASMGetUseDMSubdomains - Returns flag indicating whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
2002     Not Collective
2003 
2004     Input Parameter:
2005 .   pc  - the preconditioner
2006 
2007     Output Parameter:
2008 .   flg - boolean indicating whether to use subdomains defined by the DM
2009 
2010     Level: intermediate
2011 
2012 .keywords: PC, ASM, DM, set, subdomains, additive Schwarz
2013 
2014 .seealso: PCGASMSetUseDMSubdomains(), PCGASMSetOverlap()
2015           PCGASMCreateSubdomains2D()
2016 @*/
2017 PetscErrorCode  PCGASMGetUseDMSubdomains(PC pc,PetscBool* flg)
2018 {
2019   PC_GASM        *osm = (PC_GASM*)pc->data;
2020   PetscErrorCode ierr;
2021   PetscBool      match;
2022 
2023   PetscFunctionBegin;
2024   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
2025   PetscValidPointer(flg,2);
2026   ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
2027   if (match) {
2028     if (flg) *flg = osm->dm_subdomains;
2029   }
2030   PetscFunctionReturn(0);
2031 }
2032