xref: /petsc/src/ksp/pc/impls/gasm/gasm.c (revision 01a79839fc82a7dabb7a87cd2a8bb532c6bfa88d)
1 #define PETSCKSP_DLL
2 
3 /*
  This file defines a "generalized" additive Schwarz preconditioner for any Mat implementation.
5   In this version each processor may have any number of subdomains and a subdomain may live on multiple
6   processors.
7 
8        N    - total number of subdomains on all processors
9        n    - actual number of subdomains on this processor
10        nmax - maximum number of subdomains per processor
11 */
12 #include "private/pcimpl.h"     /*I "petscpc.h" I*/
13 
/* Private data for the GASM (generalized additive Schwarz) preconditioner. */
typedef struct {
  PetscInt   N,n,nmax;            /* N: total number of subdomains across all processors;
                                     n: number of subdomains supported on this processor;
                                     nmax: maximum number of subdomains per processor */
  PetscInt   overlap;             /* overlap requested by user; -1 means the user supplied
                                     already-overlapping subdomains (see PCGASMSetLocalSubdomains_GASM) */
  KSP        *ksp;                /* linear solvers for each block */
  Vec        gx,gy;               /* Merged work vectors */
  Vec        *x,*y;               /* Split work vectors; storage aliases pieces of storage of the above merged vectors. */
  IS         gis, gis_local;      /* merged ISs */
  VecScatter grestriction;        /* merged restriction */
  VecScatter gprolongation;       /* merged prolongation */
  IS         *is;                 /* index set that defines each overlapping subdomain */
  IS         *is_local;           /* index set that defines each local subdomain (same as subdomain with the overlap removed); may be NULL */
  Mat        *pmat;               /* subdomain block matrices */
  PCGASMType  type;               /* use reduced interpolation, restriction or both */
  PetscBool  type_set;            /* if user set this value (so won't change it for symmetric problems) */
  PetscBool  same_local_solves;   /* flag indicating whether all local solvers are same */
  PetscBool  sort_indices;        /* flag to sort subdomain indices */
} PC_GASM;
31 
32 #undef __FUNCT__
33 #define __FUNCT__ "PCGASMPrintSubdomains"
34 static PetscErrorCode  PCGASMPrintSubdomains(PC pc)
35 {
36   PC_GASM         *osm  = (PC_GASM*)pc->data;
37   const char     *prefix;
38   char           fname[PETSC_MAX_PATH_LEN+1];
39   PetscViewer    viewer, sviewer;
40   PetscInt       i,j,nidx;
41   const PetscInt *idx;
42   char           *cidx;
43   PetscBool      found;
44   PetscMPIInt    size, rank;
45   PetscErrorCode ierr;
46 
47   PetscFunctionBegin;
48   ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size); CHKERRQ(ierr);
49   ierr = MPI_Comm_rank(((PetscObject)pc)->comm, &rank); CHKERRQ(ierr);
50   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
51   ierr = PetscOptionsGetString(prefix,"-pc_gasm_print_subdomains",fname,PETSC_MAX_PATH_LEN,&found);CHKERRQ(ierr);
52   if (!found) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); };
53   for (i=0;i<osm->n;++i) {
54     ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr);
55     ierr = ISGetLocalSize(osm->is[i], &nidx); CHKERRQ(ierr);
56     /*
57      No more than 15 characters per index plus a space.
58      PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
59      in case nidx == 0. That will take care of the space for the trailing '\0' as well.
60      For nidx == 0, the whole string 16 '\0'.
61      */
62     ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx); CHKERRQ(ierr);
63     ierr = ISGetIndices(osm->is[i], &idx);    CHKERRQ(ierr);
64     ierr = PetscViewerStringOpen(((PetscObject)(osm->is[i]))->comm, cidx, 16*(nidx+1), &sviewer); CHKERRQ(ierr);
65     for(j = 0; j < nidx; ++j) {
66       ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]); CHKERRQ(ierr);
67     }
68     ierr = PetscViewerDestroy(sviewer); CHKERRQ(ierr);
69     ierr = ISRestoreIndices(osm->is[i],&idx); CHKERRQ(ierr);
70 
71     ierr = PetscViewerASCIIPrintf(viewer, "Subdomain with overlap\n"); CHKERRQ(ierr);
72     ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx); CHKERRQ(ierr);
73     ierr = PetscViewerFlush(viewer); CHKERRQ(ierr);
74     ierr = PetscViewerASCIIPrintf(viewer, "\n"); CHKERRQ(ierr);
75     ierr = PetscFree(cidx); CHKERRQ(ierr);
76     ierr = PetscViewerDestroy(viewer); CHKERRQ(ierr);
77     if (osm->is_local) {
78       ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr);
79       ierr = ISGetLocalSize(osm->is_local[i], &nidx); CHKERRQ(ierr);
80       /*
81        No more than 15 characters per index plus a space.
82        PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
83        in case nidx == 0. That will take care of the space for the trailing '\0' as well.
84        For nidx == 0, the whole string 16 '\0'.
85        */
86       ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx); CHKERRQ(ierr);
87       ierr = ISGetIndices(osm->is_local[i], &idx);    CHKERRQ(ierr);
88       ierr = PetscViewerStringOpen(((PetscObject)(osm->is_local[i]))->comm, cidx, 16*(nidx+1), &sviewer); CHKERRQ(ierr);
89       for(j = 0; j < nidx; ++j) {
90         ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]); CHKERRQ(ierr);
91       }
92       ierr = PetscViewerDestroy(sviewer); CHKERRQ(ierr);
93       ierr = ISRestoreIndices(osm->is_local[i],&idx); CHKERRQ(ierr);
94 
95       ierr = PetscViewerASCIIPrintf(viewer, "Subdomain without overlap\n"); CHKERRQ(ierr);
96       ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx); CHKERRQ(ierr);
97       ierr = PetscViewerFlush(viewer); CHKERRQ(ierr);
98       ierr = PetscViewerASCIIPrintf(viewer, "\n"); CHKERRQ(ierr);
99       ierr = PetscFree(cidx); CHKERRQ(ierr);
100       ierr = PetscViewerDestroy(viewer); CHKERRQ(ierr);
101     }
102   }
103   PetscFunctionReturn(0);
104 }
105 
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "PCView_GASM"
109 static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer)
110 {
111   PC_GASM         *osm = (PC_GASM*)pc->data;
112   const char     *prefix;
113   PetscErrorCode ierr;
114   PetscMPIInt    rank, size;
115   PetscInt       i,bsz;
116   PetscBool      iascii,isstring, print_subdomains;
117   PetscViewer    sviewer;
118 
119 
120   PetscFunctionBegin;
121   ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size); CHKERRQ(ierr);
122   ierr = MPI_Comm_rank(((PetscObject)pc)->comm, &rank); CHKERRQ(ierr);
123   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
124   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);CHKERRQ(ierr);
125   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
126   ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&print_subdomains,PETSC_NULL);CHKERRQ(ierr);
127   if (iascii) {
128     char overlaps[256] = "user-defined overlap",subdomains[256] = "total subdomains set";
129     if (osm->overlap >= 0) {ierr = PetscSNPrintf(overlaps,sizeof overlaps,"amount of overlap = %D",osm->overlap);CHKERRQ(ierr);}
130     if (osm->nmax > 0)     {ierr = PetscSNPrintf(subdomains,sizeof subdomains,"max number of local subdomains = %D",osm->nmax);CHKERRQ(ierr);}
131     ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d:%d] number of locally-supported subdomains = %D\n",(int)rank,(int)size,osm->n);CHKERRQ(ierr);
132     ierr = PetscViewerFlush(viewer); CHKERRQ(ierr);
133     ierr = PetscViewerASCIIPrintf(viewer,"  Generalized additive Schwarz: %s, %s\n",subdomains,overlaps);CHKERRQ(ierr);
134     ierr = PetscViewerASCIIPrintf(viewer,"  Generalized additive Schwarz: restriction/interpolation type - %s\n",PCGASMTypes[osm->type]);CHKERRQ(ierr);
135     ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
136     if (osm->same_local_solves) {
137       if (osm->ksp) {
138         ierr = PetscViewerASCIIPrintf(viewer,"  Local solve is same for all subdomains, in the following KSP and PC objects:\n");CHKERRQ(ierr);
139         ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
140         if (!rank) {
141           ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
142           ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);
143           ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
144         }
145         ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
146       }
147     } else {
148       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
149       ierr = PetscViewerASCIIPrintf(viewer,"  Local solve info for each subdomain is in the following KSP and PC objects:\n");CHKERRQ(ierr);
150       ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
151       ierr = PetscViewerASCIIPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
152       for (i=0; i<osm->nmax; i++) {
153         ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
154         if (i < osm->n) {
155           ierr = ISGetLocalSize(osm->is[i],&bsz);CHKERRQ(ierr);
156           ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"[%d:%d] local subdomain number %D, size = %D\n",(int)rank,(int)size,i,bsz);CHKERRQ(ierr);
157           ierr = KSPView(osm->ksp[i],sviewer);CHKERRQ(ierr);
158           ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
159         }
160         ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
161       }
162       ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
163       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
164     }
165   } else if (isstring) {
166     ierr = PetscViewerStringSPrintf(viewer," subdomains=%D, overlap=%D, type=%s",osm->n,osm->overlap,PCGASMTypes[osm->type]);CHKERRQ(ierr);
167     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
168     if (osm->ksp) {ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);}
169     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
170   } else {
171     SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_SUP,"Viewer type %s not supported for PCGASM",((PetscObject)viewer)->type_name);
172   }
173   ierr = PetscViewerFlush(viewer); CHKERRQ(ierr);
174   if(print_subdomains) {
175     ierr = PCGASMPrintSubdomains(pc); CHKERRQ(ierr);
176   }
177   PetscFunctionReturn(0);
178 }
179 
180 
181 
182 
183 
#undef __FUNCT__
#define __FUNCT__ "PCSetUp_GASM"
/*
   PCSetUp_GASM - Sets up the GASM preconditioner.

   On the first call this:
     1. determines the number of subdomains (osm->N, osm->n, osm->nmax),
     2. creates the subdomain index sets osm->is / osm->is_local if not user-supplied,
     3. extends the subdomains by the requested overlap,
     4. builds the merged index sets, merged work vectors gx/gy, and the
        restriction/prolongation scatters,
     5. creates one KSP per local subdomain.
   On subsequent calls it (re)extracts the subdomain matrices and updates the
   subdomain solvers' operators.
*/
static PetscErrorCode PCSetUp_GASM(PC pc)
{
  PC_GASM         *osm  = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscBool      symset,flg;
  PetscInt       i;
  PetscMPIInt    rank, size;
  MatReuse       scall = MAT_REUSE_MATRIX;
  KSP            ksp;
  PC             subpc;
  const char     *prefix,*pprefix;
  PetscInt       dn;       /* Number of indices in a single subdomain assigned to this processor. */
  const PetscInt *didx;    /* Indices from a single subdomain assigned to this processor. */
  PetscInt       ddn;      /* Number of indices in all subdomains assigned to this processor. */
  PetscInt       *ddidx;   /* Indices of all subdomains assigned to this processor. */
  IS             gid;      /* Identity IS of the size of all subdomains assigned to this processor. */
  Vec            x,y;
  PetscScalar    *gxarray, *gyarray;
  PetscInt       gfirst, glast;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
  if (!pc->setupcalled) {

    /* Default to the symmetric (basic) variant when the matrix is known symmetric
       and the user has not explicitly chosen a type. */
    if (!osm->type_set) {
      ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
      if (symset && flg) { osm->type = PC_GASM_BASIC; }
    }

    /* Stage 1: determine subdomain counts. */
    if (osm->N == PETSC_DECIDE && osm->n < 1) {
      /* no subdomains given, use one per processor */
      osm->nmax = osm->n = 1;
      ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
      osm->N = size;
    } else if (osm->N == PETSC_DECIDE) {
      PetscInt inwork[2], outwork[2];
      /* determine global number of subdomains and the max number of local subdomains */
      inwork[0] = inwork[1] = osm->n;
      ierr = MPI_Allreduce(inwork,outwork,1,MPIU_2INT,PetscMaxSum_Op,((PetscObject)pc)->comm);CHKERRQ(ierr);
      osm->nmax = outwork[0];
      osm->N    = outwork[1];
    }
    /* Stage 2: create the subdomain index sets if the user did not provide them. */
    if (!osm->is){ /* create the index sets */
      ierr = PCGASMCreateSubdomains(pc->pmat,osm->n,&osm->is);CHKERRQ(ierr);
    }
    if (!osm->is_local) {
      /*
	 This indicates that osm->is should define a nonoverlapping decomposition
	 (there is no way to really guarantee that if subdomains are set by the user through PCGASMSetLocalSubdomains,
	  but the assumption is that either the user does the right thing, or subdomains in osm->is have been created
	  via PCGASMCreateSubdomains, which guarantees a nonoverlapping decomposition).
	 Therefore, osm->is will be used to define osm->is_local.
	 If a nonzero overlap has been requested by the user, then osm->is will be expanded and will overlap,
	 so osm->is_local should obtain a copy of osm->is while they are still (presumably) nonoverlapping.
	 Otherwise (no overlap has been requested), osm->is_local are simply aliases for osm->is.
      */
      ierr = PetscMalloc(osm->n*sizeof(IS),&osm->is_local);CHKERRQ(ierr);
      for (i=0; i<osm->n; i++) {
        if (osm->overlap > 0) { /* With positive overlap, osm->is[i] will be modified */
          ierr = ISDuplicate(osm->is[i],&osm->is_local[i]);CHKERRQ(ierr);
          ierr = ISCopy(osm->is[i],osm->is_local[i]);CHKERRQ(ierr);
        } else {
          ierr = PetscObjectReference((PetscObject)osm->is[i]);CHKERRQ(ierr);
          osm->is_local[i] = osm->is[i];
        }
      }
    }
    /* Beyond this point osm->is_local is not null. */
    /* Stage 3: extend the subdomains by the requested overlap. */
    if (osm->overlap > 0) {
      /* Extend the "overlapping" regions by a number of steps */
      ierr = MatIncreaseOverlap(pc->pmat,osm->n,osm->is,osm->overlap);CHKERRQ(ierr);
    }
    ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
    flg  = PETSC_FALSE;
    ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&flg,PETSC_NULL);CHKERRQ(ierr);
    if (flg) { ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr); }

    if (osm->sort_indices) {
      for (i=0; i<osm->n; i++) {
        ierr = ISSort(osm->is[i]);CHKERRQ(ierr);
	ierr = ISSort(osm->is_local[i]);CHKERRQ(ierr);
      }
    }
    /* Stage 4: merge the ISs, create merged vectors and scatter contexts. */
    /* Restriction ISs: concatenate the indices of all local subdomains. */
    ddn = 0;
    for (i=0; i<osm->n; i++) {
      ierr = ISGetLocalSize(osm->is[i],&dn);          CHKERRQ(ierr);
      ddn += dn;
    }
    ierr = PetscMalloc(ddn*sizeof(PetscInt), &ddidx); CHKERRQ(ierr);
    ddn = 0;
    for (i=0; i<osm->n; i++) {
      ierr = ISGetLocalSize(osm->is[i],&dn); CHKERRQ(ierr);
      ierr = ISGetIndices(osm->is[i],&didx); CHKERRQ(ierr);
      ierr = PetscMemcpy(ddidx+ddn, didx, sizeof(PetscInt)*dn); CHKERRQ(ierr);
      ierr = ISRestoreIndices(osm->is[i], &didx); CHKERRQ(ierr);
      ddn += dn;
    }
    /* gis takes ownership of ddidx (PETSC_OWN_POINTER): do not free it here. */
    ierr = ISCreateGeneral(((PetscObject)(pc))->comm, ddn, ddidx, PETSC_OWN_POINTER, &osm->gis); CHKERRQ(ierr);
    ierr = MatGetVecs(pc->pmat,&x,&y);CHKERRQ(ierr);
    ierr = VecCreateMPI(((PetscObject)pc)->comm, ddn, PETSC_DECIDE, &osm->gx); CHKERRQ(ierr);
    ierr = VecDuplicate(osm->gx,&osm->gy);                                     CHKERRQ(ierr);
    ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast);                     CHKERRQ(ierr);
    /* grestriction scatters global vector entries (indexed by gis) into the
       contiguous local chunk [gfirst, gfirst+ddn) of the merged vector gx. */
    ierr = ISCreateStride(((PetscObject)pc)->comm,ddn,gfirst,1, &gid);              CHKERRQ(ierr);
    ierr = VecScatterCreate(x,osm->gis,osm->gx,gid, &(osm->grestriction));     CHKERRQ(ierr);
    ierr = ISDestroy(gid);                                                     CHKERRQ(ierr);
    /* Prolongation ISs: built from the nonoverlapping (local) subdomains so that
       each degree of freedom is prolonged by exactly one subdomain. */
    { PetscInt       dn_local;       /* Number of indices in the local part of a single domain assigned to this processor. */
      const PetscInt *didx_local;    /* Global indices from the local part of a single domain assigned to this processor. */
      PetscInt       ddn_local;      /* Number of indices in the local part of the disjoint union all domains assigned to this processor. */
      PetscInt       *ddidx_local;   /* Global indices of the local part of the disjoint union of all domains assigned to this processor. */
      /**/
      ISLocalToGlobalMapping ltog;          /* Map from global to local indices on the disjoint union of subdomains: "local" ind's run from 0 to ddn-1. */
      PetscInt              *ddidx_llocal;  /* Mapped local indices of the disjoint union of local parts of subdomains. */
      PetscInt               ddn_llocal;    /* Number of indices in ddidx_llocal; must equal ddn_local, or else gis_local is not a sub-IS of gis. */
      IS                     gis_llocal;    /* IS with ddidx_llocal indices. */
      PetscInt               j;
      ddn_local = 0;
      for (i=0; i<osm->n; i++) {
	ierr = ISGetLocalSize(osm->is_local[i],&dn_local); CHKERRQ(ierr);
	ddn_local += dn_local;
      }
      ierr = PetscMalloc(ddn_local*sizeof(PetscInt), &ddidx_local); CHKERRQ(ierr);
      ddn_local = 0;
      for (i=0; i<osm->n; i++) {
	ierr = ISGetLocalSize(osm->is_local[i],&dn_local); CHKERRQ(ierr);
	ierr = ISGetIndices(osm->is_local[i],&didx_local); CHKERRQ(ierr);
	ierr = PetscMemcpy(ddidx_local+ddn_local, didx_local, sizeof(PetscInt)*dn_local); CHKERRQ(ierr);
	ierr = ISRestoreIndices(osm->is_local[i], &didx_local); CHKERRQ(ierr);
	ddn_local += dn_local;
      }
      ierr = PetscMalloc(sizeof(PetscInt)*ddn_local, &ddidx_llocal); CHKERRQ(ierr);
      /* gis_local takes ownership of ddidx_local (PETSC_OWN_POINTER). */
      ierr = ISCreateGeneral(((PetscObject)pc)->comm, ddn_local, ddidx_local, PETSC_OWN_POINTER, &(osm->gis_local)); CHKERRQ(ierr);
      /* Locate each nonoverlapping index within the merged overlapping IS gis. */
      ierr = ISLocalToGlobalMappingCreateIS(osm->gis,&ltog);CHKERRQ(ierr);
      ierr = ISGlobalToLocalMappingApply(ltog,IS_GTOLM_DROP,ddn_local,ddidx_local,&ddn_llocal,ddidx_llocal);CHKERRQ(ierr);
      ierr = ISLocalToGlobalMappingDestroy(ltog);CHKERRQ(ierr);
      if (ddn_llocal != ddn_local) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"gis_local contains %D indices outside of gis", ddn_llocal - ddn_local);
      /* Now convert these localized indices into the global indices into the merged output vector. */
      ierr = VecGetOwnershipRange(osm->gy, &gfirst, &glast); CHKERRQ(ierr);
      for(j=0; j < ddn_llocal; ++j) {
	ddidx_llocal[j] += gfirst;
      }
      ierr = ISCreateGeneral(PETSC_COMM_SELF,ddn_llocal,ddidx_llocal,PETSC_OWN_POINTER,&gis_llocal); CHKERRQ(ierr);
      /* gprolongation scatters the nonoverlapping portion of gy back to the global vector. */
      ierr = VecScatterCreate(y,osm->gis_local,osm->gy,gis_llocal,&osm->gprolongation);              CHKERRQ(ierr);
      ierr = ISDestroy(gis_llocal);CHKERRQ(ierr);
    }
    /* Create the subdomain work vectors; they alias consecutive chunks of gx/gy storage. */
    ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->x);CHKERRQ(ierr);
    ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->y);CHKERRQ(ierr);
    ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast);CHKERRQ(ierr);
    ierr = VecGetArray(osm->gx, &gxarray);                                     CHKERRQ(ierr);
    ierr = VecGetArray(osm->gy, &gyarray);                                     CHKERRQ(ierr);
    for (i=0, ddn=0; i<osm->n; ++i, ddn += dn) {
      PetscInt dN;
      ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr);
      ierr = ISGetSize(osm->is[i],&dN);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gxarray+ddn,&osm->x[i]);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gyarray+ddn,&osm->y[i]);CHKERRQ(ierr);
    }
    ierr = VecRestoreArray(osm->gx, &gxarray); CHKERRQ(ierr);
    ierr = VecRestoreArray(osm->gy, &gyarray); CHKERRQ(ierr);
    ierr = VecDestroy(x); CHKERRQ(ierr);
    ierr = VecDestroy(y); CHKERRQ(ierr);
    /* Stage 5: create the local solvers, one KSP per subdomain. */
    ierr = PetscMalloc(osm->n*sizeof(KSP *),&osm->ksp);CHKERRQ(ierr);
    for (i=0; i<osm->n; i++) { /* KSPs are local */
      ierr = KSPCreate(((PetscObject)(osm->is[i]))->comm,&ksp);CHKERRQ(ierr);
      ierr = PetscLogObjectParent(pc,ksp);CHKERRQ(ierr);
      ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr);
      ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
      ierr = KSPGetPC(ksp,&subpc);CHKERRQ(ierr);
      ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
      ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr);
      ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr);
      osm->ksp[i] = ksp;
    }
    scall = MAT_INITIAL_MATRIX;

  } else {
    /*
       Destroy the blocks from the previous iteration
    */
    if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
      ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
      scall = MAT_INITIAL_MATRIX;
    }
  }

  /*
     Extract out the submatrices.
  */
  if(size > 1) {
    ierr = MatGetSubMatricesParallel(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr);
  }
  else {
    ierr = MatGetSubMatrices(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr);
  }
  if (scall == MAT_INITIAL_MATRIX) {
    /* Newly created submatrices inherit the options prefix of the global matrix. */
    ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr);
    for (i=0; i<osm->n; i++) {
      ierr = PetscLogObjectParent(pc,osm->pmat[i]);CHKERRQ(ierr);
      ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr);
    }
  }

  /* Return control to the user so that the submatrices can be modified (e.g., to apply
     different boundary conditions for the submatrices than for the global problem) */
  ierr = PCModifySubMatrices(pc,osm->n,osm->is,osm->is,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr);

  /*
     Loop over submatrices putting them into local ksp
  */
  for (i=0; i<osm->n; i++) {
    ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i],pc->flag);CHKERRQ(ierr);
    if (!pc->setupcalled) {
      ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr);
    }
  }

  PetscFunctionReturn(0);
}
409 
410 #undef __FUNCT__
411 #define __FUNCT__ "PCSetUpOnBlocks_GASM"
412 static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc)
413 {
414   PC_GASM         *osm = (PC_GASM*)pc->data;
415   PetscErrorCode ierr;
416   PetscInt       i;
417 
418   PetscFunctionBegin;
419   for (i=0; i<osm->n; i++) {
420     ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr);
421   }
422   PetscFunctionReturn(0);
423 }
424 
425 #undef __FUNCT__
426 #define __FUNCT__ "PCApply_GASM"
427 static PetscErrorCode PCApply_GASM(PC pc,Vec x,Vec y)
428 {
429   PC_GASM         *osm = (PC_GASM*)pc->data;
430   PetscErrorCode ierr;
431   PetscInt       i;
432   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
433 
434   PetscFunctionBegin;
435   /*
436      Support for limiting the restriction or interpolation to only local
437      subdomain values (leaving the other values 0).
438   */
439   if (!(osm->type & PC_GASM_RESTRICT)) {
440     forward = SCATTER_FORWARD_LOCAL;
441     /* have to zero the work RHS since scatter may leave some slots empty */
442     ierr = VecZeroEntries(osm->gx); CHKERRQ(ierr);
443   }
444   if (!(osm->type & PC_GASM_INTERPOLATE)) {
445     reverse = SCATTER_REVERSE_LOCAL;
446   }
447 
448   ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
449   ierr = VecZeroEntries(y);CHKERRQ(ierr);
450   ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
451   /* do the subdomain solves */
452   for (i=0; i<osm->n; ++i) {
453     ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
454   }
455   ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
456   ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
457   PetscFunctionReturn(0);
458 }
459 
460 #undef __FUNCT__
461 #define __FUNCT__ "PCApplyTranspose_GASM"
462 static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec x,Vec y)
463 {
464   PC_GASM         *osm = (PC_GASM*)pc->data;
465   PetscErrorCode ierr;
466   PetscInt       i;
467   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
468 
469   PetscFunctionBegin;
470   /*
471      Support for limiting the restriction or interpolation to only local
472      subdomain values (leaving the other values 0).
473 
474      Note: these are reversed from the PCApply_GASM() because we are applying the
475      transpose of the three terms
476   */
477   if (!(osm->type & PC_GASM_INTERPOLATE)) {
478     forward = SCATTER_FORWARD_LOCAL;
479     /* have to zero the work RHS since scatter may leave some slots empty */
480     ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
481   }
482   if (!(osm->type & PC_GASM_RESTRICT)) {
483     reverse = SCATTER_REVERSE_LOCAL;
484   }
485 
486   ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
487   ierr = VecZeroEntries(y);CHKERRQ(ierr);
488   ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
489   /* do the local solves */
490   for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
491     ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
492   }
493   ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
494   ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
495   PetscFunctionReturn(0);
496 }
497 
498 #undef __FUNCT__
499 #define __FUNCT__ "PCDestroy_GASM"
500 static PetscErrorCode PCDestroy_GASM(PC pc)
501 {
502   PC_GASM         *osm = (PC_GASM*)pc->data;
503   PetscErrorCode ierr;
504   PetscInt       i;
505 
506   PetscFunctionBegin;
507   if (osm->ksp) {
508     for (i=0; i<osm->n; i++) {
509       ierr = KSPDestroy(osm->ksp[i]);CHKERRQ(ierr);
510     }
511     ierr = PetscFree(osm->ksp);CHKERRQ(ierr);
512   }
513   if (osm->pmat) {
514     if (osm->n > 0) {
515       ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
516     }
517   }
518   for (i=0; i<osm->n; i++) {
519     ierr = VecDestroy(osm->x[i]);CHKERRQ(ierr);
520     ierr = VecDestroy(osm->y[i]);CHKERRQ(ierr);
521   }
522   ierr = PetscFree(osm->x);CHKERRQ(ierr);
523   ierr = PetscFree(osm->y);CHKERRQ(ierr);
524   ierr = VecDestroy(osm->gx); CHKERRQ(ierr);
525   ierr = VecDestroy(osm->gy); CHKERRQ(ierr);
526 
527   ierr = VecScatterDestroy(osm->grestriction); CHKERRQ(ierr);
528   ierr = VecScatterDestroy(osm->gprolongation); CHKERRQ(ierr);
529 
530   if (osm->is) {
531     ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
532   }
533   ierr = PetscFree(osm);CHKERRQ(ierr);
534   PetscFunctionReturn(0);
535 }
536 
537 #undef __FUNCT__
538 #define __FUNCT__ "PCSetFromOptions_GASM"
539 static PetscErrorCode PCSetFromOptions_GASM(PC pc) {
540   PC_GASM         *osm = (PC_GASM*)pc->data;
541   PetscErrorCode ierr;
542   PetscInt       blocks,ovl;
543   PetscBool      symset,flg;
544   PCGASMType      gasmtype;
545 
546   PetscFunctionBegin;
547   /* set the type to symmetric if matrix is symmetric */
548   if (!osm->type_set && pc->pmat) {
549     ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
550     if (symset && flg) { osm->type = PC_GASM_BASIC; }
551   }
552   ierr = PetscOptionsHead("Generalized additive Schwarz options");CHKERRQ(ierr);
553     ierr = PetscOptionsInt("-pc_gasm_blocks","Number of subdomains","PCGASMSetTotalSubdomains",osm->n,&blocks,&flg);CHKERRQ(ierr);
554     if (flg) {ierr = PCGASMSetTotalSubdomains(pc,blocks);CHKERRQ(ierr); }
555     ierr = PetscOptionsInt("-pc_gasm_overlap","Number of grid points overlap","PCGASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr);
556     if (flg) {ierr = PCGASMSetOverlap(pc,ovl);CHKERRQ(ierr); }
557     flg  = PETSC_FALSE;
558     ierr = PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);CHKERRQ(ierr);
559     if (flg) {ierr = PCGASMSetType(pc,gasmtype);CHKERRQ(ierr); }
560   ierr = PetscOptionsTail();CHKERRQ(ierr);
561   PetscFunctionReturn(0);
562 }
563 
564 /*------------------------------------------------------------------------------------*/
565 
566 EXTERN_C_BEGIN
567 #undef __FUNCT__
568 #define __FUNCT__ "PCGASMSetLocalSubdomains_GASM"
569 PetscErrorCode  PCGASMSetLocalSubdomains_GASM(PC pc,PetscInt n,IS is[],IS is_local[])
570 {
571   PC_GASM         *osm = (PC_GASM*)pc->data;
572   PetscErrorCode ierr;
573   PetscInt       i;
574 
575   PetscFunctionBegin;
576   if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more blocks, n = %D",n);
577   if (pc->setupcalled && (n != osm->n || is)) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetLocalSubdomains() should be called before calling PCSetUp().");
578 
579   if (!pc->setupcalled) {
580     osm->n            = n;
581     osm->is           = 0;
582     osm->is_local     = 0;
583     if (is) {
584       for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is[i]);CHKERRQ(ierr);}
585     }
586     if (is_local) {
587       for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is_local[i]);CHKERRQ(ierr);}
588     }
589     if (osm->is) {
590       ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
591     }
592     if (is) {
593       ierr = PetscMalloc(n*sizeof(IS),&osm->is);CHKERRQ(ierr);
594       for (i=0; i<n; i++) { osm->is[i] = is[i]; }
595       /* Flag indicating that the user has set overlapping subdomains so PCGASM should not increase their size. */
596       osm->overlap = -1;
597     }
598     if (is_local) {
599       ierr = PetscMalloc(n*sizeof(IS),&osm->is_local);CHKERRQ(ierr);
600       for (i=0; i<n; i++) { osm->is_local[i] = is_local[i]; }
601     }
602   }
603   PetscFunctionReturn(0);
604 }
605 EXTERN_C_END
606 
607 EXTERN_C_BEGIN
608 #undef __FUNCT__
609 #define __FUNCT__ "PCGASMSetTotalSubdomains_GASM"
610 PetscErrorCode  PCGASMSetTotalSubdomains_GASM(PC pc,PetscInt N) {
611   PC_GASM         *osm = (PC_GASM*)pc->data;
612   PetscErrorCode ierr;
613   PetscMPIInt    rank,size;
614   PetscInt       n;
615 
616   PetscFunctionBegin;
617   if (N < 1) SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Number of total blocks must be > 0, N = %D",N);
618 
619   /*
620      Split the subdomains equally among all processors
621   */
622   ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
623   ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
624   n = N/size + ((N % size) > rank);
625   if (!n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Process %d must have at least one block: total processors %d total blocks %D",(int)rank,(int)size,N);
626   if (pc->setupcalled && n != osm->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before PCSetUp().");
627   if (!pc->setupcalled) {
628     if (osm->is) {
629       ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
630     }
631     osm->N            = N;
632     osm->n            = n;
633     osm->is           = 0;
634     osm->is_local     = 0;
635   }
636   PetscFunctionReturn(0);
637 }/* PCGASMSetTotalSubdomains_GASM() */
638 EXTERN_C_END
639 
640 EXTERN_C_BEGIN
641 #undef __FUNCT__
642 #define __FUNCT__ "PCGASMSetOverlap_GASM"
643 PetscErrorCode  PCGASMSetOverlap_GASM(PC pc,PetscInt ovl)
644 {
645   PC_GASM *osm = (PC_GASM*)pc->data;
646 
647   PetscFunctionBegin;
648   if (ovl < 0) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
649   if (pc->setupcalled && ovl != osm->overlap) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp().");
650   if (!pc->setupcalled) {
651     osm->overlap = ovl;
652   }
653   PetscFunctionReturn(0);
654 }
655 EXTERN_C_END
656 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetType_GASM"
/* Set the GASM restriction/interpolation variant and record that the user chose it
   explicitly (so it will not be changed automatically for symmetric problems). */
PetscErrorCode  PCGASMSetType_GASM(PC pc,PCGASMType type)
{
  PC_GASM *osm = (PC_GASM*)pc->data;

  PetscFunctionBegin;
  osm->type     = type;
  osm->type_set = PETSC_TRUE;  /* flag: value was set by the user */
  PetscFunctionReturn(0);
}
EXTERN_C_END
670 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetSortIndices_GASM"
/* Record whether the subdomain index sets should be sorted. */
PetscErrorCode  PCGASMSetSortIndices_GASM(PC pc,PetscBool  doSort)
{
  PC_GASM *osm = (PC_GASM*)pc->data;

  PetscFunctionBegin;
  osm->sort_indices = doSort;
  PetscFunctionReturn(0);
}
EXTERN_C_END
683 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCGASMGetSubKSP_GASM"
/*
   Returns the number of local subdomains (n), the global number of this process's
   first subdomain (first), and the array of subdomain solvers (ksp).  Each output
   is optional; pass PETSC_NULL to skip it.

   FIX: This routine might need to be modified once multiple ranks per subdomain are allowed.
        In particular, it would upset the global subdomain number calculation.
*/
PetscErrorCode  PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp)
{
  PC_GASM         *osm = (PC_GASM*)pc->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* osm->n is only positive after setup has created the subdomains. */
  if (osm->n < 1) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ORDER,"Need to call PCSetUP() on PC (or KSPSetUp() on the outer KSP object) before calling here");

  if (n) {
    *n = osm->n;
  }
  if (first) {
    /* MPI_Scan yields the inclusive prefix sum of subdomain counts; subtracting osm->n
       converts it to an exclusive sum, i.e. the global number of the first local
       subdomain.  MPI_Scan is collective, so either all ranks request 'first' or none. */
    ierr = MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr);
    *first -= osm->n;
  }
  if (ksp) {
    /* Assume that local solves are now different; not necessarily
       true though!  This flag is used only for PCView_GASM() */
    *ksp                   = osm->ksp;
    osm->same_local_solves = PETSC_FALSE;
  }
  PetscFunctionReturn(0);
}/* PCGASMGetSubKSP_GASM() */
EXTERN_C_END
715 
716 
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetLocalSubdomains"
/*@C
    PCGASMSetLocalSubdomains - Sets the local subdomains (for this processor
    only) for the additive Schwarz preconditioner.

    Collective on PC

    Input Parameters:
+   pc - the preconditioner context
.   n - the number of subdomains for this processor (default value = 1)
.   is - the index set that defines the subdomains for this processor
         (or PETSC_NULL for PETSc to determine subdomains)
-   is_local - the index sets that define the local part of the subdomains for this processor
         (or PETSC_NULL to use the default of 1 subdomain per process)

    Notes:
    The IS numbering is in the parallel, global numbering of the vector.

    By default the GASM preconditioner uses 1 block per processor.

    Use PCGASMSetTotalSubdomains() to set the subdomains for all processors.

    NOTE(review): the implementation appears to store the caller's IS pointers
    without taking a reference, so the index sets must remain valid while the PC
    uses them -- confirm against PCGASMSetLocalSubdomains_GASM().

    Level: advanced

.keywords: PC, GASM, set, local, subdomains, additive Schwarz

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains()
@*/
PetscErrorCode  PCGASMSetLocalSubdomains(PC pc,PetscInt n,IS is[],IS is_local[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGASMSetLocalSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,is,is_local));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
756 
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetTotalSubdomains"
/*@C
    PCGASMSetTotalSubdomains - Sets the total number of subdomains to use across
    all processors for the additive Schwarz preconditioner.  Either all or no
    processors in the PC communicator must call this routine, with the same N.

    Collective on PC

    Input Parameters:
+   pc - the preconditioner context
-   N - the total number of subdomains distributed across all processors

    Options Database Key:
    To set the total number of subdomain blocks rather than specify the
    index sets, use the option
.    -pc_gasm_blocks <blks> - Sets total blocks

    Notes:
    The subdomains are split as evenly as possible among the processors;
    every processor must receive at least one subdomain.

    By default the GASM preconditioner uses 1 block per processor.

    Use PCGASMSetLocalSubdomains() to set the subdomain index sets for this
    processor explicitly.

    Level: advanced

.keywords: PC, GASM, set, total, global, subdomains, additive Schwarz

.seealso: PCGASMSetLocalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode  PCGASMSetTotalSubdomains(PC pc,PetscInt N)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGASMSetTotalSubdomains_C",(PC,PetscInt),(pc,N));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
805 
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetOverlap"
/*@
    PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
    additive Schwarz preconditioner.  Either all or no processors in the
    PC communicator must call this routine.

    Logically Collective on PC

    Input Parameters:
+   pc  - the preconditioner context
-   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 1)

    Options Database Key:
.   -pc_gasm_overlap <ovl> - Sets overlap

    Notes:
    By default the GASM preconditioner uses 1 block per processor.  To use
    multiple blocks per processor, see PCGASMSetTotalSubdomains() and
    PCGASMSetLocalSubdomains() (and the option -pc_gasm_blocks <blks>).

    The overlap defaults to 1, so if one desires that no additional
    overlap be computed beyond what may have been set with a call to
    PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(), then ovl
    must be set to be 0.  In particular, if one does not explicitly set
    the subdomains in an application code, then all overlap would be computed
    internally by PETSc, and using an overlap of 0 would result in a GASM
    variant that is equivalent to the block Jacobi preconditioner.

    Note that one can define initial index sets with any overlap via
    PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(); the routine
    PCGASMSetOverlap() merely allows PETSc to extend that overlap further
    if desired.

    Level: intermediate

.keywords: PC, GASM, set, overlap

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains()
@*/
PetscErrorCode  PCGASMSetOverlap(PC pc,PetscInt ovl)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveInt(pc,ovl,2);
  ierr = PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
857 
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetType"
/*@
    PCGASMSetType - Sets the type of restriction and interpolation used
    for local problems in the additive Schwarz method.

    Logically Collective on PC

    Input Parameters:
+   pc  - the preconditioner context
-   type - variant of GASM, one of
.vb
      PC_GASM_BASIC       - full interpolation and restriction
      PC_GASM_RESTRICT    - full restriction, local processor interpolation
      PC_GASM_INTERPOLATE - full interpolation, local processor restriction
      PC_GASM_NONE        - local processor restriction and interpolation
.ve

    Options Database Key:
.   -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type

    Level: intermediate

.keywords: PC, GASM, set, type

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode  PCGASMSetType(PC pc,PCGASMType type)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveEnum(pc,type,2);
  ierr = PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
896 
#undef __FUNCT__
#define __FUNCT__ "PCGASMSetSortIndices"
/*@
    PCGASMSetSortIndices - Determines whether subdomain indices are sorted.

    Logically Collective on PC

    Input Parameters:
+   pc  - the preconditioner context
-   doSort - sort the subdomain indices

    Notes:
    Subdomain indices are sorted by default.

    Level: intermediate

.keywords: PC, GASM, set, subdomains, sort, indices

.seealso: PCGASMSetLocalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMGetSubKSP(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode  PCGASMSetSortIndices(PC pc,PetscBool  doSort)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveBool(pc,doSort,2);
  ierr = PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
925 
#undef __FUNCT__
#define __FUNCT__ "PCGASMGetSubKSP"
/*@C
   PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on
   this processor.

   Collective on PC iff first_local is requested

   Input Parameter:
.  pc - the preconditioner context

   Output Parameters:
+  n_local - the number of blocks on this processor or PETSC_NULL
.  first_local - the global number of the first block on this processor or PETSC_NULL,
                 all processors must request or all must pass PETSC_NULL
-  ksp - the array of KSP contexts

   Note:
   After PCGASMGetSubKSP() the array of KSP contexts is not to be freed

   Currently for some matrix implementations only 1 block per processor
   is supported.

   You must call KSPSetUp() before calling PCGASMGetSubKSP().

   Level: advanced

.keywords: PC, GASM, additive Schwarz, get, sub, KSP, context

.seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMSetOverlap(),
          PCGASMCreateSubdomains2D()
@*/
PetscErrorCode  PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
967 
968 /* -------------------------------------------------------------------------------------*/
969 /*MC
970    PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
971            its own KSP object.
972 
973    Options Database Keys:
+  -pc_gasm_truelocal - Activates PCGASMSetUseTrueLocal()
975 .  -pc_gasm_blocks <blks> - Sets total blocks
976 .  -pc_gasm_overlap <ovl> - Sets overlap
977 -  -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
978 
979      IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
980       will get a different convergence rate due to the default option of -pc_gasm_type restrict. Use
981       -pc_gasm_type basic to use the standard GASM.
982 
983    Notes: Each processor can have one or more blocks, but a block cannot be shared by more
984      than one processor. Defaults to one block per processor.
985 
986      To set options on the solvers for each block append -sub_ to all the KSP, and PC
987         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
988 
989      To set the options on the solvers separate for each block call PCGASMGetSubKSP()
990          and set the options directly on the resulting KSP object (you can access its PC
991          with KSPGetPC())
992 
993 
994    Level: beginner
995 
996    Concepts: additive Schwarz method
997 
998     References:
999     An additive variant of the Schwarz alternating method for the case of many subregions
1000     M Dryja, OB Widlund - Courant Institute, New York University Technical report
1001 
1002     Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
1003     Barry Smith, Petter Bjorstad, and William Gropp, Cambridge University Press, ISBN 0-521-49589-X.
1004 
1005 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
1006            PCBJACOBI, PCGASMSetUseTrueLocal(), PCGASMGetSubKSP(), PCGASMSetLocalSubdomains(),
1007            PCGASMSetTotalSubdomains(), PCSetModifySubmatrices(), PCGASMSetOverlap(), PCGASMSetType()
1008 
1009 M*/
1010 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCCreate_GASM"
/*
   PCCreate_GASM - Constructor for the GASM preconditioner: allocates the
   PC_GASM context with default settings, installs the PC operation table,
   and composes the type-specific implementations so that the public
   PCGASMXXX() interface routines can dispatch to them.
*/
PetscErrorCode  PCCreate_GASM(PC pc)
{
  PetscErrorCode ierr;
  PC_GASM         *osm;

  PetscFunctionBegin;
  ierr = PetscNewLog(pc,PC_GASM,&osm);CHKERRQ(ierr);
  /* Defaults: subdomain counts decided during setup, overlap of 1,
     restricted variant, sorted subdomain indices. */
  osm->N                 = PETSC_DECIDE;
  osm->n                 = 0;
  osm->nmax              = 0;
  osm->overlap           = 1;
  osm->ksp               = 0;
  osm->grestriction      = 0;
  osm->gprolongation     = 0;
  osm->gx                = 0;
  osm->gy                = 0;
  osm->x                 = 0;
  osm->y                 = 0;
  osm->is                = 0;
  osm->is_local          = 0;
  osm->pmat              = 0;
  osm->type              = PC_GASM_RESTRICT;
  osm->same_local_solves = PETSC_TRUE;
  osm->sort_indices      = PETSC_TRUE;

  /* Install the PC operations implemented by GASM. */
  pc->data                   = (void*)osm;
  pc->ops->apply             = PCApply_GASM;
  pc->ops->applytranspose    = PCApplyTranspose_GASM;
  pc->ops->setup             = PCSetUp_GASM;
  pc->ops->destroy           = PCDestroy_GASM;
  pc->ops->setfromoptions    = PCSetFromOptions_GASM;
  pc->ops->setuponblocks     = PCSetUpOnBlocks_GASM;
  pc->ops->view              = PCView_GASM;
  pc->ops->applyrichardson   = 0;

  /* Compose the methods dispatched to by the public PCGASM interface routines. */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetLocalSubdomains_C","PCGASMSetLocalSubdomains_GASM",
                    PCGASMSetLocalSubdomains_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetTotalSubdomains_C","PCGASMSetTotalSubdomains_GASM",
                    PCGASMSetTotalSubdomains_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetOverlap_C","PCGASMSetOverlap_GASM",
                    PCGASMSetOverlap_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetType_C","PCGASMSetType_GASM",
                    PCGASMSetType_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetSortIndices_C","PCGASMSetSortIndices_GASM",
                    PCGASMSetSortIndices_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMGetSubKSP_C","PCGASMGetSubKSP_GASM",
                    PCGASMGetSubKSP_GASM);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
1064 
1065 
1066 #undef __FUNCT__
1067 #define __FUNCT__ "PCGASMCreateSubdomains"
1068 /*@C
1069    PCGASMCreateSubdomains - Creates the index sets for the overlapping Schwarz
1070    preconditioner for a any problem on a general grid.
1071 
1072    Collective
1073 
1074    Input Parameters:
1075 +  A - The global matrix operator
1076 -  n - the number of local blocks
1077 
1078    Output Parameters:
1079 .  outis - the array of index sets defining the subdomains
1080 
1081    Level: advanced
1082 
1083    Note: this generates nonoverlapping subdomains; the PCGASM will generate the overlap
1084     from these if you use PCGASMSetLocalSubdomains()
1085 
1086     In the Fortran version you must provide the array outis[] already allocated of length n.
1087 
1088 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid
1089 
1090 .seealso: PCGASMSetLocalSubdomains(), PCGASMDestroySubdomains()
1091 @*/
1092 PetscErrorCode  PCGASMCreateSubdomains(Mat A, PetscInt n, IS* outis[])
1093 {
1094   MatPartitioning           mpart;
1095   const char                *prefix;
1096   PetscErrorCode            (*f)(Mat,PetscBool *,MatReuse,Mat*);
1097   PetscMPIInt               size;
1098   PetscInt                  i,j,rstart,rend,bs;
1099   PetscBool                 iscopy = PETSC_FALSE,isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
1100   Mat                       Ad = PETSC_NULL, adj;
1101   IS                        ispart,isnumb,*is;
1102   PetscErrorCode            ierr;
1103 
1104   PetscFunctionBegin;
1105   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1106   PetscValidPointer(outis,3);
1107   if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local blocks must be > 0, n = %D",n);
1108 
1109   /* Get prefix, row distribution, and block size */
1110   ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr);
1111   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1112   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
1113   if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);
1114 
1115   /* Get diagonal block from matrix if possible */
1116   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1117   ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",(void (**)(void))&f);CHKERRQ(ierr);
1118   if (f) {
1119     ierr = (*f)(A,&iscopy,MAT_INITIAL_MATRIX,&Ad);CHKERRQ(ierr);
1120   } else if (size == 1) {
1121     iscopy = PETSC_FALSE; Ad = A;
1122   } else {
1123     iscopy = PETSC_FALSE; Ad = PETSC_NULL;
1124   }
1125   if (Ad) {
1126     ierr = PetscTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
1127     if (!isbaij) {ierr = PetscTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);}
1128   }
1129   if (Ad && n > 1) {
1130     PetscBool  match,done;
1131     /* Try to setup a good matrix partitioning if available */
1132     ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr);
1133     ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
1134     ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
1135     ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);CHKERRQ(ierr);
1136     if (!match) {
1137       ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);CHKERRQ(ierr);
1138     }
1139     if (!match) { /* assume a "good" partitioner is available */
1140       PetscInt na,*ia,*ja;
1141       ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1142       if (done) {
1143 	/* Build adjacency matrix by hand. Unfortunately a call to
1144 	   MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1145 	   remove the block-aij structure and we cannot expect
1146 	   MatPartitioning to split vertices as we need */
1147 	PetscInt i,j,*row,len,nnz,cnt,*iia=0,*jja=0;
1148 	nnz = 0;
1149 	for (i=0; i<na; i++) { /* count number of nonzeros */
1150 	  len = ia[i+1] - ia[i];
1151 	  row = ja + ia[i];
1152 	  for (j=0; j<len; j++) {
1153 	    if (row[j] == i) { /* don't count diagonal */
1154 	      len--; break;
1155 	    }
1156 	  }
1157 	  nnz += len;
1158 	}
1159 	ierr = PetscMalloc((na+1)*sizeof(PetscInt),&iia);CHKERRQ(ierr);
1160 	ierr = PetscMalloc((nnz)*sizeof(PetscInt),&jja);CHKERRQ(ierr);
1161 	nnz    = 0;
1162 	iia[0] = 0;
1163 	for (i=0; i<na; i++) { /* fill adjacency */
1164 	  cnt = 0;
1165 	  len = ia[i+1] - ia[i];
1166 	  row = ja + ia[i];
1167 	  for (j=0; j<len; j++) {
1168 	    if (row[j] != i) { /* if not diagonal */
1169 	      jja[nnz+cnt++] = row[j];
1170 	    }
1171 	  }
1172 	  nnz += cnt;
1173 	  iia[i+1] = nnz;
1174 	}
1175 	/* Partitioning of the adjacency matrix */
1176 	ierr = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,PETSC_NULL,&adj);CHKERRQ(ierr);
1177 	ierr = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr);
1178 	ierr = MatPartitioningSetNParts(mpart,n);CHKERRQ(ierr);
1179 	ierr = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr);
1180 	ierr = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr);
1181 	ierr = MatDestroy(adj);CHKERRQ(ierr);
1182 	foundpart = PETSC_TRUE;
1183       }
1184       ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1185     }
1186     ierr = MatPartitioningDestroy(mpart);CHKERRQ(ierr);
1187   }
1188   if (iscopy) {ierr = MatDestroy(Ad);CHKERRQ(ierr);}
1189 
1190   ierr = PetscMalloc(n*sizeof(IS),&is);CHKERRQ(ierr);
1191   *outis = is;
1192 
1193   if (!foundpart) {
1194 
1195     /* Partitioning by contiguous chunks of rows */
1196 
1197     PetscInt mbs   = (rend-rstart)/bs;
1198     PetscInt start = rstart;
1199     for (i=0; i<n; i++) {
1200       PetscInt count = (mbs/n + ((mbs % n) > i)) * bs;
1201       ierr   = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr);
1202       start += count;
1203     }
1204 
1205   } else {
1206 
1207     /* Partitioning by adjacency of diagonal block  */
1208 
1209     const PetscInt *numbering;
1210     PetscInt       *count,nidx,*indices,*newidx,start=0;
1211     /* Get node count in each partition */
1212     ierr = PetscMalloc(n*sizeof(PetscInt),&count);CHKERRQ(ierr);
1213     ierr = ISPartitioningCount(ispart,n,count);CHKERRQ(ierr);
1214     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1215       for (i=0; i<n; i++) count[i] *= bs;
1216     }
1217     /* Build indices from node numbering */
1218     ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr);
1219     ierr = PetscMalloc(nidx*sizeof(PetscInt),&indices);CHKERRQ(ierr);
1220     for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
1221     ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr);
1222     ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr);
1223     ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr);
1224     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1225       ierr = PetscMalloc(nidx*bs*sizeof(PetscInt),&newidx);CHKERRQ(ierr);
1226       for (i=0; i<nidx; i++)
1227 	for (j=0; j<bs; j++)
1228 	  newidx[i*bs+j] = indices[i]*bs + j;
1229       ierr = PetscFree(indices);CHKERRQ(ierr);
1230       nidx   *= bs;
1231       indices = newidx;
1232     }
1233     /* Shift to get global indices */
1234     for (i=0; i<nidx; i++) indices[i] += rstart;
1235 
1236     /* Build the index sets for each block */
1237     for (i=0; i<n; i++) {
1238       ierr   = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);CHKERRQ(ierr);
1239       ierr   = ISSort(is[i]);CHKERRQ(ierr);
1240       start += count[i];
1241     }
1242 
1243     ierr = PetscFree(count);
1244     ierr = PetscFree(indices);
1245     ierr = ISDestroy(isnumb);CHKERRQ(ierr);
1246     ierr = ISDestroy(ispart);CHKERRQ(ierr);
1247 
1248   }
1249 
1250   PetscFunctionReturn(0);
1251 }
1252 
1253 #undef __FUNCT__
1254 #define __FUNCT__ "PCGASMDestroySubdomains"
1255 /*@C
1256    PCGASMDestroySubdomains - Destroys the index sets created with
1257    PCGASMCreateSubdomains(). Should be called after setting subdomains
1258    with PCGASMSetLocalSubdomains().
1259 
1260    Collective
1261 
1262    Input Parameters:
1263 +  n - the number of index sets
1264 .  is - the array of index sets
1265 -  is_local - the array of local index sets, can be PETSC_NULL
1266 
1267    Level: advanced
1268 
1269 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid
1270 
1271 .seealso: PCGASMCreateSubdomains(), PCGASMSetLocalSubdomains()
1272 @*/
1273 PetscErrorCode  PCGASMDestroySubdomains(PetscInt n, IS is[], IS is_local[])
1274 {
1275   PetscInt       i;
1276   PetscErrorCode ierr;
1277   PetscFunctionBegin;
1278   if (n <= 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"n must be > 0: n = %D",n);
1279   PetscValidPointer(is,2);
1280   for (i=0; i<n; i++) { ierr = ISDestroy(is[i]);CHKERRQ(ierr); }
1281   ierr = PetscFree(is);CHKERRQ(ierr);
1282   if (is_local) {
1283     PetscValidPointer(is_local,3);
1284     for (i=0; i<n; i++) { ierr = ISDestroy(is_local[i]);CHKERRQ(ierr); }
1285     ierr = PetscFree(is_local);CHKERRQ(ierr);
1286   }
1287   PetscFunctionReturn(0);
1288 }
1289 
1290 
/*
   PCGASMLocalSubdomainBounds2D - Intersects the 2D subdomain with corners
   (xleft,ylow) and (xright,yhigh) with this process's ownership range
   [first,last) of a naturally ordered M-by-N grid.  Writes the bounding box
   of the intersection through the xleft_loc/ylow_loc/xright_loc/yhigh_loc
   pointer arguments and the number of owned subdomain points through n.
   NOTE(review): macro arguments are evaluated more than once; do not pass
   expressions with side effects.
*/
#define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \
{                                                                                                       \
 PetscInt first_row = first/M, last_row = last/M+1;                                                     \
  /*                                                                                                    \
   Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners       \
   of the bounding box of the intersection of the subdomain with the local ownership range (local       \
   subdomain).                                                                                          \
   Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows       \
   of the intersection.                                                                                 \
  */                                                                                                    \
  /* ylow_loc is the grid row containing the first element of the local subdomain */                   \
  *ylow_loc = PetscMax(first_row,ylow);                                                                    \
  /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
  *xleft_loc = *ylow_loc==first_row?PetscMax(first%M,xleft):xleft;                                                                            \
  /* yhigh_loc is the grid row above the last local subdomain element */                                                                    \
  *yhigh_loc = PetscMin(last_row,yhigh);                                                                                                     \
  /* xright is the offset of the end of the  local subdomain within its grid row (might actually be outside the local subdomain) */         \
  *xright_loc = *yhigh_loc==last_row?PetscMin(xright,last%M):xright;                                                                          \
  /* Now compute the size of the local subdomain n. */ \
  *n = 0;                                               \
  if(*ylow_loc < *yhigh_loc) {                           \
    PetscInt width = xright-xleft;                     \
    *n += width*(*yhigh_loc-*ylow_loc-1);                 \
    *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \
    *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \
  }\
}
1318 
1319 
1320 
1321 #undef __FUNCT__
1322 #define __FUNCT__ "PCGASMCreateSubdomains2D"
1323 /*@
1324    PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1325    preconditioner for a two-dimensional problem on a regular grid.
1326 
1327    Collective
1328 
1329    Input Parameters:
1330 +  M, N - the global number of mesh points in the x and y directions
1331 .  Mdomains, Ndomains - the global number of subdomains in the x and y directions
1332 .  dof - degrees of freedom per node
1333 -  overlap - overlap in mesh lines
1334 
1335    Output Parameters:
1336 +  Nsub - the number of local subdomains created
1337 .  is - array of index sets defining overlapping (if overlap > 0) subdomains
1338 -  is_local - array of index sets defining non-overlapping subdomains
1339 
1340    Note:
1341    Presently PCAMSCreateSubdomains2d() is valid only for sequential
1342    preconditioners.  More general related routines are
1343    PCGASMSetTotalSubdomains() and PCGASMSetLocalSubdomains().
1344 
1345    Level: advanced
1346 
1347 .keywords: PC, GASM, additive Schwarz, create, subdomains, 2D, regular grid
1348 
1349 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
1350           PCGASMSetOverlap()
1351 @*/
1352 PetscErrorCode  PCGASMCreateSubdomains2D(PC pc, PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap, PetscInt *nsub,IS **is,IS **is_local)
1353 {
1354   PetscErrorCode ierr;
1355   PetscMPIInt size, rank;
1356   PetscInt i, j;
1357   PetscInt maxheight, maxwidth;
1358   PetscInt xstart, xleft, xright, xleft_loc, xright_loc;
1359   PetscInt ystart, ylow,  yhigh,  ylow_loc,  yhigh_loc;
1360   PetscInt x[2][2], y[2][2], n[2];
1361   PetscInt first, last;
1362   PetscInt nidx, *idx;
1363   PetscInt ii,jj,s,q,d;
1364   PetscInt k,kk;
1365   PetscMPIInt color;
1366   MPI_Comm comm, subcomm;
1367 
1368   IS **iis;
1369   PetscFunctionBegin;
1370   ierr = PetscObjectGetComm((PetscObject)pc, &comm);     CHKERRQ(ierr);
1371   ierr = MPI_Comm_size(comm, &size);                     CHKERRQ(ierr);
1372   ierr = MPI_Comm_rank(comm, &rank);                     CHKERRQ(ierr);
1373   ierr = MatGetOwnershipRange(pc->pmat, &first, &last);  CHKERRQ(ierr);
1374   if(first%dof || last%dof) {
1375     SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) "
1376 	     "does not respect the number of degrees of freedom per grid point %D", first, last, dof);
1377   }
1378   /* Determine the number of domains with nonzero intersections with the local ownership range. */
1379   s = 0;
1380   ystart = 0;
1381   for (j=0; j<Ndomains; ++j) {
1382     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1383     if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N);
1384     /* Vertical domain limits with an overlap. */
1385     ylow = PetscMax(ystart - overlap,0);
1386     yhigh = PetscMin(ystart + maxheight + overlap,N);
1387     xstart = 0;
1388     for (i=0; i<Mdomains; ++i) {
1389       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1390       if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1391       /* Horizontal domain limits with an overlap. */
1392       xleft   = PetscMax(xstart - overlap,0);
1393       xright  = PetscMin(xstart + maxwidth + overlap,M);
1394       /*
1395 	 Determine whether this subdomain intersects this processor's ownership range of pc->pmat.
1396       */
1397       PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1398       if(nidx) {
1399         ++s;
1400       }
1401       xstart += maxwidth;
1402     }/* for(i = 0; i < Mdomains; ++i) */
1403     ystart += maxheight;
1404   }/* for(j = 0; j < Ndomains; ++j) */
1405   /* Now we can allocate the necessary number of ISs. */
1406   *nsub = s;
1407   ierr = PetscMalloc((*nsub)*sizeof(IS*),is);         CHKERRQ(ierr);
1408   ierr = PetscMalloc((*nsub)*sizeof(IS*),is_local);   CHKERRQ(ierr);
1409   s = 0;
1410   ystart = 0;
1411   for (j=0; j<Ndomains; ++j) {
1412     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1413     if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N);
1414     /* Vertical domain limits with an overlap. */
1415     y[0][0] = PetscMax(ystart - overlap,0);
1416     y[0][1] = PetscMin(ystart + maxheight + overlap,N);
1417     /* Vertical domain limits without an overlap. */
1418     y[1][0] = ystart;
1419     y[1][1] = PetscMin(ystart + maxheight,N);
1420     xstart = 0;
1421     for (i=0; i<Mdomains; ++i) {
1422       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1423       if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1424       /* Horizontal domain limits with an overlap. */
1425       x[0][0]  = PetscMax(xstart - overlap,0);
1426       x[0][1]  = PetscMin(xstart + maxwidth + overlap,M);
1427       /* Horizontal domain limits without an overlap. */
1428       x[1][0] = xstart;
1429       x[1][1] = PetscMin(xstart+maxwidth,M);
1430       /*
1431 	 Determine whether this domain intersects this processor's ownership range of pc->pmat.
1432 	 Do this twice: first for the domains with overlaps, and once without.
1433 	 During the first pass create the subcommunicators, and use them on the second pass as well.
1434       */
1435       for(q = 0; q < 2; ++q) {
1436 	/*
1437 	  domain limits, (xleft, xright) and (ylow, yheigh) are adjusted
1438 	  according to whether the domain with an overlap or without is considered.
1439 	*/
1440 	xleft = x[q][0]; xright = x[q][1];
1441 	ylow  = y[q][0]; yhigh  = y[q][1];
1442         PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1443 	nidx *= dof;
1444         n[q] = nidx;
1445         /*
1446          Based on the counted number of indices in the local domain *with an overlap*,
1447          construct a subcommunicator of all the processors supporting this domain.
1448          Observe that a domain with an overlap might have nontrivial local support,
1449          while the domain without an overlap might not.  Hence, the decision to participate
1450          in the subcommunicator must be based on the domain with an overlap.
1451          */
1452 	if(q == 0) {
1453 	  if(nidx) {
1454 	    color = 1;
1455 	  }
1456 	  else {
1457 	    color = MPI_UNDEFINED;
1458 	  }
1459 	  ierr = MPI_Comm_split(comm, color, rank, &subcomm); CHKERRQ(ierr);
1460 	}
1461         /*
1462          Proceed only if the number of local indices *with an overlap* is nonzero.
1463          */
1464         if(n[0]) {
1465           if(q == 0) {
1466             iis = is;
1467           }
1468           if(q == 1) {
1469             /*
1470              The IS for the no-overlap subdomain shares a communicator with the overlapping domain.
1471              Moreover, if the overlap is zero, the two ISs are identical.
1472              */
1473             if(overlap == 0) {
1474               (*is_local)[s] = (*is)[s];
1475               ierr = PetscObjectReference((PetscObject)(*is)[s]);CHKERRQ(ierr);
1476               continue;
1477             }
1478             else {
1479               iis = is_local;
1480               subcomm = ((PetscObject)(*is)[s])->comm;
1481             }
1482           }/* if(q == 1) */
1483           idx = PETSC_NULL;
1484 	  ierr = PetscMalloc(nidx*sizeof(PetscInt),&idx);CHKERRQ(ierr);
1485           if(nidx) {
1486             k    = 0;
1487             for (jj=ylow_loc; jj<yhigh_loc; ++jj) {
1488               PetscInt x0 = (jj==ylow_loc)?xleft_loc:xleft;
1489               PetscInt x1 = (jj==yhigh_loc-1)?xright_loc:xright;
1490               kk = dof*(M*jj + x0);
1491               for (ii=x0; ii<x1; ++ii) {
1492                 for(d = 0; d < dof; ++d) {
1493                   idx[k++] = kk++;
1494                 }
1495               }
1496             }
1497           }
1498 	  ierr = ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*iis)+s);CHKERRQ(ierr);
1499 	}/* if(n[0]) */
1500       }/* for(q = 0; q < 2; ++q) */
1501       if(n[0]) {
1502         ++s;
1503       }
1504       xstart += maxwidth;
1505     }/* for(i = 0; i < Mdomains; ++i) */
1506     ystart += maxheight;
1507   }/* for(j = 0; j < Ndomains; ++j) */
1508   PetscFunctionReturn(0);
1509 }
1510 
1511 #undef __FUNCT__
1512 #define __FUNCT__ "PCGASMGetLocalSubdomains"
1513 /*@C
1514     PCGASMGetLocalSubdomains - Gets the local subdomains (for this processor
1515     only) for the additive Schwarz preconditioner.
1516 
1517     Not Collective
1518 
1519     Input Parameter:
1520 .   pc - the preconditioner context
1521 
1522     Output Parameters:
1523 +   n - the number of subdomains for this processor (default value = 1)
1524 .   is - the index sets that define the subdomains for this processor
1525 -   is_local - the index sets that define the local part of the subdomains for this processor (can be PETSC_NULL)
1526 
1527 
1528     Notes:
1529     The IS numbering is in the parallel, global numbering of the vector.
1530 
1531     Level: advanced
1532 
1533 .keywords: PC, GASM, set, local, subdomains, additive Schwarz
1534 
1535 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
1536           PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubmatrices()
1537 @*/
1538 PetscErrorCode  PCGASMGetLocalSubdomains(PC pc,PetscInt *n,IS *is[],IS *is_local[])
1539 {
1540   PC_GASM         *osm;
1541   PetscErrorCode ierr;
1542   PetscBool      match;
1543 
1544   PetscFunctionBegin;
1545   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1546   PetscValidIntPointer(n,2);
1547   if (is) PetscValidPointer(is,3);
1548   ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1549   if (!match) {
1550     if (n)  *n  = 0;
1551     if (is) *is = PETSC_NULL;
1552   } else {
1553     osm = (PC_GASM*)pc->data;
1554     if (n)  *n  = osm->n;
1555     if (is) *is = osm->is;
1556     if (is_local) *is_local = osm->is_local;
1557   }
1558   PetscFunctionReturn(0);
1559 }
1560 
1561 #undef __FUNCT__
1562 #define __FUNCT__ "PCGASMGetLocalSubmatrices"
1563 /*@C
1564     PCGASMGetLocalSubmatrices - Gets the local submatrices (for this processor
1565     only) for the additive Schwarz preconditioner.
1566 
1567     Not Collective
1568 
1569     Input Parameter:
1570 .   pc - the preconditioner context
1571 
1572     Output Parameters:
1573 +   n - the number of matrices for this processor (default value = 1)
1574 -   mat - the matrices
1575 
1576 
1577     Level: advanced
1578 
1579 .keywords: PC, GASM, set, local, subdomains, additive Schwarz, block Jacobi
1580 
1581 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
1582           PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubdomains()
1583 @*/
1584 PetscErrorCode  PCGASMGetLocalSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1585 {
1586   PC_GASM         *osm;
1587   PetscErrorCode ierr;
1588   PetscBool      match;
1589 
1590   PetscFunctionBegin;
1591   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1592   PetscValidIntPointer(n,2);
1593   if (mat) PetscValidPointer(mat,3);
1594   if (!pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUP() or PCSetUp().");
1595   ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1596   if (!match) {
1597     if (n)   *n   = 0;
1598     if (mat) *mat = PETSC_NULL;
1599   } else {
1600     osm = (PC_GASM*)pc->data;
1601     if (n)   *n   = osm->n;
1602     if (mat) *mat = osm->pmat;
1603   }
1604   PetscFunctionReturn(0);
1605 }
1606