xref: /petsc/src/ksp/pc/impls/gasm/gasm.c (revision e4ed7901070ce1ca36eb7d68dd07557542e66e31)
1 
2 /*
3   This file defines a "generalized" additive Schwarz preconditioner for any Mat implementation.
4   In this version each processor may have any number of subdomains and a subdomain may live on multiple
5   processors.
6 
7        N    - total number of subdomains on all processors
8        n    - actual number of subdomains on this processor
9        nmax - maximum number of subdomains per processor
10 */
11 #include <petsc-private/pcimpl.h>     /*I "petscpc.h" I*/
12 
/* Private context for the generalized additive Schwarz (GASM) preconditioner. */
typedef struct {
  PetscInt   N,n,nmax;            /* total (N), this-processor (n), and max-per-processor (nmax) numbers of subdomains */
  PetscInt   overlap;             /* overlap requested by user */
  KSP        *ksp;                /* linear solvers for each block */
  Vec        gx,gy;               /* Merged work vectors */
  Vec        *x,*y;               /* Split work vectors; storage aliases pieces of storage of the above merged vectors. */
  IS         gis, gis_local;      /* merged ISs: disjoint union of all overlapping (gis) resp. nonoverlapping (gis_local) subdomain indices on this processor */
  VecScatter grestriction;        /* merged restriction: scatters a global vector into gx */
  VecScatter gprolongation;       /* merged prolongation: scatters gy back into a global vector */
  IS         *is;                 /* index set that defines each overlapping subdomain */
  IS         *is_local;           /* index set that defines each local subdomain (same as subdomain with the overlap removed); may be NULL */
  Mat        *pmat;               /* subdomain block matrices */
  PCGASMType  type;               /* use reduced interpolation, restriction or both */
  PetscBool  type_set;            /* if user set this value (so won't change it for symmetric problems) */
  PetscBool  same_local_solves;   /* flag indicating whether all local solvers are same */
  PetscBool  sort_indices;        /* flag to sort subdomain indices */
} PC_GASM;
30 
31 #undef __FUNCT__
32 #define __FUNCT__ "PCGASMPrintSubdomains"
33 static PetscErrorCode  PCGASMPrintSubdomains(PC pc)
34 {
35   PC_GASM         *osm  = (PC_GASM*)pc->data;
36   const char     *prefix;
37   char           fname[PETSC_MAX_PATH_LEN+1];
38   PetscViewer    viewer, sviewer;
39   PetscInt       i,j,nidx;
40   const PetscInt *idx;
41   char           *cidx;
42   PetscBool      found;
43   PetscMPIInt    size, rank;
44   PetscErrorCode ierr;
45 
46   PetscFunctionBegin;
47   ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size);CHKERRQ(ierr);
48   ierr = MPI_Comm_rank(((PetscObject)pc)->comm, &rank);CHKERRQ(ierr);
49   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
50   ierr = PetscOptionsGetString(prefix,"-pc_gasm_print_subdomains",fname,PETSC_MAX_PATH_LEN,&found);CHKERRQ(ierr);
51   if (!found) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); };
52   for (i=0;i<osm->n;++i) {
53     ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr);
54     ierr = ISGetLocalSize(osm->is[i], &nidx);CHKERRQ(ierr);
55     /*
56      No more than 15 characters per index plus a space.
57      PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
58      in case nidx == 0. That will take care of the space for the trailing '\0' as well.
59      For nidx == 0, the whole string 16 '\0'.
60      */
61     ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx);CHKERRQ(ierr);
62     ierr = ISGetIndices(osm->is[i], &idx);CHKERRQ(ierr);
63     ierr = PetscViewerStringOpen(((PetscObject)(osm->is[i]))->comm, cidx, 16*(nidx+1), &sviewer);CHKERRQ(ierr);
64     for(j = 0; j < nidx; ++j) {
65       ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);CHKERRQ(ierr);
66     }
67     ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr);
68     ierr = ISRestoreIndices(osm->is[i],&idx);CHKERRQ(ierr);
69 
70     ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
71     ierr = PetscViewerASCIIPrintf(viewer, "Subdomain with overlap\n");CHKERRQ(ierr);
72     ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr);
73     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
74     ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
75     ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr);
76     ierr = PetscFree(cidx);CHKERRQ(ierr);
77     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
78     if (osm->is_local) {
79       ierr = PetscViewerASCIIOpen(((PetscObject)((osm->is)[i]))->comm,fname,&viewer);CHKERRQ(ierr);
80       ierr = ISGetLocalSize(osm->is_local[i], &nidx);CHKERRQ(ierr);
81       /*
82        No more than 15 characters per index plus a space.
83        PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
84        in case nidx == 0. That will take care of the space for the trailing '\0' as well.
85        For nidx == 0, the whole string 16 '\0'.
86        */
87       ierr = PetscMalloc(sizeof(char)*(16*(nidx+1)+1), &cidx);CHKERRQ(ierr);
88       ierr = ISGetIndices(osm->is_local[i], &idx);CHKERRQ(ierr);
89       ierr = PetscViewerStringOpen(((PetscObject)(osm->is_local[i]))->comm, cidx, 16*(nidx+1), &sviewer);CHKERRQ(ierr);
90       for(j = 0; j < nidx; ++j) {
91         ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);CHKERRQ(ierr);
92       }
93       ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr);
94       ierr = ISRestoreIndices(osm->is_local[i],&idx);CHKERRQ(ierr);
95 
96       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
97       ierr = PetscViewerASCIIPrintf(viewer, "Subdomain without overlap\n");CHKERRQ(ierr);
98       ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr);
99       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
100       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
101       ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr);
102       ierr = PetscFree(cidx);CHKERRQ(ierr);
103       ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
104     }
105   }
106   PetscFunctionReturn(0);
107 }
108 
109 
#undef __FUNCT__
#define __FUNCT__ "PCView_GASM"
/*
   PCView_GASM - prints the state of the GASM preconditioner.

   Supports ASCII viewers (per-rank subdomain counts, overlap and type
   settings, and the subdomain KSP(s)) and string viewers (one-line summary).
   If -pc_gasm_print_subdomains is set, the subdomain index sets are printed
   afterwards via PCGASMPrintSubdomains().
*/
static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer)
{
  PC_GASM        *osm = (PC_GASM*)pc->data;
  const char     *prefix;
  PetscErrorCode ierr;
  PetscMPIInt    rank, size;
  PetscInt       i,bsz;
  PetscBool      iascii,isstring, print_subdomains=PETSC_FALSE;
  PetscViewer    sviewer;


  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)pc)->comm, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(((PetscObject)pc)->comm, &rank);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);CHKERRQ(ierr);
  /* Whether to also dump the subdomain index sets at the end of the view. */
  ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&print_subdomains,PETSC_NULL);CHKERRQ(ierr);
  if (iascii) {
    /* Defaults describe the "not explicitly set" states; overwritten below when values are known. */
    char overlaps[256] = "user-defined overlap",subdomains[256] = "total subdomains set";
    if (osm->overlap >= 0) {ierr = PetscSNPrintf(overlaps,sizeof overlaps,"amount of overlap = %D",osm->overlap);CHKERRQ(ierr);}
    if (osm->nmax > 0)     {ierr = PetscSNPrintf(subdomains,sizeof subdomains,"max number of local subdomains = %D",osm->nmax);CHKERRQ(ierr);}
    /* Each rank reports its own subdomain count via a synchronized print. */
    ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d:%d] number of locally-supported subdomains = %D\n",(int)rank,(int)size,osm->n);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  Generalized additive Schwarz: %s, %s\n",subdomains,overlaps);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  Generalized additive Schwarz: restriction/interpolation type - %s\n",PCGASMTypes[osm->type]);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
    if (osm->same_local_solves) {
      /* All local solves are identical: print just the first KSP, from rank 0 only. */
      if (osm->ksp) {
        ierr = PetscViewerASCIIPrintf(viewer,"  Local solve is same for all subdomains, in the following KSP and PC objects:\n");CHKERRQ(ierr);
        ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
        if (!rank) {
          ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
          ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);
          ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        }
        ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
      }
    } else {
      /*
         Print every subdomain's KSP.  All ranks iterate up to nmax (not their
         local n) so that each Get/RestoreSingleton call is made by every rank;
         only ranks that own subdomain i actually print.
      */
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Local solve info for each subdomain is in the following KSP and PC objects:\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
      for (i=0; i<osm->nmax; i++) {
        ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
        if (i < osm->n) {
          ierr = ISGetLocalSize(osm->is[i],&bsz);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedAllow(sviewer,PETSC_TRUE);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"[%d:%d] local subdomain number %D, size = %D\n",(int)rank,(int)size,i,bsz);CHKERRQ(ierr);
          ierr = KSPView(osm->ksp[i],sviewer);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
          ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedAllow(sviewer,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
      }
      ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    }
  } else if (isstring) {
    ierr = PetscViewerStringSPrintf(viewer," subdomains=%D, overlap=%D, type=%s",osm->n,osm->overlap,PCGASMTypes[osm->type]);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (osm->ksp) {ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);}
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_SUP,"Viewer type %s not supported for PCGASM",((PetscObject)viewer)->type_name);
  }
  ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  if(print_subdomains) {
    ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
187 
188 
189 
190 
191 
#undef __FUNCT__
#define __FUNCT__ "PCSetUp_GASM"
/*
   PCSetUp_GASM - builds everything needed to apply the preconditioner:

     1. Determine the subdomains: user-supplied, derived from a decomposition
        of pc->dm, or (by default) one subdomain per process.
     2. Define the nonoverlapping local subdomains (osm->is_local) and extend
        osm->is by the requested overlap.
     3. Merge the subdomain ISs into osm->gis/gis_local, create the merged
        work vectors gx/gy, and build the restriction and prolongation
        scatters between the global vectors and the merged work vectors.
     4. Create per-subdomain work vectors that alias slices of gx/gy.
     5. Create one KSP per local subdomain and extract the subdomain
        submatrices into them.

   On repeated calls only the submatrices are re-extracted (and re-created if
   pc->flag indicates the nonzero pattern changed).
*/
static PetscErrorCode PCSetUp_GASM(PC pc)
{
  PC_GASM        *osm  = (PC_GASM*)pc->data;
  PetscErrorCode ierr;
  PetscBool      symset,flg;
  PetscInt       i;
  PetscMPIInt    rank, size;
  MatReuse       scall = MAT_REUSE_MATRIX;
  KSP            ksp;
  PC             subpc;
  const char     *prefix,*pprefix;
  PetscInt       dn;       /* Number of indices in a single subdomain assigned to this processor. */
  const PetscInt *didx;    /* Indices from a single subdomain assigned to this processor. */
  PetscInt       ddn;      /* Number of indices in all subdomains assigned to this processor. */
  PetscInt       *ddidx;   /* Indices of all subdomains assigned to this processor. */
  IS             gid;      /* Identity IS of the size of all subdomains assigned to this processor. */
  Vec            x,y;
  PetscScalar    *gxarray, *gyarray;
  PetscInt       gfirst, glast;
  DM             *domain_dm = PETSC_NULL;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
  if (!pc->setupcalled) {

    /* Default to the symmetric variant when the matrix is known symmetric and the user did not choose a type. */
    if (!osm->type_set) {
      ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
      if (symset && flg) { osm->type = PC_GASM_BASIC; }
    }

    if (osm->N == PETSC_DECIDE && osm->n < 1) {
      /* no subdomains given, try pc->dm */
      if(pc->dm) {
        char      ddm_name[1024];
        DM        ddm;
        PetscBool flg;
        PetscInt     num_domains, d;
        char         **domain_names;
        IS           *domain_is;
        /* Allow the user to request a decomposition DM by name */
        ierr = PetscStrncpy(ddm_name, "", 1024); CHKERRQ(ierr);
        /* NOTE(review): option name uses the "asm" prefix inside GASM code; presumably should be -pc_gasm_decomposition_dm -- confirm */
        ierr = PetscOptionsString("-pc_asm_decomposition_dm", "Name of the DM defining the composition", "PCSetDM", ddm_name, ddm_name,1024,&flg); CHKERRQ(ierr);
        if(flg) {
          ierr = DMCreateDecompositionDM(pc->dm, ddm_name, &ddm); CHKERRQ(ierr);
          if(!ddm) {
            SETERRQ1(((PetscObject)pc)->comm, PETSC_ERR_ARG_WRONGSTATE, "Uknown DM decomposition name %s", ddm_name);
          }
          ierr = PetscInfo(pc,"Using decomposition DM defined using options database\n");CHKERRQ(ierr);
          ierr = PCSetDM(pc,ddm); CHKERRQ(ierr);
        }
        /* NOTE(review): domain_dm is returned here but never used or freed in this function -- check for a leak */
        ierr = DMCreateDecomposition(pc->dm, &num_domains, &domain_names, &domain_is, &domain_dm);    CHKERRQ(ierr);
        ierr = PCGASMSetLocalSubdomains(pc, num_domains, domain_is, PETSC_NULL);CHKERRQ(ierr);
        /* The setter references the ISs, so release our references and the arrays. */
        for(d = 0; d < num_domains; ++d) {
          ierr = PetscFree(domain_names[d]); CHKERRQ(ierr);
          ierr = ISDestroy(&domain_is[d]);   CHKERRQ(ierr);
        }
        ierr = PetscFree(domain_names);CHKERRQ(ierr);
        ierr = PetscFree(domain_is);CHKERRQ(ierr);
      }
      else { /* no dm, use one per processor */
        osm->nmax = osm->n = 1;
        ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
        osm->N = size;
      }
    } else if (osm->N == PETSC_DECIDE) {
      PetscInt inwork[2], outwork[2];
      /* determine global number of subdomains and the max number of local subdomains */
      inwork[0] = inwork[1] = osm->n;
      ierr = MPI_Allreduce(inwork,outwork,1,MPIU_2INT,PetscMaxSum_Op,((PetscObject)pc)->comm);CHKERRQ(ierr);
      osm->nmax = outwork[0];
      osm->N    = outwork[1];
    }
    if (!osm->is){ /* create the index sets */
      ierr = PCGASMCreateSubdomains(pc->pmat,osm->n,&osm->is);CHKERRQ(ierr);
    }
    if (!osm->is_local) {
      /*
         This indicates that osm->is should define a nonoverlapping decomposition
         (there is no way to really guarantee that if subdomains are set by the user through PCGASMSetLocalSubdomains,
          but the assumption is that either the user does the right thing, or subdomains in ossm->is have been created
          via PCGASMCreateSubdomains, which guarantees a nonoverlapping decomposition).
         Therefore, osm->is will be used to define osm->is_local.
         If a nonzero overlap has been requested by the user, then osm->is will be expanded and will overlap,
         so osm->is_local should obtain a copy of osm->is while they are still (presumably) nonoverlapping.
         Otherwise (no overlap has been requested), osm->is_local are simply aliases for osm->is.
      */
      ierr = PetscMalloc(osm->n*sizeof(IS),&osm->is_local);CHKERRQ(ierr);
      for (i=0; i<osm->n; i++) {
        if (osm->overlap > 0) { /* With positive overlap, osm->is[i] will be modified */
          ierr = ISDuplicate(osm->is[i],&osm->is_local[i]);CHKERRQ(ierr);
          ierr = ISCopy(osm->is[i],osm->is_local[i]);CHKERRQ(ierr);
        } else {
          ierr = PetscObjectReference((PetscObject)osm->is[i]);CHKERRQ(ierr);
          osm->is_local[i] = osm->is[i];
        }
      }
    }
    /* Beyond this point osm->is_local is not null. */
    if (osm->overlap > 0) {
      /* Extend the "overlapping" regions by a number of steps */
      ierr = MatIncreaseOverlap(pc->pmat,osm->n,osm->is,osm->overlap);CHKERRQ(ierr);
    }
    ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
    flg  = PETSC_FALSE;
    ierr = PetscOptionsGetBool(prefix,"-pc_gasm_print_subdomains",&flg,PETSC_NULL);CHKERRQ(ierr);
    if (flg) { ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr); }

    if (osm->sort_indices) {
      for (i=0; i<osm->n; i++) {
        ierr = ISSort(osm->is[i]);CHKERRQ(ierr);
        ierr = ISSort(osm->is_local[i]);CHKERRQ(ierr);
      }
    }
    /* Merge the ISs, create merged vectors and scatter contexts. */
    /* Restriction ISs. */
    /* First pass: total number of indices over all local subdomains. */
    ddn = 0;
    for (i=0; i<osm->n; i++) {
      ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr);
      ddn += dn;
    }
    ierr = PetscMalloc(ddn*sizeof(PetscInt), &ddidx);CHKERRQ(ierr);
    /* Second pass: concatenate the subdomain indices into ddidx. */
    ddn = 0;
    for (i=0; i<osm->n; i++) {
      ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr);
      ierr = ISGetIndices(osm->is[i],&didx);CHKERRQ(ierr);
      ierr = PetscMemcpy(ddidx+ddn, didx, sizeof(PetscInt)*dn);CHKERRQ(ierr);
      ierr = ISRestoreIndices(osm->is[i], &didx);CHKERRQ(ierr);
      ddn += dn;
    }
    /* gis takes ownership of ddidx (PETSC_OWN_POINTER): do not free it here. */
    ierr = ISCreateGeneral(((PetscObject)(pc))->comm, ddn, ddidx, PETSC_OWN_POINTER, &osm->gis);CHKERRQ(ierr);
    ierr = MatGetVecs(pc->pmat,&x,&y);CHKERRQ(ierr);
    ierr = VecCreateMPI(((PetscObject)pc)->comm, ddn, PETSC_DECIDE, &osm->gx);CHKERRQ(ierr);
    ierr = VecDuplicate(osm->gx,&osm->gy);CHKERRQ(ierr);
    ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast);CHKERRQ(ierr);
    /* Scatter the gis entries of a global vector into the locally owned block [gfirst, gfirst+ddn) of gx. */
    ierr = ISCreateStride(((PetscObject)pc)->comm,ddn,gfirst,1, &gid);CHKERRQ(ierr);
    ierr = VecScatterCreate(x,osm->gis,osm->gx,gid, &(osm->grestriction));CHKERRQ(ierr);
    ierr = ISDestroy(&gid);CHKERRQ(ierr);
    /* Prolongation ISs */
    { PetscInt       dn_local;       /* Number of indices in the local part of a single domain assigned to this processor. */
      const PetscInt *didx_local;    /* Global indices from the local part of a single domain assigned to this processor. */
      PetscInt       ddn_local;      /* Number of indices in the local part of the disjoint union all domains assigned to this processor. */
      PetscInt       *ddidx_local;   /* Global indices of the local part of the disjoint union of all domains assigned to this processor. */
      /**/
      ISLocalToGlobalMapping ltog;          /* Map from global to local indices on the disjoint union of subdomains: "local" ind's run from 0 to ddn-1. */
      PetscInt              *ddidx_llocal;  /* Mapped local indices of the disjoint union of local parts of subdomains. */
      PetscInt               ddn_llocal;    /* Number of indices in ddidx_llocal; must equal ddn_local, or else gis_local is not a sub-IS of gis. */
      IS                     gis_llocal;    /* IS with ddidx_llocal indices. */
      PetscInt               j;
      /* Concatenate the is_local indices, mirroring the two-pass construction of ddidx above. */
      ddn_local = 0;
      for (i=0; i<osm->n; i++) {
        ierr = ISGetLocalSize(osm->is_local[i],&dn_local);CHKERRQ(ierr);
        ddn_local += dn_local;
      }
      ierr = PetscMalloc(ddn_local*sizeof(PetscInt), &ddidx_local);CHKERRQ(ierr);
      ddn_local = 0;
      for (i=0; i<osm->n; i++) {
        ierr = ISGetLocalSize(osm->is_local[i],&dn_local);CHKERRQ(ierr);
        ierr = ISGetIndices(osm->is_local[i],&didx_local);CHKERRQ(ierr);
        ierr = PetscMemcpy(ddidx_local+ddn_local, didx_local, sizeof(PetscInt)*dn_local);CHKERRQ(ierr);
        ierr = ISRestoreIndices(osm->is_local[i], &didx_local);CHKERRQ(ierr);
        ddn_local += dn_local;
      }
      ierr = PetscMalloc(sizeof(PetscInt)*ddn_local, &ddidx_llocal);CHKERRQ(ierr);
      ierr = ISCreateGeneral(((PetscObject)pc)->comm, ddn_local, ddidx_local, PETSC_OWN_POINTER, &(osm->gis_local));CHKERRQ(ierr);
      /* Locate each gis_local index within gis; every index must be found (IS_GTOLM_DROP drops the missing ones, which is an error here). */
      ierr = ISLocalToGlobalMappingCreateIS(osm->gis,&ltog);CHKERRQ(ierr);
      ierr = ISGlobalToLocalMappingApply(ltog,IS_GTOLM_DROP,ddn_local,ddidx_local,&ddn_llocal,ddidx_llocal);CHKERRQ(ierr);
      ierr = ISLocalToGlobalMappingDestroy(&ltog);CHKERRQ(ierr);
      if (ddn_llocal != ddn_local) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"gis_local contains %D indices outside of gis", ddn_llocal - ddn_local);
      /* Now convert these localized indices into the global indices into the merged output vector. */
      ierr = VecGetOwnershipRange(osm->gy, &gfirst, &glast);CHKERRQ(ierr);
      for(j=0; j < ddn_llocal; ++j) {
        ddidx_llocal[j] += gfirst;
      }
      ierr = ISCreateGeneral(PETSC_COMM_SELF,ddn_llocal,ddidx_llocal,PETSC_OWN_POINTER,&gis_llocal);CHKERRQ(ierr);
      ierr = VecScatterCreate(y,osm->gis_local,osm->gy,gis_llocal,&osm->gprolongation);CHKERRQ(ierr);
      ierr = ISDestroy(&gis_llocal);CHKERRQ(ierr);
    }
    /* Create the subdomain work vectors. */
    /* Each osm->x[i]/osm->y[i] wraps a slice of the gx/gy storage, so subdomain solves operate directly on the merged vectors. */
    ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->x);CHKERRQ(ierr);
    ierr = PetscMalloc(osm->n*sizeof(Vec),&osm->y);CHKERRQ(ierr);
    ierr = VecGetOwnershipRange(osm->gx, &gfirst, &glast);CHKERRQ(ierr);
    ierr = VecGetArray(osm->gx, &gxarray);CHKERRQ(ierr);
    ierr = VecGetArray(osm->gy, &gyarray);CHKERRQ(ierr);
    for (i=0, ddn=0; i<osm->n; ++i, ddn += dn) {
      PetscInt dN;
      ierr = ISGetLocalSize(osm->is[i],&dn);CHKERRQ(ierr);
      ierr = ISGetSize(osm->is[i],&dN);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gxarray+ddn,&osm->x[i]);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(((PetscObject)(osm->is[i]))->comm,dn,dN,gyarray+ddn,&osm->y[i]);CHKERRQ(ierr);
    }
    ierr = VecRestoreArray(osm->gx, &gxarray);CHKERRQ(ierr);
    ierr = VecRestoreArray(osm->gy, &gyarray);CHKERRQ(ierr);
    ierr = VecDestroy(&x);CHKERRQ(ierr);
    ierr = VecDestroy(&y);CHKERRQ(ierr);
    /* Create the local solvers */
    ierr = PetscMalloc(osm->n*sizeof(KSP *),&osm->ksp);CHKERRQ(ierr);
    for (i=0; i<osm->n; i++) { /* KSPs are local */
      ierr = KSPCreate(((PetscObject)(osm->is[i]))->comm,&ksp);CHKERRQ(ierr);
      ierr = PetscLogObjectParent(pc,ksp);CHKERRQ(ierr);
      ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr);
      ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
      ierr = KSPGetPC(ksp,&subpc);CHKERRQ(ierr);
      ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
      ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr);
      ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr);
      osm->ksp[i] = ksp;
    }
    scall = MAT_INITIAL_MATRIX;

  } else {
    /*
       Destroy the blocks from the previous iteration
    */
    if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
      ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
      scall = MAT_INITIAL_MATRIX;
    }
  }

  /*
     Extract out the submatrices.
  */
  if(size > 1) {
    ierr = MatGetSubMatricesParallel(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr);
  }
  else {
    ierr = MatGetSubMatrices(pc->pmat,osm->n,osm->is, osm->is,scall,&osm->pmat);CHKERRQ(ierr);
  }
  if (scall == MAT_INITIAL_MATRIX) {
    /* Newly created submatrices inherit the options prefix of the original matrix. */
    ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr);
    for (i=0; i<osm->n; i++) {
      ierr = PetscLogObjectParent(pc,osm->pmat[i]);CHKERRQ(ierr);
      ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr);
    }
  }

  /* Return control to the user so that the submatrices can be modified (e.g., to apply
     different boundary conditions for the submatrices than for the global problem) */
  ierr = PCModifySubMatrices(pc,osm->n,osm->is,osm->is,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr);

  /*
     Loop over submatrices putting them into local ksp
  */
  for (i=0; i<osm->n; i++) {
    ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i],pc->flag);CHKERRQ(ierr);
    if (!pc->setupcalled) {
      ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr);
    }
  }

  PetscFunctionReturn(0);
}
447 
448 #undef __FUNCT__
449 #define __FUNCT__ "PCSetUpOnBlocks_GASM"
450 static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc)
451 {
452   PC_GASM         *osm = (PC_GASM*)pc->data;
453   PetscErrorCode ierr;
454   PetscInt       i;
455 
456   PetscFunctionBegin;
457   for (i=0; i<osm->n; i++) {
458     ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr);
459   }
460   PetscFunctionReturn(0);
461 }
462 
463 #undef __FUNCT__
464 #define __FUNCT__ "PCApply_GASM"
465 static PetscErrorCode PCApply_GASM(PC pc,Vec x,Vec y)
466 {
467   PC_GASM         *osm = (PC_GASM*)pc->data;
468   PetscErrorCode ierr;
469   PetscInt       i;
470   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
471 
472   PetscFunctionBegin;
473   /*
474      Support for limiting the restriction or interpolation to only local
475      subdomain values (leaving the other values 0).
476   */
477   if (!(osm->type & PC_GASM_RESTRICT)) {
478     forward = SCATTER_FORWARD_LOCAL;
479     /* have to zero the work RHS since scatter may leave some slots empty */
480     ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
481   }
482   if (!(osm->type & PC_GASM_INTERPOLATE)) {
483     reverse = SCATTER_REVERSE_LOCAL;
484   }
485 
486   ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
487   ierr = VecZeroEntries(y);CHKERRQ(ierr);
488   ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
489   /* do the subdomain solves */
490   for (i=0; i<osm->n; ++i) {
491     ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
492   }
493   ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
494   ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
495   PetscFunctionReturn(0);
496 }
497 
498 #undef __FUNCT__
499 #define __FUNCT__ "PCApplyTranspose_GASM"
500 static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec x,Vec y)
501 {
502   PC_GASM         *osm = (PC_GASM*)pc->data;
503   PetscErrorCode ierr;
504   PetscInt       i;
505   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
506 
507   PetscFunctionBegin;
508   /*
509      Support for limiting the restriction or interpolation to only local
510      subdomain values (leaving the other values 0).
511 
512      Note: these are reversed from the PCApply_GASM() because we are applying the
513      transpose of the three terms
514   */
515   if (!(osm->type & PC_GASM_INTERPOLATE)) {
516     forward = SCATTER_FORWARD_LOCAL;
517     /* have to zero the work RHS since scatter may leave some slots empty */
518     ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
519   }
520   if (!(osm->type & PC_GASM_RESTRICT)) {
521     reverse = SCATTER_REVERSE_LOCAL;
522   }
523 
524   ierr = VecScatterBegin(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
525   ierr = VecZeroEntries(y);CHKERRQ(ierr);
526   ierr = VecScatterEnd(osm->grestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
527   /* do the local solves */
528   for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
529     ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
530   }
531   ierr = VecScatterBegin(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
532   ierr = VecScatterEnd(osm->gprolongation,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
533   PetscFunctionReturn(0);
534 }
535 
536 #undef __FUNCT__
537 #define __FUNCT__ "PCReset_GASM"
538 static PetscErrorCode PCReset_GASM(PC pc)
539 {
540   PC_GASM        *osm = (PC_GASM*)pc->data;
541   PetscErrorCode ierr;
542   PetscInt       i;
543 
544   PetscFunctionBegin;
545   if (osm->ksp) {
546     for (i=0; i<osm->n; i++) {
547       ierr = KSPReset(osm->ksp[i]);CHKERRQ(ierr);
548     }
549   }
550   if (osm->pmat) {
551     if (osm->n > 0) {
552       ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
553     }
554   }
555   if (osm->x) {
556     for (i=0; i<osm->n; i++) {
557       ierr = VecDestroy(&osm->x[i]);CHKERRQ(ierr);
558       ierr = VecDestroy(&osm->y[i]);CHKERRQ(ierr);
559     }
560   }
561   ierr = VecDestroy(&osm->gx);CHKERRQ(ierr);
562   ierr = VecDestroy(&osm->gy);CHKERRQ(ierr);
563 
564   ierr = VecScatterDestroy(&osm->grestriction);CHKERRQ(ierr);
565   ierr = VecScatterDestroy(&osm->gprolongation);CHKERRQ(ierr);
566   if (osm->is) {
567     ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
568     osm->is = 0;
569     osm->is_local = 0;
570   }
571   ierr = ISDestroy(&osm->gis);CHKERRQ(ierr);
572   ierr = ISDestroy(&osm->gis_local);CHKERRQ(ierr);
573   PetscFunctionReturn(0);
574 }
575 
576 #undef __FUNCT__
577 #define __FUNCT__ "PCDestroy_GASM"
578 static PetscErrorCode PCDestroy_GASM(PC pc)
579 {
580   PC_GASM         *osm = (PC_GASM*)pc->data;
581   PetscErrorCode ierr;
582   PetscInt       i;
583 
584   PetscFunctionBegin;
585   ierr = PCReset_GASM(pc);CHKERRQ(ierr);
586   if (osm->ksp) {
587     for (i=0; i<osm->n; i++) {
588       ierr = KSPDestroy(&osm->ksp[i]);CHKERRQ(ierr);
589     }
590     ierr = PetscFree(osm->ksp);CHKERRQ(ierr);
591   }
592   ierr = PetscFree(osm->x);CHKERRQ(ierr);
593   ierr = PetscFree(osm->y);CHKERRQ(ierr);
594   ierr = PetscFree(pc->data);CHKERRQ(ierr);
595   PetscFunctionReturn(0);
596 }
597 
598 #undef __FUNCT__
599 #define __FUNCT__ "PCSetFromOptions_GASM"
600 static PetscErrorCode PCSetFromOptions_GASM(PC pc) {
601   PC_GASM         *osm = (PC_GASM*)pc->data;
602   PetscErrorCode ierr;
603   PetscInt       blocks,ovl;
604   PetscBool      symset,flg;
605   PCGASMType      gasmtype;
606 
607   PetscFunctionBegin;
608   /* set the type to symmetric if matrix is symmetric */
609   if (!osm->type_set && pc->pmat) {
610     ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
611     if (symset && flg) { osm->type = PC_GASM_BASIC; }
612   }
613   ierr = PetscOptionsHead("Generalized additive Schwarz options");CHKERRQ(ierr);
614     ierr = PetscOptionsInt("-pc_gasm_blocks","Number of subdomains","PCGASMSetTotalSubdomains",osm->n,&blocks,&flg);CHKERRQ(ierr);
615     if (flg) {ierr = PCGASMSetTotalSubdomains(pc,blocks);CHKERRQ(ierr); }
616     ierr = PetscOptionsInt("-pc_gasm_overlap","Number of grid points overlap","PCGASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr);
617     if (flg) {ierr = PCGASMSetOverlap(pc,ovl);CHKERRQ(ierr); }
618     flg  = PETSC_FALSE;
619     ierr = PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);CHKERRQ(ierr);
620     if (flg) {ierr = PCGASMSetType(pc,gasmtype);CHKERRQ(ierr); }
621   ierr = PetscOptionsTail();CHKERRQ(ierr);
622   PetscFunctionReturn(0);
623 }
624 
625 /*------------------------------------------------------------------------------------*/
626 
627 EXTERN_C_BEGIN
628 #undef __FUNCT__
629 #define __FUNCT__ "PCGASMSetLocalSubdomains_GASM"
630 PetscErrorCode  PCGASMSetLocalSubdomains_GASM(PC pc,PetscInt n,IS is[],IS is_local[])
631 {
632   PC_GASM         *osm = (PC_GASM*)pc->data;
633   PetscErrorCode ierr;
634   PetscInt       i;
635 
636   PetscFunctionBegin;
637   if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more blocks, n = %D",n);
638   if (pc->setupcalled && (n != osm->n || is)) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetLocalSubdomains() should be called before calling PCSetUp().");
639 
640   if (!pc->setupcalled) {
641     osm->n            = n;
642     osm->is           = 0;
643     osm->is_local     = 0;
644     if (is) {
645       for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is[i]);CHKERRQ(ierr);}
646     }
647     if (is_local) {
648       for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is_local[i]);CHKERRQ(ierr);}
649     }
650     if (osm->is) {
651       ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
652     }
653     if (is) {
654       ierr = PetscMalloc(n*sizeof(IS),&osm->is);CHKERRQ(ierr);
655       for (i=0; i<n; i++) { osm->is[i] = is[i]; }
656       /* Flag indicating that the user has set overlapping subdomains so PCGASM should not increase their size. */
657       osm->overlap = -1;
658     }
659     if (is_local) {
660       ierr = PetscMalloc(n*sizeof(IS),&osm->is_local);CHKERRQ(ierr);
661       for (i=0; i<n; i++) { osm->is_local[i] = is_local[i]; }
662     }
663   }
664   PetscFunctionReturn(0);
665 }
666 EXTERN_C_END
667 
668 EXTERN_C_BEGIN
669 #undef __FUNCT__
670 #define __FUNCT__ "PCGASMSetTotalSubdomains_GASM"
671 PetscErrorCode  PCGASMSetTotalSubdomains_GASM(PC pc,PetscInt N) {
672   PC_GASM         *osm = (PC_GASM*)pc->data;
673   PetscErrorCode ierr;
674   PetscMPIInt    rank,size;
675   PetscInt       n;
676 
677   PetscFunctionBegin;
678   if (N < 1) SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Number of total blocks must be > 0, N = %D",N);
679 
680   /*
681      Split the subdomains equally among all processors
682   */
683   ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
684   ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
685   n = N/size + ((N % size) > rank);
686   if (!n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Process %d must have at least one block: total processors %d total blocks %D",(int)rank,(int)size,N);
687   if (pc->setupcalled && n != osm->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before PCSetUp().");
688   if (!pc->setupcalled) {
689     if (osm->is) {
690       ierr = PCGASMDestroySubdomains(osm->n,osm->is,osm->is_local);CHKERRQ(ierr);
691     }
692     osm->N            = N;
693     osm->n            = n;
694     osm->is           = 0;
695     osm->is_local     = 0;
696   }
697   PetscFunctionReturn(0);
698 }/* PCGASMSetTotalSubdomains_GASM() */
699 EXTERN_C_END
700 
701 EXTERN_C_BEGIN
702 #undef __FUNCT__
703 #define __FUNCT__ "PCGASMSetOverlap_GASM"
704 PetscErrorCode  PCGASMSetOverlap_GASM(PC pc,PetscInt ovl)
705 {
706   PC_GASM *osm = (PC_GASM*)pc->data;
707 
708   PetscFunctionBegin;
709   if (ovl < 0) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
710   if (pc->setupcalled && ovl != osm->overlap) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp().");
711   if (!pc->setupcalled) {
712     osm->overlap = ovl;
713   }
714   PetscFunctionReturn(0);
715 }
716 EXTERN_C_END
717 
718 EXTERN_C_BEGIN
719 #undef __FUNCT__
720 #define __FUNCT__ "PCGASMSetType_GASM"
721 PetscErrorCode  PCGASMSetType_GASM(PC pc,PCGASMType type)
722 {
723   PC_GASM *osm = (PC_GASM*)pc->data;
724 
725   PetscFunctionBegin;
726   osm->type     = type;
727   osm->type_set = PETSC_TRUE;
728   PetscFunctionReturn(0);
729 }
730 EXTERN_C_END
731 
732 EXTERN_C_BEGIN
733 #undef __FUNCT__
734 #define __FUNCT__ "PCGASMSetSortIndices_GASM"
PetscErrorCode  PCGASMSetSortIndices_GASM(PC pc,PetscBool  doSort)
{
  PC_GASM *osm = (PC_GASM*)pc->data;

  PetscFunctionBegin;
  /* Record whether subdomain indices should be sorted (see the sort_indices
     flag in PC_GASM); consumed when the subdomains are assembled. */
  osm->sort_indices = doSort;
  PetscFunctionReturn(0);
}
743 EXTERN_C_END
744 
745 EXTERN_C_BEGIN
746 #undef __FUNCT__
747 #define __FUNCT__ "PCGASMGetSubKSP_GASM"
748 /*
749    FIX: This routine might need to be modified once multiple ranks per subdomain are allowed.
750         In particular, it would upset the global subdomain number calculation.
751 */
752 PetscErrorCode  PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp)
753 {
754   PC_GASM         *osm = (PC_GASM*)pc->data;
755   PetscErrorCode ierr;
756 
757   PetscFunctionBegin;
758   if (osm->n < 1) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ORDER,"Need to call PCSetUP() on PC (or KSPSetUp() on the outer KSP object) before calling here");
759 
760   if (n) {
761     *n = osm->n;
762   }
763   if (first) {
764     ierr = MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr);
765     *first -= osm->n;
766   }
767   if (ksp) {
768     /* Assume that local solves are now different; not necessarily
769        true though!  This flag is used only for PCView_GASM() */
770     *ksp                   = osm->ksp;
771     osm->same_local_solves = PETSC_FALSE;
772   }
773   PetscFunctionReturn(0);
774 }/* PCGASMGetSubKSP_GASM() */
775 EXTERN_C_END
776 
777 
778 #undef __FUNCT__
779 #define __FUNCT__ "PCGASMSetLocalSubdomains"
780 /*@C
781     PCGASMSetLocalSubdomains - Sets the local subdomains (for this processor
782     only) for the additive Schwarz preconditioner.
783 
784     Collective on PC
785 
786     Input Parameters:
787 +   pc - the preconditioner context
788 .   n - the number of subdomains for this processor (default value = 1)
789 .   is - the index set that defines the subdomains for this processor
790          (or PETSC_NULL for PETSc to determine subdomains)
791 -   is_local - the index sets that define the local part of the subdomains for this processor
792          (or PETSC_NULL to use the default of 1 subdomain per process)
793 
794     Notes:
795     The IS numbering is in the parallel, global numbering of the vector.
796 
797     By default the GASM preconditioner uses 1 block per processor.
798 
799     Use PCGASMSetTotalSubdomains() to set the subdomains for all processors.
800 
801     Level: advanced
802 
803 .keywords: PC, GASM, set, local, subdomains, additive Schwarz
804 
805 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
806           PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains()
807 @*/
PetscErrorCode  PCGASMSetLocalSubdomains(PC pc,PetscInt n,IS is[],IS is_local[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* Dispatch to the implementation composed with this PC (PCGASMSetLocalSubdomains_GASM
     for PCGASM); PetscTryMethod() is a no-op if no such method is composed. */
  ierr = PetscTryMethod(pc,"PCGASMSetLocalSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,is,is_local));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
817 
818 #undef __FUNCT__
819 #define __FUNCT__ "PCGASMSetTotalSubdomains"
820 /*@C
    PCGASMSetTotalSubdomains - Sets the total number of subdomains, across all processors, for the
    additive Schwarz preconditioner.  Either all or no processors in the
    PC communicator must call this routine, with the same value of N.
824 
825     Collective on PC
826 
    Input Parameters:
+   pc - the preconditioner context
-   N  - the total number of subdomains for all processors
834 
835     Options Database Key:
836     To set the total number of subdomain blocks rather than specify the
837     index sets, use the option
838 .    -pc_gasm_blocks <blks> - Sets total blocks
839 
    Notes:
    This routine sets only the number of subdomains; the subdomains themselves
    are determined internally by PETSc, split as evenly as possible among the
    processors, and every processor must receive at least one subdomain.

    By default the GASM preconditioner uses 1 block per processor.

    Use PCGASMSetLocalSubdomains() to set the subdomain index sets explicitly.
849 
850     Level: advanced
851 
852 .keywords: PC, GASM, set, total, global, subdomains, additive Schwarz
853 
854 .seealso: PCGASMSetLocalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
855           PCGASMCreateSubdomains2D()
856 @*/
PetscErrorCode  PCGASMSetTotalSubdomains(PC pc,PetscInt N)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* Dispatch to the implementation composed with this PC (PCGASMSetTotalSubdomains_GASM
     for PCGASM); PetscTryMethod() is a no-op if no such method is composed. */
  ierr = PetscTryMethod(pc,"PCGASMSetTotalSubdomains_C",(PC,PetscInt),(pc,N));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
866 
867 #undef __FUNCT__
868 #define __FUNCT__ "PCGASMSetOverlap"
869 /*@
870     PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
871     additive Schwarz preconditioner.  Either all or no processors in the
872     PC communicator must call this routine.
873 
874     Logically Collective on PC
875 
876     Input Parameters:
877 +   pc  - the preconditioner context
878 -   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 1)
879 
880     Options Database Key:
881 .   -pc_gasm_overlap <ovl> - Sets overlap
882 
883     Notes:
    By default the GASM preconditioner uses 1 block per processor.  To use
    multiple blocks per processor, see PCGASMSetTotalSubdomains() and
    PCGASMSetLocalSubdomains() (and the option -pc_gasm_blocks <blks>).
887 
888     The overlap defaults to 1, so if one desires that no additional
889     overlap be computed beyond what may have been set with a call to
890     PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(), then ovl
    must be set to be 0.  In particular, if one does not explicitly set
    the subdomains in an application code, then all overlap would be computed
    internally by PETSc, and using an overlap of 0 would result in a GASM
    variant that is equivalent to the block Jacobi preconditioner.
895 
896     Note that one can define initial index sets with any overlap via
897     PCGASMSetTotalSubdomains() or PCGASMSetLocalSubdomains(); the routine
898     PCGASMSetOverlap() merely allows PETSc to extend that overlap further
899     if desired.
900 
901     Level: intermediate
902 
903 .keywords: PC, GASM, set, overlap
904 
905 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
906           PCGASMCreateSubdomains2D(), PCGASMGetLocalSubdomains()
907 @*/
PetscErrorCode  PCGASMSetOverlap(PC pc,PetscInt ovl)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveInt(pc,ovl,2);
  /* Dispatch to the implementation composed with this PC (PCGASMSetOverlap_GASM
     for PCGASM); PetscTryMethod() is a no-op if no such method is composed. */
  ierr = PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
918 
919 #undef __FUNCT__
920 #define __FUNCT__ "PCGASMSetType"
921 /*@
922     PCGASMSetType - Sets the type of restriction and interpolation used
923     for local problems in the additive Schwarz method.
924 
925     Logically Collective on PC
926 
927     Input Parameters:
928 +   pc  - the preconditioner context
929 -   type - variant of GASM, one of
930 .vb
931       PC_GASM_BASIC       - full interpolation and restriction
932       PC_GASM_RESTRICT    - full restriction, local processor interpolation
933       PC_GASM_INTERPOLATE - full interpolation, local processor restriction
934       PC_GASM_NONE        - local processor restriction and interpolation
935 .ve
936 
937     Options Database Key:
938 .   -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
939 
940     Level: intermediate
941 
942 .keywords: PC, GASM, set, type
943 
944 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMGetSubKSP(),
945           PCGASMCreateSubdomains2D()
946 @*/
PetscErrorCode  PCGASMSetType(PC pc,PCGASMType type)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveEnum(pc,type,2);
  /* Dispatch to the implementation composed with this PC (PCGASMSetType_GASM
     for PCGASM); PetscTryMethod() is a no-op if no such method is composed. */
  ierr = PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
957 
958 #undef __FUNCT__
959 #define __FUNCT__ "PCGASMSetSortIndices"
960 /*@
961     PCGASMSetSortIndices - Determines whether subdomain indices are sorted.
962 
963     Logically Collective on PC
964 
965     Input Parameters:
966 +   pc  - the preconditioner context
967 -   doSort - sort the subdomain indices
968 
969     Level: intermediate
970 
971 .keywords: PC, GASM, set, type
972 
973 .seealso: PCGASMSetLocalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMGetSubKSP(),
974           PCGASMCreateSubdomains2D()
975 @*/
PetscErrorCode  PCGASMSetSortIndices(PC pc,PetscBool  doSort)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveBool(pc,doSort,2);
  /* Dispatch to the implementation composed with this PC (PCGASMSetSortIndices_GASM
     for PCGASM); PetscTryMethod() is a no-op if no such method is composed. */
  ierr = PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
986 
987 #undef __FUNCT__
988 #define __FUNCT__ "PCGASMGetSubKSP"
989 /*@C
990    PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on
991    this processor.
992 
993    Collective on PC iff first_local is requested
994 
995    Input Parameter:
996 .  pc - the preconditioner context
997 
998    Output Parameters:
999 +  n_local - the number of blocks on this processor or PETSC_NULL
1000 .  first_local - the global number of the first block on this processor or PETSC_NULL,
1001                  all processors must request or all must pass PETSC_NULL
1002 -  ksp - the array of KSP contexts
1003 
1004    Note:
1005    After PCGASMGetSubKSP() the array of KSPes is not to be freed
1006 
1007    Currently for some matrix implementations only 1 block per processor
1008    is supported.
1009 
1010    You must call KSPSetUp() before calling PCGASMGetSubKSP().
1011 
1012    Level: advanced
1013 
1014 .keywords: PC, GASM, additive Schwarz, get, sub, KSP, context
1015 
1016 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetTotalSubdomains(), PCGASMSetOverlap(),
1017           PCGASMCreateSubdomains2D(),
1018 @*/
PetscErrorCode  PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* Dispatch to the implementation composed with this PC. Unlike the other
     setters, PetscUseMethod() RAISES an error if no method is composed,
     i.e. if pc is not a PCGASM. */
  ierr = PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1028 
1029 /* -------------------------------------------------------------------------------------*/
1030 /*MC
1031    PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
1032            its own KSP object.
1033 
1034    Options Database Keys:
+  -pc_gasm_truelocal - Activates PCGASMSetUseTrueLocal()
1036 .  -pc_gasm_blocks <blks> - Sets total blocks
1037 .  -pc_gasm_overlap <ovl> - Sets overlap
1038 -  -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
1039 
1040      IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
1041       will get a different convergence rate due to the default option of -pc_gasm_type restrict. Use
1042       -pc_gasm_type basic to use the standard GASM.
1043 
1044    Notes: Each processor can have one or more blocks, but a block cannot be shared by more
1045      than one processor. Defaults to one block per processor.
1046 
1047      To set options on the solvers for each block append -sub_ to all the KSP, and PC
1048         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
1049 
1050      To set the options on the solvers separate for each block call PCGASMGetSubKSP()
1051          and set the options directly on the resulting KSP object (you can access its PC
1052          with KSPGetPC())
1053 
1054 
1055    Level: beginner
1056 
1057    Concepts: additive Schwarz method
1058 
1059     References:
1060     An additive variant of the Schwarz alternating method for the case of many subregions
1061     M Dryja, OB Widlund - Courant Institute, New York University Technical report
1062 
1063     Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
1064     Barry Smith, Petter Bjorstad, and William Gropp, Cambridge University Press, ISBN 0-521-49589-X.
1065 
1066 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
1067            PCBJACOBI, PCGASMSetUseTrueLocal(), PCGASMGetSubKSP(), PCGASMSetLocalSubdomains(),
1068            PCGASMSetTotalSubdomains(), PCSetModifySubmatrices(), PCGASMSetOverlap(), PCGASMSetType()
1069 
1070 M*/
1071 
1072 EXTERN_C_BEGIN
1073 #undef __FUNCT__
1074 #define __FUNCT__ "PCCreate_GASM"
PetscErrorCode  PCCreate_GASM(PC pc)
{
  PetscErrorCode ierr;
  PC_GASM         *osm;

  PetscFunctionBegin;
  /* Allocate the GASM context and fill in the defaults. */
  ierr = PetscNewLog(pc,PC_GASM,&osm);CHKERRQ(ierr);
  osm->N                 = PETSC_DECIDE;    /* total subdomain count decided at setup */
  osm->n                 = 0;
  osm->nmax              = 0;
  osm->overlap           = 1;               /* default overlap of one mesh line */
  osm->ksp               = 0;
  osm->grestriction      = 0;
  osm->gprolongation     = 0;
  osm->gx                = 0;
  osm->gy                = 0;
  osm->x                 = 0;
  osm->y                 = 0;
  osm->is                = 0;
  osm->is_local          = 0;
  osm->pmat              = 0;
  osm->type              = PC_GASM_RESTRICT; /* default: restricted additive Schwarz */
  osm->same_local_solves = PETSC_TRUE;
  osm->sort_indices      = PETSC_TRUE;

  /* Install the PCGASM implementations of the generic PC operations. */
  pc->data                   = (void*)osm;
  pc->ops->apply             = PCApply_GASM;
  pc->ops->applytranspose    = PCApplyTranspose_GASM;
  pc->ops->setup             = PCSetUp_GASM;
  pc->ops->reset             = PCReset_GASM;
  pc->ops->destroy           = PCDestroy_GASM;
  pc->ops->setfromoptions    = PCSetFromOptions_GASM;
  pc->ops->setuponblocks     = PCSetUpOnBlocks_GASM;
  pc->ops->view              = PCView_GASM;
  pc->ops->applyrichardson   = 0;

  /* Compose the GASM-specific methods so the PCGASMXxx() wrappers can
     dispatch to them via PetscTryMethod()/PetscUseMethod(). */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetLocalSubdomains_C","PCGASMSetLocalSubdomains_GASM",
                    PCGASMSetLocalSubdomains_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetTotalSubdomains_C","PCGASMSetTotalSubdomains_GASM",
                    PCGASMSetTotalSubdomains_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetOverlap_C","PCGASMSetOverlap_GASM",
                    PCGASMSetOverlap_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetType_C","PCGASMSetType_GASM",
                    PCGASMSetType_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMSetSortIndices_C","PCGASMSetSortIndices_GASM",
                    PCGASMSetSortIndices_GASM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCGASMGetSubKSP_C","PCGASMGetSubKSP_GASM",
                    PCGASMGetSubKSP_GASM);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1125 EXTERN_C_END
1126 
1127 
1128 #undef __FUNCT__
1129 #define __FUNCT__ "PCGASMCreateSubdomains"
1130 /*@C
1131    PCGASMCreateSubdomains - Creates the index sets for the overlapping Schwarz
   preconditioner for any problem on a general grid.
1133 
1134    Collective
1135 
1136    Input Parameters:
1137 +  A - The global matrix operator
1138 -  n - the number of local blocks
1139 
1140    Output Parameters:
1141 .  outis - the array of index sets defining the subdomains
1142 
1143    Level: advanced
1144 
1145    Note: this generates nonoverlapping subdomains; the PCGASM will generate the overlap
1146     from these if you use PCGASMSetLocalSubdomains()
1147 
1148     In the Fortran version you must provide the array outis[] already allocated of length n.
1149 
1150 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid
1151 
1152 .seealso: PCGASMSetLocalSubdomains(), PCGASMDestroySubdomains()
1153 @*/
1154 PetscErrorCode  PCGASMCreateSubdomains(Mat A, PetscInt n, IS* outis[])
1155 {
1156   MatPartitioning           mpart;
1157   const char                *prefix;
1158   PetscErrorCode            (*f)(Mat,MatReuse,Mat*);
1159   PetscMPIInt               size;
1160   PetscInt                  i,j,rstart,rend,bs;
1161   PetscBool                 isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
1162   Mat                       Ad = PETSC_NULL, adj;
1163   IS                        ispart,isnumb,*is;
1164   PetscErrorCode            ierr;
1165 
1166   PetscFunctionBegin;
1167   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1168   PetscValidPointer(outis,3);
1169   if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local blocks must be > 0, n = %D",n);
1170 
1171   /* Get prefix, row distribution, and block size */
1172   ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr);
1173   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1174   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
1175   if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);
1176 
1177   /* Get diagonal block from matrix if possible */
1178   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1179   ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",(void (**)(void))&f);CHKERRQ(ierr);
1180   if (f) {
1181     ierr = MatGetDiagonalBlock(A,&Ad);CHKERRQ(ierr);
1182   } else if (size == 1) {
1183     Ad = A;
1184   }
1185   if (Ad) {
1186     ierr = PetscTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
1187     if (!isbaij) {ierr = PetscTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);}
1188   }
1189   if (Ad && n > 1) {
1190     PetscBool  match,done;
1191     /* Try to setup a good matrix partitioning if available */
1192     ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr);
1193     ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
1194     ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
1195     ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);CHKERRQ(ierr);
1196     if (!match) {
1197       ierr = PetscTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);CHKERRQ(ierr);
1198     }
1199     if (!match) { /* assume a "good" partitioner is available */
1200       PetscInt na,*ia,*ja;
1201       ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1202       if (done) {
1203         /* Build adjacency matrix by hand. Unfortunately a call to
1204            MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1205            remove the block-aij structure and we cannot expect
1206            MatPartitioning to split vertices as we need */
1207         PetscInt i,j,*row,len,nnz,cnt,*iia=0,*jja=0;
1208         nnz = 0;
1209         for (i=0; i<na; i++) { /* count number of nonzeros */
1210           len = ia[i+1] - ia[i];
1211           row = ja + ia[i];
1212           for (j=0; j<len; j++) {
1213             if (row[j] == i) { /* don't count diagonal */
1214               len--; break;
1215             }
1216           }
1217           nnz += len;
1218         }
1219         ierr = PetscMalloc((na+1)*sizeof(PetscInt),&iia);CHKERRQ(ierr);
1220         ierr = PetscMalloc((nnz)*sizeof(PetscInt),&jja);CHKERRQ(ierr);
1221         nnz    = 0;
1222         iia[0] = 0;
1223         for (i=0; i<na; i++) { /* fill adjacency */
1224           cnt = 0;
1225           len = ia[i+1] - ia[i];
1226           row = ja + ia[i];
1227           for (j=0; j<len; j++) {
1228             if (row[j] != i) { /* if not diagonal */
1229               jja[nnz+cnt++] = row[j];
1230             }
1231           }
1232           nnz += cnt;
1233           iia[i+1] = nnz;
1234         }
1235         /* Partitioning of the adjacency matrix */
1236         ierr = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,PETSC_NULL,&adj);CHKERRQ(ierr);
1237         ierr = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr);
1238         ierr = MatPartitioningSetNParts(mpart,n);CHKERRQ(ierr);
1239         ierr = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr);
1240         ierr = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr);
1241         ierr = MatDestroy(&adj);CHKERRQ(ierr);
1242         foundpart = PETSC_TRUE;
1243       }
1244       ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1245     }
1246     ierr = MatPartitioningDestroy(&mpart);CHKERRQ(ierr);
1247   }
1248 
1249   ierr = PetscMalloc(n*sizeof(IS),&is);CHKERRQ(ierr);
1250   *outis = is;
1251 
1252   if (!foundpart) {
1253 
1254     /* Partitioning by contiguous chunks of rows */
1255 
1256     PetscInt mbs   = (rend-rstart)/bs;
1257     PetscInt start = rstart;
1258     for (i=0; i<n; i++) {
1259       PetscInt count = (mbs/n + ((mbs % n) > i)) * bs;
1260       ierr   = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr);
1261       start += count;
1262     }
1263 
1264   } else {
1265 
1266     /* Partitioning by adjacency of diagonal block  */
1267 
1268     const PetscInt *numbering;
1269     PetscInt       *count,nidx,*indices,*newidx,start=0;
1270     /* Get node count in each partition */
1271     ierr = PetscMalloc(n*sizeof(PetscInt),&count);CHKERRQ(ierr);
1272     ierr = ISPartitioningCount(ispart,n,count);CHKERRQ(ierr);
1273     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1274       for (i=0; i<n; i++) count[i] *= bs;
1275     }
1276     /* Build indices from node numbering */
1277     ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr);
1278     ierr = PetscMalloc(nidx*sizeof(PetscInt),&indices);CHKERRQ(ierr);
1279     for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
1280     ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr);
1281     ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr);
1282     ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr);
1283     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1284       ierr = PetscMalloc(nidx*bs*sizeof(PetscInt),&newidx);CHKERRQ(ierr);
1285       for (i=0; i<nidx; i++)
1286         for (j=0; j<bs; j++)
1287           newidx[i*bs+j] = indices[i]*bs + j;
1288       ierr = PetscFree(indices);CHKERRQ(ierr);
1289       nidx   *= bs;
1290       indices = newidx;
1291     }
1292     /* Shift to get global indices */
1293     for (i=0; i<nidx; i++) indices[i] += rstart;
1294 
1295     /* Build the index sets for each block */
1296     for (i=0; i<n; i++) {
1297       ierr   = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);CHKERRQ(ierr);
1298       ierr   = ISSort(is[i]);CHKERRQ(ierr);
1299       start += count[i];
1300     }
1301 
1302     ierr = PetscFree(count);
1303     ierr = PetscFree(indices);
1304     ierr = ISDestroy(&isnumb);CHKERRQ(ierr);
1305     ierr = ISDestroy(&ispart);CHKERRQ(ierr);
1306   }
1307 
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 #undef __FUNCT__
1312 #define __FUNCT__ "PCGASMDestroySubdomains"
1313 /*@C
1314    PCGASMDestroySubdomains - Destroys the index sets created with
1315    PCGASMCreateSubdomains(). Should be called after setting subdomains
1316    with PCGASMSetLocalSubdomains().
1317 
1318    Collective
1319 
1320    Input Parameters:
1321 +  n - the number of index sets
1322 .  is - the array of index sets
1323 -  is_local - the array of local index sets, can be PETSC_NULL
1324 
1325    Level: advanced
1326 
1327 .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid
1328 
1329 .seealso: PCGASMCreateSubdomains(), PCGASMSetLocalSubdomains()
1330 @*/
1331 PetscErrorCode  PCGASMDestroySubdomains(PetscInt n, IS is[], IS is_local[])
1332 {
1333   PetscInt       i;
1334   PetscErrorCode ierr;
1335   PetscFunctionBegin;
1336   if (n <= 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"n must be > 0: n = %D",n);
1337   PetscValidPointer(is,2);
1338   for (i=0; i<n; i++) { ierr = ISDestroy(&is[i]);CHKERRQ(ierr); }
1339   ierr = PetscFree(is);CHKERRQ(ierr);
1340   if (is_local) {
1341     PetscValidPointer(is_local,3);
1342     for (i=0; i<n; i++) { ierr = ISDestroy(&is_local[i]);CHKERRQ(ierr); }
1343     ierr = PetscFree(is_local);CHKERRQ(ierr);
1344   }
1345   PetscFunctionReturn(0);
1346 }
1347 
1348 
/*
   PCGASMLocalSubdomainBounds2D - Clips the 2D subdomain box with corners
   (xleft,ylow) and (xright,yhigh) against this process's ownership range
   [first,last) of a grid whose rows have length M, returning the clipped
   bounds through the *_loc pointer arguments and the number of locally
   owned subdomain points through n.

   NOTE(review): function-like macro; arguments may be evaluated more than
   once, so pass only simple expressions (no side effects).
*/
#define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \
{                                                                                                       \
 PetscInt first_row = first/M, last_row = last/M+1;                                                     \
  /*                                                                                                    \
   Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners       \
   of the bounding box of the intersection of the subdomain with the local ownership range (local       \
   subdomain).                                                                                          \
   Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows       \
   of the intersection.                                                                                 \
  */                                                                                                    \
  /* ylow_loc is the grid row containing the first element of the local sumbdomain */                   \
  *ylow_loc = PetscMax(first_row,ylow);                                                                    \
  /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
  *xleft_loc = *ylow_loc==first_row?PetscMax(first%M,xleft):xleft;                                                                            \
  /* yhigh_loc is the grid row above the last local subdomain element */                                                                    \
  *yhigh_loc = PetscMin(last_row,yhigh);                                                                                                     \
  /* xright is the offset of the end of the  local subdomain within its grid row (might actually be outside the local subdomain) */         \
  *xright_loc = *yhigh_loc==last_row?PetscMin(xright,last%M):xright;                                                                          \
  /* Now compute the size of the local subdomain n. */ \
  *n = 0;                                               \
  if(*ylow_loc < *yhigh_loc) {                           \
    PetscInt width = xright-xleft;                     \
    *n += width*(*yhigh_loc-*ylow_loc-1);                 \
    *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \
    *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \
  }\
}
1376 
1377 
1378 
1379 #undef __FUNCT__
1380 #define __FUNCT__ "PCGASMCreateSubdomains2D"
1381 /*@
1382    PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1383    preconditioner for a two-dimensional problem on a regular grid.
1384 
1385    Collective
1386 
1387    Input Parameters:
1388 +  M, N - the global number of mesh points in the x and y directions
1389 .  Mdomains, Ndomains - the global number of subdomains in the x and y directions
1390 .  dof - degrees of freedom per node
1391 -  overlap - overlap in mesh lines
1392 
1393    Output Parameters:
1394 +  Nsub - the number of local subdomains created
1395 .  is - array of index sets defining overlapping (if overlap > 0) subdomains
1396 -  is_local - array of index sets defining non-overlapping subdomains
1397 
   Note:
   Presently PCGASMCreateSubdomains2D() is valid only for sequential
   preconditioners.  More general related routines are
   PCGASMSetTotalSubdomains() and PCGASMSetLocalSubdomains().
1402 
1403    Level: advanced
1404 
1405 .keywords: PC, GASM, additive Schwarz, create, subdomains, 2D, regular grid
1406 
1407 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetLocalSubdomains(), PCGASMGetSubKSP(),
1408           PCGASMSetOverlap()
1409 @*/
PetscErrorCode  PCGASMCreateSubdomains2D(PC pc, PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap, PetscInt *nsub,IS **is,IS **is_local)
{
  /*
     Tiles an M x N regular grid (dof unknowns per node, rows partitioned across
     ranks in pc->pmat's ownership order) into Mdomains x Ndomains rectangular
     subdomains, each enlarged by 'overlap' mesh lines, and builds one pair of
     ISs (overlapping, non-overlapping) per subdomain that intersects this
     rank's ownership range.  Collective: every rank executes the same sequence
     of MPI_Comm_split calls, one per (i,j) subdomain.
  */
  PetscErrorCode ierr;
  PetscMPIInt    size, rank;                  /* NOTE(review): 'size' is queried but never used below */
  PetscInt       i, j;
  PetscInt       maxheight, maxwidth;         /* extents (in grid lines) of the current subdomain, before overlap */
  PetscInt       xstart, xleft, xright, xleft_loc, xright_loc;
  PetscInt       ystart, ylow,  yhigh,  ylow_loc,  yhigh_loc;
  PetscInt       x[2][2], y[2][2], n[2];      /* index [0] = limits/count WITH overlap, [1] = WITHOUT overlap */
  PetscInt       first, last;                 /* this rank's ownership range of pc->pmat rows */
  PetscInt       nidx, *idx;                  /* local index count and index array for the IS being built */
  PetscInt       ii,jj,s,q,d;                 /* s counts locally-supported subdomains; q selects overlap/no-overlap pass */
  PetscInt       k,kk;
  PetscMPIInt    color;
  MPI_Comm       comm, subcomm;               /* subcomm: ranks sharing support of the current subdomain */
  IS             **iis = 0;                   /* points at *is (q==0) or *is_local (q==1) */

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)pc, &comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(pc->pmat, &first, &last);CHKERRQ(ierr);
  /* The row partition must fall on grid-node boundaries: each node owns 'dof' consecutive rows. */
  if (first%dof || last%dof) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) "
	     "does not respect the number of degrees of freedom per grid point %D", first, last, dof);

  /* First pass: determine the number of domains with nonzero intersections with the local ownership range,
     so that the output IS arrays can be sized before anything is created. */
  s = 0;
  ystart = 0;
  for (j=0; j<Ndomains; ++j) {
    maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
    if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N);
    /* Vertical domain limits with an overlap. */
    ylow = PetscMax(ystart - overlap,0);
    yhigh = PetscMin(ystart + maxheight + overlap,N);
    xstart = 0;
    for (i=0; i<Mdomains; ++i) {
      maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
      if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
      /* Horizontal domain limits with an overlap. */
      xleft   = PetscMax(xstart - overlap,0);
      xright  = PetscMin(xstart + maxwidth + overlap,M);
      /*
	 Determine whether this subdomain intersects this processor's ownership range of pc->pmat.
      */
      PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
      if(nidx) {
        ++s;
      }
      xstart += maxwidth;
    }/* for(i = 0; i < Mdomains; ++i) */
    ystart += maxheight;
  }/* for(j = 0; j < Ndomains; ++j) */
  /* Now we can allocate the necessary number of ISs. */
  /* NOTE(review): sizeof(IS*) should conventionally be sizeof(IS) here — same size in practice
     since IS is itself a pointer type, but misleading to read. */
  *nsub = s;
  ierr = PetscMalloc((*nsub)*sizeof(IS*),is);CHKERRQ(ierr);
  ierr = PetscMalloc((*nsub)*sizeof(IS*),is_local);CHKERRQ(ierr);
  /* Second pass: retraverse the same tiling and actually build the index sets. */
  s = 0;
  ystart = 0;
  for (j=0; j<Ndomains; ++j) {
    maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
    if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N);
    /* Vertical domain limits with an overlap. */
    y[0][0] = PetscMax(ystart - overlap,0);
    y[0][1] = PetscMin(ystart + maxheight + overlap,N);
    /* Vertical domain limits without an overlap. */
    y[1][0] = ystart;
    y[1][1] = PetscMin(ystart + maxheight,N);
    xstart = 0;
    for (i=0; i<Mdomains; ++i) {
      maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
      if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
      /* Horizontal domain limits with an overlap. */
      x[0][0]  = PetscMax(xstart - overlap,0);
      x[0][1]  = PetscMin(xstart + maxwidth + overlap,M);
      /* Horizontal domain limits without an overlap. */
      x[1][0] = xstart;
      x[1][1] = PetscMin(xstart+maxwidth,M);
      /*
	 Determine whether this domain intersects this processor's ownership range of pc->pmat.
	 Do this twice: first for the domains with overlaps, and once without.
	 During the first pass create the subcommunicators, and use them on the second pass as well.
      */
      for(q = 0; q < 2; ++q) {
	/*
	  domain limits, (xleft, xright) and (ylow, yheigh) are adjusted
	  according to whether the domain with an overlap or without is considered.
	*/
	xleft = x[q][0]; xright = x[q][1];
	ylow  = y[q][0]; yhigh  = y[q][1];
        PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
	nidx *= dof;  /* convert from grid-node count to unknown (row) count */
        n[q] = nidx;
        /*
         Based on the counted number of indices in the local domain *with an overlap*,
         construct a subcommunicator of all the processors supporting this domain.
         Observe that a domain with an overlap might have nontrivial local support,
         while the domain without an overlap might not.  Hence, the decision to participate
         in the subcommunicator must be based on the domain with an overlap.
         */
	if (q == 0) {
	  /* Collective over comm: every rank must reach this split for every (i,j). */
	  if(nidx) {
	    color = 1;
	  } else {
	    color = MPI_UNDEFINED;  /* this rank gets subcomm == MPI_COMM_NULL */
	  }
	  ierr = MPI_Comm_split(comm, color, rank, &subcomm);CHKERRQ(ierr);
	}
        /*
         Proceed only if the number of local indices *with an overlap* is nonzero.
         */
        if (n[0]) {
          if(q == 0) {
            iis = is;
          }
          if (q == 1) {
            /*
             The IS for the no-overlap subdomain shares a communicator with the overlapping domain.
             Moreover, if the overlap is zero, the two ISs are identical.
             */
            if (overlap == 0) {
              /* Alias the overlapping IS and bump its reference count so each array owns a reference. */
              (*is_local)[s] = (*is)[s];
              ierr = PetscObjectReference((PetscObject)(*is)[s]);CHKERRQ(ierr);
              continue;
            } else {
              iis = is_local;
              /* Reuse the communicator of the overlapping IS created at q == 0. */
              subcomm = ((PetscObject)(*is)[s])->comm;
            }
          }/* if(q == 1) */
          idx = PETSC_NULL;
	  ierr = PetscMalloc(nidx*sizeof(PetscInt),&idx);CHKERRQ(ierr);
          if(nidx) {
            /* Enumerate the global row indices of the local piece of this subdomain, row by grid row.
               The first and last grid rows may be partial; interior rows span [xleft, xright). */
            k    = 0;
            for (jj=ylow_loc; jj<yhigh_loc; ++jj) {
              PetscInt x0 = (jj==ylow_loc)?xleft_loc:xleft;
              PetscInt x1 = (jj==yhigh_loc-1)?xright_loc:xright;
              kk = dof*(M*jj + x0);
              for (ii=x0; ii<x1; ++ii) {
                for(d = 0; d < dof; ++d) {
                  idx[k++] = kk++;
                }
              }
            }
          }
	  /* IS takes ownership of idx (PETSC_OWN_POINTER).
	     NOTE(review): the subcomm produced by MPI_Comm_split above is handed to ISCreateGeneral
	     but never MPI_Comm_free'd here — presumably PETSc's comm-duplication keeps its own
	     reference; confirm this does not leak a communicator per subdomain. */
	  ierr = ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*iis)+s);CHKERRQ(ierr);
	}/* if(n[0]) */
      }/* for(q = 0; q < 2; ++q) */
      if(n[0]) {
        ++s;  /* advance output slot only when this rank supports the subdomain */
      }
      xstart += maxwidth;
    }/* for(i = 0; i < Mdomains; ++i) */
    ystart += maxheight;
  }/* for(j = 0; j < Ndomains; ++j) */
  PetscFunctionReturn(0);
}
1565 
1566 #undef __FUNCT__
1567 #define __FUNCT__ "PCGASMGetLocalSubdomains"
1568 /*@C
1569     PCGASMGetLocalSubdomains - Gets the local subdomains (for this processor
1570     only) for the additive Schwarz preconditioner.
1571 
1572     Not Collective
1573 
1574     Input Parameter:
1575 .   pc - the preconditioner context
1576 
1577     Output Parameters:
1578 +   n - the number of subdomains for this processor (default value = 1)
1579 .   is - the index sets that define the subdomains for this processor
1580 -   is_local - the index sets that define the local part of the subdomains for this processor (can be PETSC_NULL)
1581 
1582 
1583     Notes:
1584     The IS numbering is in the parallel, global numbering of the vector.
1585 
1586     Level: advanced
1587 
1588 .keywords: PC, GASM, set, local, subdomains, additive Schwarz
1589 
1590 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
1591           PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubmatrices()
1592 @*/
1593 PetscErrorCode  PCGASMGetLocalSubdomains(PC pc,PetscInt *n,IS *is[],IS *is_local[])
1594 {
1595   PC_GASM         *osm;
1596   PetscErrorCode ierr;
1597   PetscBool      match;
1598 
1599   PetscFunctionBegin;
1600   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1601   PetscValidIntPointer(n,2);
1602   if (is) PetscValidPointer(is,3);
1603   ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1604   if (!match) {
1605     if (n)  *n  = 0;
1606     if (is) *is = PETSC_NULL;
1607   } else {
1608     osm = (PC_GASM*)pc->data;
1609     if (n)  *n  = osm->n;
1610     if (is) *is = osm->is;
1611     if (is_local) *is_local = osm->is_local;
1612   }
1613   PetscFunctionReturn(0);
1614 }
1615 
1616 #undef __FUNCT__
1617 #define __FUNCT__ "PCGASMGetLocalSubmatrices"
1618 /*@C
1619     PCGASMGetLocalSubmatrices - Gets the local submatrices (for this processor
1620     only) for the additive Schwarz preconditioner.
1621 
1622     Not Collective
1623 
1624     Input Parameter:
1625 .   pc - the preconditioner context
1626 
1627     Output Parameters:
1628 +   n - the number of matrices for this processor (default value = 1)
1629 -   mat - the matrices
1630 
1631 
1632     Level: advanced
1633 
1634 .keywords: PC, GASM, set, local, subdomains, additive Schwarz, block Jacobi
1635 
1636 .seealso: PCGASMSetTotalSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
1637           PCGASMCreateSubdomains2D(), PCGASMSetLocalSubdomains(), PCGASMGetLocalSubdomains()
1638 @*/
1639 PetscErrorCode  PCGASMGetLocalSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1640 {
1641   PC_GASM         *osm;
1642   PetscErrorCode ierr;
1643   PetscBool      match;
1644 
1645   PetscFunctionBegin;
1646   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1647   PetscValidIntPointer(n,2);
1648   if (mat) PetscValidPointer(mat,3);
1649   if (!pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUP() or PCSetUp().");
1650   ierr = PetscTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1651   if (!match) {
1652     if (n)   *n   = 0;
1653     if (mat) *mat = PETSC_NULL;
1654   } else {
1655     osm = (PC_GASM*)pc->data;
1656     if (n)   *n   = osm->n;
1657     if (mat) *mat = osm->pmat;
1658   }
1659   PetscFunctionReturn(0);
1660 }
1661