xref: /petsc/src/ksp/pc/impls/asm/asm.c (revision 6aa9148f4397cd347261e3043c0fcf0ae62e5ac4)
1 #define PETSCKSP_DLL
2 
3 /*
4   This file defines an additive Schwarz preconditioner for any Mat implementation.
5 
6   Note that each processor may have any number of subdomains. But in order to
7   deal easily with the VecScatter(), we treat each processor as if it has the
8   same number of subdomains.
9 
10        n - total number of true subdomains on all processors
11        n_local_true - actual number of subdomains on this processor
12        n_local = maximum over all processors of n_local_true
13 */
14 #include "private/pcimpl.h"     /*I "petscpc.h" I*/
15 
/*
   Private context for the additive Schwarz preconditioner.  One instance
   hangs off pc->data for each PCASM.
*/
typedef struct {
  PetscInt   n, n_local, n_local_true; /* see file header: total blocks / max blocks per process / actual local blocks */
  PetscInt   overlap;             /* overlap requested by user */
  KSP        *ksp;                /* linear solvers for each block (n_local_true entries) */
  VecScatter *restriction;        /* mapping from global to subregion */
  VecScatter *localization;       /* mapping from overlapping to non-overlapping subregion */
  VecScatter *prolongation;       /* mapping from subregion to global */
  Vec        *x,*y,*y_local;      /* work vectors (subdomain rhs, solution, localized solution) */
  IS         *is;                 /* index set that defines each overlapping subdomain */
  IS         *is_local;           /* index set that defines each non-overlapping subdomain, may be NULL */
  Mat        *mat,*pmat;          /* mat is not currently used; pmat holds the extracted submatrices */
  PCASMType  type;                /* use reduced interpolation, restriction or both */
  PetscTruth type_set;            /* if user set this value (so won't change it for symmetric problems) */
  PetscTruth same_local_solves;   /* flag indicating whether all local solvers are same */
} PC_ASM;
31 
#undef __FUNCT__
#define __FUNCT__ "PCView_ASM"
/*
   PCView_ASM - Prints the state of the additive Schwarz preconditioner.

   Handles ASCII and string viewers; any other viewer type is rejected with
   PETSC_ERR_SUP.  When all blocks use the same solver only the first KSP
   (from rank 0) is shown; otherwise every process prints its own blocks.
   Collective: the singleton get/restore and synchronized prints below must
   execute in the same order on every process of the viewer's communicator.
*/
static PetscErrorCode PCView_ASM(PC pc,PetscViewer viewer)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       i,bsz;
  PetscTruth     iascii,isstring;
  PetscViewer    sviewer;


  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_STRING,&isstring);CHKERRQ(ierr);
  if (iascii) {
    /* osm->n may still be PETSC_DECIDE (<0) before PCSetUp_ASM() has run */
    if (osm->n > 0) {
      ierr = PetscViewerASCIIPrintf(viewer,"  Additive Schwarz: total subdomain blocks = %D, amount of overlap = %D\n",osm->n,osm->overlap);CHKERRQ(ierr);
    } else {
      ierr = PetscViewerASCIIPrintf(viewer,"  Additive Schwarz: total subdomain blocks not yet set, amount of overlap = %D\n",osm->overlap);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPrintf(viewer,"  Additive Schwarz: restriction/interpolation type - %s\n",PCASMTypes[osm->type]);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
    if (osm->same_local_solves) {
      if (osm->ksp) {
        ierr = PetscViewerASCIIPrintf(viewer,"  Local solve is same for all blocks, in the following KSP and PC objects:\n");CHKERRQ(ierr);
        ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
        /* only rank 0 views its (representative) first block solver */
        if (!rank) {
          ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
          ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);
          ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        }
        ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
      }
    } else {
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] number of local blocks = %D\n",(int)rank,osm->n_local_true);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Local solve info for each block is in the following KSP and PC objects:\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
      /* loop up to n_local (the max over all processes) so the collective
         singleton get/restore runs the same number of times everywhere */
      for (i=0; i<osm->n_local; i++) {
        ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
        if (i < osm->n_local_true) {
          ierr = ISGetLocalSize(osm->is[i],&bsz);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"[%d] local block number %D, size = %D\n",(int)rank,i,bsz);CHKERRQ(ierr);
          ierr = KSPView(osm->ksp[i],sviewer);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
        }
        ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
      }
      ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    }
  } else if (isstring) {
    ierr = PetscViewerStringSPrintf(viewer," blocks=%D, overlap=%D, type=%s",osm->n,osm->overlap,PCASMTypes[osm->type]);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (osm->ksp) {ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);}
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for PCASM",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}
95 
96 #undef __FUNCT__
97 #define __FUNCT__ "PCASMPrintSubdomains"
98 static PetscErrorCode PCASMPrintSubdomains(PC pc)
99 {
100   PC_ASM         *osm  = (PC_ASM*)pc->data;
101   const char     *prefix;
102   char           fname[PETSC_MAX_PATH_LEN+1];
103   PetscViewer    viewer;
104   PetscInt       i,j,nidx;
105   const PetscInt *idx;
106   PetscErrorCode ierr;
107 
108   PetscFunctionBegin;
109   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
110   ierr = PetscOptionsGetString(prefix,"-pc_asm_print_subdomains",fname,PETSC_MAX_PATH_LEN,PETSC_NULL);CHKERRQ(ierr);
111   if (fname[0] == 0) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); };
112   ierr = PetscViewerASCIIOpen(((PetscObject)pc)->comm,fname,&viewer);CHKERRQ(ierr);
113   for (i=0;i<osm->n_local_true;i++) {
114     ierr = ISGetLocalSize(osm->is[i],&nidx);CHKERRQ(ierr);
115     ierr = ISGetIndices(osm->is[i],&idx);CHKERRQ(ierr);
116     for (j=0; j<nidx; j++) {
117       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%D ",idx[j]);CHKERRQ(ierr);
118     }
119     ierr = ISRestoreIndices(osm->is[i],&idx);CHKERRQ(ierr);
120     ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n");CHKERRQ(ierr);
121     if (osm->is_local) {
122       ierr = ISGetLocalSize(osm->is_local[i],&nidx);CHKERRQ(ierr);
123       ierr = ISGetIndices(osm->is_local[i],&idx);CHKERRQ(ierr);
124       for (j=0; j<nidx; j++) {
125         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%D ",idx[j]);CHKERRQ(ierr);
126       }
127       ierr = ISRestoreIndices(osm->is_local[i],&idx);CHKERRQ(ierr);
128       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n");CHKERRQ(ierr);
129     }
130   }
131   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
132   ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
133   PetscFunctionReturn(0);
134 }
135 
#undef __FUNCT__
#define __FUNCT__ "PCSetUp_ASM"
/*
   PCSetUp_ASM - Builds (or rebuilds) everything PCApply_ASM() needs: the
   subdomain index sets, the restriction/localization/prolongation scatters,
   the work vectors, the extracted submatrices, and one KSP per local block.

   Collective on the PC's communicator.  On the first call the full set of
   objects is created; on subsequent calls only the submatrices are
   re-extracted (rebuilt from scratch when the operator's nonzero pattern
   changed, reused in place otherwise).
*/
static PetscErrorCode PCSetUp_ASM(PC pc)
{
  PC_ASM         *osm  = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscTruth     symset,flg;
  PetscInt       i,m,m_local,firstRow,lastRow;
  PetscMPIInt    size;
  MatReuse       scall = MAT_REUSE_MATRIX;
  IS             isl;
  KSP            ksp;
  PC             subpc;
  const char     *prefix,*pprefix;
  Vec            vec;

  PetscFunctionBegin;
  if (!pc->setupcalled) {

    /* for a known-symmetric operator switch to the symmetric (basic)
       variant, unless the user explicitly chose a type */
    if (!osm->type_set) {
      ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
      if (symset && flg) { osm->type = PC_ASM_BASIC; }
    }

    if (osm->n == PETSC_DECIDE && osm->n_local_true < 1) {
      /* no subdomains given, use one per processor */
      osm->n_local = osm->n_local_true = 1;
      ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
      osm->n = size;
    } else if (osm->n == PETSC_DECIDE) {
      /* determine global number of subdomains; PetscMaxSum_Op reduces the
         pair to (max, sum), so one Allreduce yields both n_local and n */
      PetscInt inwork[2],outwork[2];
      inwork[0] = inwork[1] = osm->n_local_true;
      ierr = MPI_Allreduce(inwork,outwork,1,MPIU_2INT,PetscMaxSum_Op,((PetscObject)pc)->comm);CHKERRQ(ierr);
      osm->n_local = outwork[0];
      osm->n       = outwork[1];
    }

    if (!osm->is){ /* create the index sets */
      ierr = PCASMCreateSubdomains(pc->pmat,osm->n_local_true,&osm->is);CHKERRQ(ierr);
    }
    ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
    flg  = PETSC_FALSE;
    ierr = PetscOptionsGetTruth(prefix,"-pc_asm_print_subdomains",&flg,PETSC_NULL);CHKERRQ(ierr);
    if (flg) { ierr = PCASMPrintSubdomains(pc);CHKERRQ(ierr); }

    /*  Extend the "overlapping" regions by a number of steps  */
    ierr = MatIncreaseOverlap(pc->pmat,osm->n_local_true,osm->is,osm->overlap);CHKERRQ(ierr);
    for (i=0; i<osm->n_local_true; i++) {
      ierr = ISSort(osm->is[i]);CHKERRQ(ierr);
      if (osm->is_local) {
        ierr = ISSort(osm->is_local[i]);CHKERRQ(ierr);
      }
    }

    /* Create the local work vectors and scatter contexts */
    ierr = MatGetVecs(pc->pmat,&vec,0);CHKERRQ(ierr);
    ierr = PetscMalloc(osm->n_local*sizeof(VecScatter *),&osm->restriction);CHKERRQ(ierr);
    if (osm->is_local) {ierr = PetscMalloc(osm->n_local*sizeof(VecScatter *),&osm->localization);CHKERRQ(ierr);}
    ierr = PetscMalloc(osm->n_local*sizeof(VecScatter *),&osm->prolongation);CHKERRQ(ierr);
    ierr = PetscMalloc(osm->n_local*sizeof(Vec *),&osm->x);CHKERRQ(ierr);
    ierr = PetscMalloc(osm->n_local*sizeof(Vec *),&osm->y);CHKERRQ(ierr);
    ierr = PetscMalloc(osm->n_local*sizeof(Vec *),&osm->y_local);CHKERRQ(ierr);
    ierr = VecGetOwnershipRange(vec, &firstRow, &lastRow);CHKERRQ(ierr);
    /* firstRow advances by m_local only AFTER the loop body has assigned
       m_local, so it holds the first owned row of subdomain i each pass */
    for (i=0; i<osm->n_local_true; ++i, firstRow += m_local) {
      ierr = ISGetLocalSize(osm->is[i],&m);CHKERRQ(ierr);
      ierr = VecCreateSeq(PETSC_COMM_SELF,m,&osm->x[i]);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,m,0,1,&isl);CHKERRQ(ierr);
      ierr = VecScatterCreate(vec,osm->is[i],osm->x[i],isl,&osm->restriction[i]);CHKERRQ(ierr);
      ierr = ISDestroy(isl);CHKERRQ(ierr);
      ierr = VecDuplicate(osm->x[i],&osm->y[i]);CHKERRQ(ierr);
      if (osm->is_local) {
        IS              isShift;
        const PetscInt *idx_local;
        PetscInt       *idx, j;

        /* build the scatter that extracts the non-overlapping part from the
           overlapping subdomain solution; indices are shifted by firstRow
           (assumes the is_local blocks tile this process's ownership range
           in order -- enforced by the firstRow == lastRow check below) */
        ierr = ISGetLocalSize(osm->is_local[i],&m_local);CHKERRQ(ierr);
        ierr = ISGetIndices(osm->is_local[i], &idx_local);CHKERRQ(ierr);
        ierr = PetscMalloc(m*sizeof(PetscInt), &idx);CHKERRQ(ierr);
        for(j = 0; j < m_local; ++j) {
          idx[j] = idx_local[j] - firstRow;
        }
        ierr = ISRestoreIndices(osm->is_local[i], &idx_local);CHKERRQ(ierr);
        ierr = ISCreateGeneral(PETSC_COMM_SELF,m_local,idx,&isShift);CHKERRQ(ierr);
        ierr = PetscFree(idx);CHKERRQ(ierr);
        ierr = ISCreateStride(PETSC_COMM_SELF,m_local,0,1,&isl);CHKERRQ(ierr);
        ierr = VecCreateSeq(PETSC_COMM_SELF,m_local,&osm->y_local[i]);CHKERRQ(ierr);
        ierr = VecScatterCreate(osm->y[i],isShift,osm->y_local[i],isl,&osm->localization[i]);CHKERRQ(ierr);
        ierr = ISDestroy(isShift);CHKERRQ(ierr);

        ierr = VecScatterCreate(vec,osm->is_local[i],osm->y_local[i],isl,&osm->prolongation[i]);CHKERRQ(ierr);
        ierr = ISDestroy(isl);CHKERRQ(ierr);
      } else {
        /* no separate local sets: alias the overlapping objects, taking an
           extra reference so PCDestroy_ASM() can free everything uniformly */
        ierr = VecGetLocalSize(vec,&m_local);CHKERRQ(ierr);
        osm->y_local[i] = osm->y[i];
        ierr = PetscObjectReference((PetscObject) osm->y[i]);CHKERRQ(ierr);
        osm->prolongation[i] = osm->restriction[i];
        ierr = PetscObjectReference((PetscObject) osm->restriction[i]);CHKERRQ(ierr);
      }
    }
    /* the per-subdomain local sizes must tile the ownership range exactly */
    if (firstRow != lastRow) SETERRQ2(PETSC_ERR_PLIB, "Specified ASM subdomain sizes were invalid: %d != %d", firstRow, lastRow);
    /* zero-length "phantom" scatters so that every process participates in
       the same number of scatters (see note at the top of this file) */
    for (i=osm->n_local_true; i<osm->n_local; i++) {
      ierr = VecCreateSeq(PETSC_COMM_SELF,0,&osm->x[i]);CHKERRQ(ierr);
      ierr = VecDuplicate(osm->x[i],&osm->y[i]);CHKERRQ(ierr);
      ierr = VecDuplicate(osm->x[i],&osm->y_local[i]);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&isl);CHKERRQ(ierr);
      ierr = VecScatterCreate(vec,isl,osm->x[i],isl,&osm->restriction[i]);CHKERRQ(ierr);
      osm->prolongation[i] = osm->restriction[i];
      ierr = PetscObjectReference((PetscObject) osm->restriction[i]);CHKERRQ(ierr);
      ierr = ISDestroy(isl);CHKERRQ(ierr);
    }
    ierr = VecDestroy(vec);CHKERRQ(ierr);

    /* Create the local solvers */
    ierr = PetscMalloc(osm->n_local_true*sizeof(KSP *),&osm->ksp);CHKERRQ(ierr);
    for (i=0; i<osm->n_local_true; i++) {
      ierr = KSPCreate(PETSC_COMM_SELF,&ksp);CHKERRQ(ierr);
      ierr = PetscLogObjectParent(pc,ksp);CHKERRQ(ierr);
      ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr);
      ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
      ierr = KSPGetPC(ksp,&subpc);CHKERRQ(ierr);
      ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
      ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr);
      /* block solvers are configured with the composed prefix <prefix>sub_ */
      ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr);
      osm->ksp[i] = ksp;
    }
    scall = MAT_INITIAL_MATRIX;

  } else {
    /*
       Destroy the blocks from the previous iteration
    */
    if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
      ierr = MatDestroyMatrices(osm->n_local_true,&osm->pmat);CHKERRQ(ierr);
      scall = MAT_INITIAL_MATRIX;
    }
  }

  /*
     Extract out the submatrices
  */
  ierr = MatGetSubMatrices(pc->pmat,osm->n_local_true,osm->is,osm->is,scall,&osm->pmat);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    /* freshly created submatrices inherit the operator's options prefix */
    ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr);
    for (i=0; i<osm->n_local_true; i++) {
      ierr = PetscLogObjectParent(pc,osm->pmat[i]);CHKERRQ(ierr);
      ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr);
    }
  }

  /* Return control to the user so that the submatrices can be modified (e.g., to apply
     different boundary conditions for the submatrices than for the global problem) */
  ierr = PCModifySubMatrices(pc,osm->n_local_true,osm->is,osm->is,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr);

  /*
     Loop over subdomains putting them into local ksp
  */
  for (i=0; i<osm->n_local_true; i++) {
    ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i],pc->flag);CHKERRQ(ierr);
    if (!pc->setupcalled) {
      ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr);
    }
  }

  PetscFunctionReturn(0);
}
302 
303 #undef __FUNCT__
304 #define __FUNCT__ "PCSetUpOnBlocks_ASM"
305 static PetscErrorCode PCSetUpOnBlocks_ASM(PC pc)
306 {
307   PC_ASM         *osm = (PC_ASM*)pc->data;
308   PetscErrorCode ierr;
309   PetscInt       i;
310 
311   PetscFunctionBegin;
312   for (i=0; i<osm->n_local_true; i++) {
313     ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr);
314   }
315   PetscFunctionReturn(0);
316 }
317 
#undef __FUNCT__
#define __FUNCT__ "PCApply_ASM"
/*
   PCApply_ASM - Applies the additive Schwarz preconditioner: restrict x
   onto each (overlapping) subdomain, solve with the block KSP, and add
   the (possibly localized) block solutions back into y.

   All restriction scatters are begun before any is ended so communication
   can overlap with the local solves; all prolongation scatters are ended
   only after every solve has been issued.
*/
static PetscErrorCode PCApply_ASM(PC pc,Vec x,Vec y)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i,n_local = osm->n_local,n_local_true = osm->n_local_true;
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

  PetscFunctionBegin;
  /*
     Support for limiting the restriction or interpolation to only local
     subdomain values (leaving the other values 0).
  */
  if (!(osm->type & PC_ASM_RESTRICT)) {
    forward = SCATTER_FORWARD_LOCAL;
    /* have to zero the work RHS since scatter may leave some slots empty */
    for (i=0; i<n_local_true; i++) {
      ierr = VecSet(osm->x[i],0.0);CHKERRQ(ierr);
    }
  }
  if (!(osm->type & PC_ASM_INTERPOLATE)) {
    reverse = SCATTER_REVERSE_LOCAL;
  }

  /* start every restriction, including the zero-length "phantom" ones that
     keep the communication pattern uniform across processes */
  for (i=0; i<n_local; i++) {
    ierr = VecScatterBegin(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  ierr = VecSet(y,0.0);CHKERRQ(ierr); /* block results are accumulated with ADD_VALUES */
  /* do the local solves */
  for (i=0; i<n_local_true; i++) {
    ierr = VecScatterEnd(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
    if (osm->localization) {
      /* reduce the overlapping solution to its non-overlapping part */
      ierr = VecScatterBegin(osm->localization[i],osm->y[i],osm->y_local[i],INSERT_VALUES,forward);CHKERRQ(ierr);
      ierr = VecScatterEnd(osm->localization[i],osm->y[i],osm->y_local[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    }
    ierr = VecScatterBegin(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  /* handle the rest of the scatters that do not have local solves */
  for (i=n_local_true; i<n_local; i++) {
    ierr = VecScatterEnd(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    ierr = VecScatterBegin(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  for (i=0; i<n_local; i++) {
    ierr = VecScatterEnd(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
367 
#undef __FUNCT__
#define __FUNCT__ "PCApplyTranspose_ASM"
/*
   PCApplyTranspose_ASM - Applies the transpose of the additive Schwarz
   preconditioner: same scatter pipeline as PCApply_ASM() but with
   KSPSolveTranspose() for the block solves, and with the RESTRICT /
   INTERPOLATE roles of the scatter modes exchanged.
*/
static PetscErrorCode PCApplyTranspose_ASM(PC pc,Vec x,Vec y)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i,n_local = osm->n_local,n_local_true = osm->n_local_true;
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

  PetscFunctionBegin;
  /*
     Support for limiting the restriction or interpolation to only local
     subdomain values (leaving the other values 0).

     Note: these are reversed from the PCApply_ASM() because we are applying the
     transpose of the three terms
  */
  if (!(osm->type & PC_ASM_INTERPOLATE)) {
    forward = SCATTER_FORWARD_LOCAL;
    /* have to zero the work RHS since scatter may leave some slots empty */
    for (i=0; i<n_local_true; i++) {
      ierr = VecSet(osm->x[i],0.0);CHKERRQ(ierr);
    }
  }
  if (!(osm->type & PC_ASM_RESTRICT)) {
    reverse = SCATTER_REVERSE_LOCAL;
  }

  /* start every restriction, including the zero-length "phantom" ones that
     keep the communication pattern uniform across processes */
  for (i=0; i<n_local; i++) {
    ierr = VecScatterBegin(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  ierr = VecSet(y,0.0);CHKERRQ(ierr); /* block results are accumulated with ADD_VALUES */
  /* do the local solves */
  for (i=0; i<n_local_true; i++) {
    ierr = VecScatterEnd(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
    if (osm->localization) {
      /* reduce the overlapping solution to its non-overlapping part */
      ierr = VecScatterBegin(osm->localization[i],osm->y[i],osm->y_local[i],INSERT_VALUES,forward);CHKERRQ(ierr);
      ierr = VecScatterEnd(osm->localization[i],osm->y[i],osm->y_local[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    }
    ierr = VecScatterBegin(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  /* handle the rest of the scatters that do not have local solves */
  for (i=n_local_true; i<n_local; i++) {
    ierr = VecScatterEnd(osm->restriction[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    ierr = VecScatterBegin(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  for (i=0; i<n_local; i++) {
    ierr = VecScatterEnd(osm->prolongation[i],osm->y_local[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
420 
421 #undef __FUNCT__
422 #define __FUNCT__ "PCDestroy_ASM"
423 static PetscErrorCode PCDestroy_ASM(PC pc)
424 {
425   PC_ASM         *osm = (PC_ASM*)pc->data;
426   PetscErrorCode ierr;
427   PetscInt       i;
428 
429   PetscFunctionBegin;
430   if (osm->ksp) {
431     for (i=0; i<osm->n_local_true; i++) {
432       ierr = KSPDestroy(osm->ksp[i]);CHKERRQ(ierr);
433     }
434     ierr = PetscFree(osm->ksp);CHKERRQ(ierr);
435   }
436   if (osm->pmat) {
437     if (osm->n_local_true > 0) {
438       ierr = MatDestroyMatrices(osm->n_local_true,&osm->pmat);CHKERRQ(ierr);
439     }
440   }
441   if (osm->restriction) {
442     for (i=0; i<osm->n_local; i++) {
443       ierr = VecScatterDestroy(osm->restriction[i]);CHKERRQ(ierr);
444       if (osm->localization) {ierr = VecScatterDestroy(osm->localization[i]);CHKERRQ(ierr);}
445       ierr = VecScatterDestroy(osm->prolongation[i]);CHKERRQ(ierr);
446       ierr = VecDestroy(osm->x[i]);CHKERRQ(ierr);
447       ierr = VecDestroy(osm->y[i]);CHKERRQ(ierr);
448       ierr = VecDestroy(osm->y_local[i]);CHKERRQ(ierr);
449     }
450     ierr = PetscFree(osm->restriction);CHKERRQ(ierr);
451     if (osm->localization) {ierr = PetscFree(osm->localization);CHKERRQ(ierr);}
452     ierr = PetscFree(osm->prolongation);CHKERRQ(ierr);
453     ierr = PetscFree(osm->x);CHKERRQ(ierr);
454     ierr = PetscFree(osm->y);CHKERRQ(ierr);
455     ierr = PetscFree(osm->y_local);CHKERRQ(ierr);
456   }
457   if (osm->is) {
458     ierr = PCASMDestroySubdomains(osm->n_local_true,osm->is,osm->is_local);CHKERRQ(ierr);
459   }
460   ierr = PetscFree(osm);CHKERRQ(ierr);
461   PetscFunctionReturn(0);
462 }
463 
464 #undef __FUNCT__
465 #define __FUNCT__ "PCSetFromOptions_ASM"
466 static PetscErrorCode PCSetFromOptions_ASM(PC pc)
467 {
468   PC_ASM         *osm = (PC_ASM*)pc->data;
469   PetscErrorCode ierr;
470   PetscInt       blocks,ovl;
471   PetscTruth     symset,flg;
472   PCASMType      asmtype;
473 
474   PetscFunctionBegin;
475   /* set the type to symmetric if matrix is symmetric */
476   if (!osm->type_set && pc->pmat) {
477     ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
478     if (symset && flg) { osm->type = PC_ASM_BASIC; }
479   }
480   ierr = PetscOptionsHead("Additive Schwarz options");CHKERRQ(ierr);
481     ierr = PetscOptionsInt("-pc_asm_blocks","Number of subdomains","PCASMSetTotalSubdomains",osm->n,&blocks,&flg);CHKERRQ(ierr);
482     if (flg) {ierr = PCASMSetTotalSubdomains(pc,blocks,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); }
483     ierr = PetscOptionsInt("-pc_asm_overlap","Number of grid points overlap","PCASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr);
484     if (flg) {ierr = PCASMSetOverlap(pc,ovl);CHKERRQ(ierr); }
485     flg  = PETSC_FALSE;
486     ierr = PetscOptionsEnum("-pc_asm_type","Type of restriction/extension","PCASMSetType",PCASMTypes,(PetscEnum)osm->type,(PetscEnum*)&asmtype,&flg);CHKERRQ(ierr);
487     if (flg) {ierr = PCASMSetType(pc,asmtype);CHKERRQ(ierr); }
488   ierr = PetscOptionsTail();CHKERRQ(ierr);
489   PetscFunctionReturn(0);
490 }
491 
492 /*------------------------------------------------------------------------------------*/
493 
494 EXTERN_C_BEGIN
495 #undef __FUNCT__
496 #define __FUNCT__ "PCASMSetLocalSubdomains_ASM"
497 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetLocalSubdomains_ASM(PC pc,PetscInt n,IS is[],IS is_local[])
498 {
499   PC_ASM         *osm = (PC_ASM*)pc->data;
500   PetscErrorCode ierr;
501   PetscInt       i;
502 
503   PetscFunctionBegin;
504   if (n < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more blocks, n = %D",n);
505   if (pc->setupcalled && (n != osm->n_local_true || is)) {
506     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"PCASMSetLocalSubdomains() should be called before calling PCSetup().");
507   }
508   if (!pc->setupcalled) {
509     if (is) {
510       for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is[i]);CHKERRQ(ierr);}
511     }
512     if (is_local) {
513       for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is_local[i]);CHKERRQ(ierr);}
514     }
515     if (osm->is) {
516       ierr = PCASMDestroySubdomains(osm->n_local_true,osm->is,osm->is_local);CHKERRQ(ierr);
517     }
518     osm->n_local_true = n;
519     osm->is           = 0;
520     osm->is_local     = 0;
521     if (is) {
522       ierr = PetscMalloc(n*sizeof(IS *),&osm->is);CHKERRQ(ierr);
523       for (i=0; i<n; i++) { osm->is[i] = is[i]; }
524     }
525     if (is_local) {
526       ierr = PetscMalloc(n*sizeof(IS *),&osm->is_local);CHKERRQ(ierr);
527       for (i=0; i<n; i++) { osm->is_local[i] = is_local[i]; }
528     }
529   }
530   PetscFunctionReturn(0);
531 }
532 EXTERN_C_END
533 
534 EXTERN_C_BEGIN
535 #undef __FUNCT__
536 #define __FUNCT__ "PCASMSetTotalSubdomains_ASM"
537 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetTotalSubdomains_ASM(PC pc,PetscInt N,IS *is)
538 {
539   PC_ASM         *osm = (PC_ASM*)pc->data;
540   PetscErrorCode ierr;
541   PetscMPIInt    rank,size;
542   PetscInt       n;
543 
544   PetscFunctionBegin;
545   if (N < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Number of total blocks must be > 0, N = %D",N);
546   if (is) SETERRQ(PETSC_ERR_SUP,"Use PCASMSetLocalSubdomains() to set specific index sets\n\they cannot be set globally yet.");
547 
548   /*
549      Split the subdomains equally amoung all processors
550   */
551   ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
552   ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
553   n = N/size + ((N % size) > rank);
554   if (!n) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"Process %d must have at least one block: total processors %d total blocks %D",(int)rank,(int)size,N);
555   if (pc->setupcalled && n != osm->n_local_true) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"PCASMSetTotalSubdomains() should be called before PCSetup().");
556   if (!pc->setupcalled) {
557     if (osm->is) {
558       ierr = PCASMDestroySubdomains(osm->n_local_true,osm->is,osm->is_local);CHKERRQ(ierr);
559     }
560     osm->n_local_true = n;
561     osm->is           = 0;
562     osm->is_local     = 0;
563   }
564   PetscFunctionReturn(0);
565 }
566 EXTERN_C_END
567 
568 EXTERN_C_BEGIN
569 #undef __FUNCT__
570 #define __FUNCT__ "PCASMSetOverlap_ASM"
571 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetOverlap_ASM(PC pc,PetscInt ovl)
572 {
573   PC_ASM *osm = (PC_ASM*)pc->data;
574 
575   PetscFunctionBegin;
576   if (ovl < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
577   if (pc->setupcalled && ovl != osm->overlap) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"PCASMSetOverlap() should be called before PCSetup().");
578   if (!pc->setupcalled) {
579     osm->overlap = ovl;
580   }
581   PetscFunctionReturn(0);
582 }
583 EXTERN_C_END
584 
585 EXTERN_C_BEGIN
586 #undef __FUNCT__
587 #define __FUNCT__ "PCASMSetType_ASM"
588 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetType_ASM(PC pc,PCASMType type)
589 {
590   PC_ASM *osm = (PC_ASM*)pc->data;
591 
592   PetscFunctionBegin;
593   osm->type     = type;
594   osm->type_set = PETSC_TRUE;
595   PetscFunctionReturn(0);
596 }
597 EXTERN_C_END
598 
599 EXTERN_C_BEGIN
600 #undef __FUNCT__
601 #define __FUNCT__ "PCASMGetSubKSP_ASM"
602 PetscErrorCode PETSCKSP_DLLEXPORT PCASMGetSubKSP_ASM(PC pc,PetscInt *n_local,PetscInt *first_local,KSP **ksp)
603 {
604   PC_ASM         *osm = (PC_ASM*)pc->data;
605   PetscErrorCode ierr;
606 
607   PetscFunctionBegin;
608   if (osm->n_local_true < 1) {
609     SETERRQ(PETSC_ERR_ORDER,"Need to call PCSetUP() on PC (or KSPSetUp() on the outer KSP object) before calling here");
610   }
611 
612   if (n_local) {
613     *n_local = osm->n_local_true;
614   }
615   if (first_local) {
616     ierr = MPI_Scan(&osm->n_local_true,first_local,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr);
617     *first_local -= osm->n_local_true;
618   }
619   if (ksp) {
620     /* Assume that local solves are now different; not necessarily
621        true though!  This flag is used only for PCView_ASM() */
622     *ksp                   = osm->ksp;
623     osm->same_local_solves = PETSC_FALSE;
624   }
625   PetscFunctionReturn(0);
626 }
627 EXTERN_C_END
628 
629 
630 #undef __FUNCT__
631 #define __FUNCT__ "PCASMSetLocalSubdomains"
632 /*@C
633     PCASMSetLocalSubdomains - Sets the local subdomains (for this processor
634     only) for the additive Schwarz preconditioner.
635 
636     Collective on PC
637 
638     Input Parameters:
639 +   pc - the preconditioner context
640 .   n - the number of subdomains for this processor (default value = 1)
641 .   is - the index sets that define the subdomains for this processor
642          (or PETSC_NULL for PETSc to determine subdomains)
643 -   is_local - the index sets that define the local part of the subdomains for this processor
644          (or PETSC_NULL to use the default of 1 subdomain per process)
645 
646     Notes:
647     The IS numbering is in the parallel, global numbering of the vector.
648 
649     By default the ASM preconditioner uses 1 block per processor.
650 
651     These index sets cannot be destroyed until after completion of the
652     linear solves for which the ASM preconditioner is being used.
653 
654     Use PCASMSetTotalSubdomains() to set the subdomains for all processors.
655 
656     Level: advanced
657 
658 .keywords: PC, ASM, set, local, subdomains, additive Schwarz
659 
660 .seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
661           PCASMCreateSubdomains2D(), PCASMGetLocalSubdomains()
662 @*/
663 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetLocalSubdomains(PC pc,PetscInt n,IS is[],IS is_local[])
664 {
665   PetscErrorCode ierr,(*f)(PC,PetscInt,IS[],IS[]);
666 
667   PetscFunctionBegin;
668   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
669   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetLocalSubdomains_C",(void (**)(void))&f);CHKERRQ(ierr);
670   if (f) {
671     ierr = (*f)(pc,n,is,is_local);CHKERRQ(ierr);
672   }
673   PetscFunctionReturn(0);
674 }
675 
676 #undef __FUNCT__
677 #define __FUNCT__ "PCASMSetTotalSubdomains"
678 /*@C
679     PCASMSetTotalSubdomains - Sets the subdomains for all processor for the
680     additive Schwarz preconditioner.  Either all or no processors in the
681     PC communicator must call this routine, with the same index sets.
682 
683     Collective on PC
684 
685     Input Parameters:
686 +   pc - the preconditioner context
687 .   n - the number of subdomains for all processors
688 .   is - the index sets that define the subdomains for all processor
689          (or PETSC_NULL for PETSc to determine subdomains)
690 -   is_local - the index sets that define the local part of the subdomains for this processor
691          (or PETSC_NULL to use the default of 1 subdomain per process)
692 
693     Options Database Key:
694     To set the total number of subdomain blocks rather than specify the
695     index sets, use the option
696 .    -pc_asm_blocks <blks> - Sets total blocks
697 
698     Notes:
699     Currently you cannot use this to set the actual subdomains with the argument is.
700 
701     By default the ASM preconditioner uses 1 block per processor.
702 
703     These index sets cannot be destroyed until after completion of the
704     linear solves for which the ASM preconditioner is being used.
705 
706     Use PCASMSetLocalSubdomains() to set local subdomains.
707 
708     Level: advanced
709 
710 .keywords: PC, ASM, set, total, global, subdomains, additive Schwarz
711 
712 .seealso: PCASMSetLocalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
713           PCASMCreateSubdomains2D()
714 @*/
715 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetTotalSubdomains(PC pc,PetscInt N,IS is[],IS is_local[])
716 {
717   PetscErrorCode ierr,(*f)(PC,PetscInt,IS[],IS[]);
718 
719   PetscFunctionBegin;
720   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
721   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetTotalSubdomains_C",(void (**)(void))&f);CHKERRQ(ierr);
722   if (f) {
723     ierr = (*f)(pc,N,is,is_local);CHKERRQ(ierr);
724   }
725   PetscFunctionReturn(0);
726 }
727 
728 #undef __FUNCT__
729 #define __FUNCT__ "PCASMSetOverlap"
730 /*@
731     PCASMSetOverlap - Sets the overlap between a pair of subdomains for the
732     additive Schwarz preconditioner.  Either all or no processors in the
733     PC communicator must call this routine.
734 
735     Collective on PC
736 
737     Input Parameters:
738 +   pc  - the preconditioner context
739 -   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 1)
740 
741     Options Database Key:
742 .   -pc_asm_overlap <ovl> - Sets overlap
743 
744     Notes:
745     By default the ASM preconditioner uses 1 block per processor.  To use
746     multiple blocks per perocessor, see PCASMSetTotalSubdomains() and
747     PCASMSetLocalSubdomains() (and the option -pc_asm_blocks <blks>).
748 
749     The overlap defaults to 1, so if one desires that no additional
750     overlap be computed beyond what may have been set with a call to
751     PCASMSetTotalSubdomains() or PCASMSetLocalSubdomains(), then ovl
752     must be set to be 0.  In particular, if one does not explicitly set
753     the subdomains an application code, then all overlap would be computed
754     internally by PETSc, and using an overlap of 0 would result in an ASM
755     variant that is equivalent to the block Jacobi preconditioner.
756 
757     Note that one can define initial index sets with any overlap via
758     PCASMSetTotalSubdomains() or PCASMSetLocalSubdomains(); the routine
759     PCASMSetOverlap() merely allows PETSc to extend that overlap further
760     if desired.
761 
762     Level: intermediate
763 
764 .keywords: PC, ASM, set, overlap
765 
766 .seealso: PCASMSetTotalSubdomains(), PCASMSetLocalSubdomains(), PCASMGetSubKSP(),
767           PCASMCreateSubdomains2D(), PCASMGetLocalSubdomains()
768 @*/
769 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetOverlap(PC pc,PetscInt ovl)
770 {
771   PetscErrorCode ierr,(*f)(PC,PetscInt);
772 
773   PetscFunctionBegin;
774   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
775   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetOverlap_C",(void (**)(void))&f);CHKERRQ(ierr);
776   if (f) {
777     ierr = (*f)(pc,ovl);CHKERRQ(ierr);
778   }
779   PetscFunctionReturn(0);
780 }
781 
782 #undef __FUNCT__
783 #define __FUNCT__ "PCASMSetType"
784 /*@
785     PCASMSetType - Sets the type of restriction and interpolation used
786     for local problems in the additive Schwarz method.
787 
788     Collective on PC
789 
790     Input Parameters:
791 +   pc  - the preconditioner context
792 -   type - variant of ASM, one of
793 .vb
794       PC_ASM_BASIC       - full interpolation and restriction
795       PC_ASM_RESTRICT    - full restriction, local processor interpolation
796       PC_ASM_INTERPOLATE - full interpolation, local processor restriction
797       PC_ASM_NONE        - local processor restriction and interpolation
798 .ve
799 
800     Options Database Key:
801 .   -pc_asm_type [basic,restrict,interpolate,none] - Sets ASM type
802 
803     Level: intermediate
804 
805 .keywords: PC, ASM, set, type
806 
807 .seealso: PCASMSetTotalSubdomains(), PCASMSetTotalSubdomains(), PCASMGetSubKSP(),
808           PCASMCreateSubdomains2D()
809 @*/
810 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetType(PC pc,PCASMType type)
811 {
812   PetscErrorCode ierr,(*f)(PC,PCASMType);
813 
814   PetscFunctionBegin;
815   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
816   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetType_C",(void (**)(void))&f);CHKERRQ(ierr);
817   if (f) {
818     ierr = (*f)(pc,type);CHKERRQ(ierr);
819   }
820   PetscFunctionReturn(0);
821 }
822 
823 #undef __FUNCT__
824 #define __FUNCT__ "PCASMGetSubKSP"
825 /*@C
826    PCASMGetSubKSP - Gets the local KSP contexts for all blocks on
827    this processor.
828 
829    Collective on PC iff first_local is requested
830 
831    Input Parameter:
832 .  pc - the preconditioner context
833 
834    Output Parameters:
835 +  n_local - the number of blocks on this processor or PETSC_NULL
836 .  first_local - the global number of the first block on this processor or PETSC_NULL,
837                  all processors must request or all must pass PETSC_NULL
838 -  ksp - the array of KSP contexts
839 
840    Note:
841    After PCASMGetSubKSP() the array of KSPes is not to be freed
842 
843    Currently for some matrix implementations only 1 block per processor
844    is supported.
845 
846    You must call KSPSetUp() before calling PCASMGetSubKSP().
847 
848    Level: advanced
849 
850 .keywords: PC, ASM, additive Schwarz, get, sub, KSP, context
851 
852 .seealso: PCASMSetTotalSubdomains(), PCASMSetTotalSubdomains(), PCASMSetOverlap(),
853           PCASMCreateSubdomains2D(),
854 @*/
855 PetscErrorCode PETSCKSP_DLLEXPORT PCASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
856 {
857   PetscErrorCode ierr,(*f)(PC,PetscInt*,PetscInt*,KSP **);
858 
859   PetscFunctionBegin;
860   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
861   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMGetSubKSP_C",(void (**)(void))&f);CHKERRQ(ierr);
862   if (f) {
863     ierr = (*f)(pc,n_local,first_local,ksp);CHKERRQ(ierr);
864   } else {
865     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot get subksp for this type of PC");
866   }
867 
868  PetscFunctionReturn(0);
869 }
870 
871 /* -------------------------------------------------------------------------------------*/
872 /*MC
873    PCASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
874            its own KSP object.
875 
876    Options Database Keys:
877 +  -pc_asm_truelocal - Activates PCASMSetUseTrueLocal()
878 .  -pc_asm_blocks <blks> - Sets total blocks
879 .  -pc_asm_overlap <ovl> - Sets overlap
880 -  -pc_asm_type [basic,restrict,interpolate,none] - Sets ASM type
881 
882      IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
883       will get a different convergence rate due to the default option of -pc_asm_type restrict. Use
884       -pc_asm_type basic to use the standard ASM.
885 
886    Notes: Each processor can have one or more blocks, but a block cannot be shared by more
887      than one processor. Defaults to one block per processor.
888 
889      To set options on the solvers for each block append -sub_ to all the KSP, and PC
890         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
891 
892      To set the options on the solvers separate for each block call PCASMGetSubKSP()
893          and set the options directly on the resulting KSP object (you can access its PC
894          with KSPGetPC())
895 
896 
897    Level: beginner
898 
899    Concepts: additive Schwarz method
900 
901     References:
902     An additive variant of the Schwarz alternating method for the case of many subregions
903     M Dryja, OB Widlund - Courant Institute, New York University Technical report
904 
905     Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
906     Barry Smith, Petter Bjorstad, and William Gropp, Cambridge University Press, ISBN 0-521-49589-X.
907 
908 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
909            PCBJACOBI, PCASMSetUseTrueLocal(), PCASMGetSubKSP(), PCASMSetLocalSubdomains(),
910            PCASMSetTotalSubdomains(), PCSetModifySubmatrices(), PCASMSetOverlap(), PCASMSetType()
911 
912 M*/
913 
914 EXTERN_C_BEGIN
915 #undef __FUNCT__
916 #define __FUNCT__ "PCCreate_ASM"
917 PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_ASM(PC pc)
918 {
919   PetscErrorCode ierr;
920   PC_ASM         *osm;
921 
922   PetscFunctionBegin;
923   ierr = PetscNewLog(pc,PC_ASM,&osm);CHKERRQ(ierr);
924   osm->n                 = PETSC_DECIDE;
925   osm->n_local           = 0;
926   osm->n_local_true      = 0;
927   osm->overlap           = 1;
928   osm->ksp               = 0;
929   osm->restriction       = 0;
930   osm->localization      = 0;
931   osm->prolongation      = 0;
932   osm->x                 = 0;
933   osm->y                 = 0;
934   osm->y_local           = 0;
935   osm->is                = 0;
936   osm->is_local          = 0;
937   osm->mat               = 0;
938   osm->pmat              = 0;
939   osm->type              = PC_ASM_RESTRICT;
940   osm->same_local_solves = PETSC_TRUE;
941 
942   pc->data                   = (void*)osm;
943   pc->ops->apply             = PCApply_ASM;
944   pc->ops->applytranspose    = PCApplyTranspose_ASM;
945   pc->ops->setup             = PCSetUp_ASM;
946   pc->ops->destroy           = PCDestroy_ASM;
947   pc->ops->setfromoptions    = PCSetFromOptions_ASM;
948   pc->ops->setuponblocks     = PCSetUpOnBlocks_ASM;
949   pc->ops->view              = PCView_ASM;
950   pc->ops->applyrichardson   = 0;
951 
952   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetLocalSubdomains_C","PCASMSetLocalSubdomains_ASM",
953                     PCASMSetLocalSubdomains_ASM);CHKERRQ(ierr);
954   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetTotalSubdomains_C","PCASMSetTotalSubdomains_ASM",
955                     PCASMSetTotalSubdomains_ASM);CHKERRQ(ierr);
956   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetOverlap_C","PCASMSetOverlap_ASM",
957                     PCASMSetOverlap_ASM);CHKERRQ(ierr);
958   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetType_C","PCASMSetType_ASM",
959                     PCASMSetType_ASM);CHKERRQ(ierr);
960   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMGetSubKSP_C","PCASMGetSubKSP_ASM",
961                     PCASMGetSubKSP_ASM);CHKERRQ(ierr);
962   PetscFunctionReturn(0);
963 }
964 EXTERN_C_END
965 
966 
967 #undef __FUNCT__
968 #define __FUNCT__ "PCASMCreateSubdomains"
969 /*@C
970    PCASMCreateSubdomains - Creates the index sets for the overlapping Schwarz
971    preconditioner for a any problem on a general grid.
972 
973    Collective
974 
975    Input Parameters:
976 +  A - The global matrix operator
977 -  n - the number of local blocks
978 
979    Output Parameters:
980 .  outis - the array of index sets defining the subdomains
981 
982    Level: advanced
983 
984    Note: this generates nonoverlapping subdomains; the PCASM will generate the overlap
985     from these if you use PCASMSetLocalSubdomains()
986 
987     In the Fortran version you must provide the array outis[] already allocated of length n.
988 
989 .keywords: PC, ASM, additive Schwarz, create, subdomains, unstructured grid
990 
991 .seealso: PCASMSetLocalSubdomains(), PCASMDestroySubdomains()
992 @*/
993 PetscErrorCode PETSCKSP_DLLEXPORT PCASMCreateSubdomains(Mat A, PetscInt n, IS* outis[])
994 {
995   MatPartitioning           mpart;
996   const char                *prefix;
997   PetscErrorCode            (*f)(Mat,PetscTruth*,MatReuse,Mat*);
998   PetscMPIInt               size;
999   PetscInt                  i,j,rstart,rend,bs;
1000   PetscTruth                iscopy = PETSC_FALSE,isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
1001   Mat                       Ad = PETSC_NULL, adj;
1002   IS                        ispart,isnumb,*is;
1003   PetscErrorCode            ierr;
1004 
1005   PetscFunctionBegin;
1006   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
1007   PetscValidPointer(outis,3);
1008   if (n < 1) {SETERRQ1(PETSC_ERR_ARG_WRONG,"number of local blocks must be > 0, n = %D",n);}
1009 
1010   /* Get prefix, row distribution, and block size */
1011   ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr);
1012   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1013   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
1014   if (rstart/bs*bs != rstart || rend/bs*bs != rend) {
1015     SETERRQ3(PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);
1016   }
1017   /* Get diagonal block from matrix if possible */
1018   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1019   ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",(void (**)(void))&f);CHKERRQ(ierr);
1020   if (f) {
1021     ierr = (*f)(A,&iscopy,MAT_INITIAL_MATRIX,&Ad);CHKERRQ(ierr);
1022   } else if (size == 1) {
1023     iscopy = PETSC_FALSE; Ad = A;
1024   } else {
1025     iscopy = PETSC_FALSE; Ad = PETSC_NULL;
1026   }
1027   if (Ad) {
1028     ierr = PetscTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
1029     if (!isbaij) {ierr = PetscTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);}
1030   }
1031   if (Ad && n > 1) {
1032     PetscTruth match,done;
1033     /* Try to setup a good matrix partitioning if available */
1034     ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr);
1035     ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
1036     ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
1037     ierr = PetscTypeCompare((PetscObject)mpart,MAT_PARTITIONING_CURRENT,&match);CHKERRQ(ierr);
1038     if (!match) {
1039       ierr = PetscTypeCompare((PetscObject)mpart,MAT_PARTITIONING_SQUARE,&match);CHKERRQ(ierr);
1040     }
1041     if (!match) { /* assume a "good" partitioner is available */
1042       PetscInt na,*ia,*ja;
1043       ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1044       if (done) {
1045 	/* Build adjacency matrix by hand. Unfortunately a call to
1046 	   MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1047 	   remove the block-aij structure and we cannot expect
1048 	   MatPartitioning to split vertices as we need */
1049 	PetscInt i,j,*row,len,nnz,cnt,*iia=0,*jja=0;
1050 	nnz = 0;
1051 	for (i=0; i<na; i++) { /* count number of nonzeros */
1052 	  len = ia[i+1] - ia[i];
1053 	  row = ja + ia[i];
1054 	  for (j=0; j<len; j++) {
1055 	    if (row[j] == i) { /* don't count diagonal */
1056 	      len--; break;
1057 	    }
1058 	  }
1059 	  nnz += len;
1060 	}
1061 	ierr = PetscMalloc((na+1)*sizeof(PetscInt),&iia);CHKERRQ(ierr);
1062 	ierr = PetscMalloc((nnz)*sizeof(PetscInt),&jja);CHKERRQ(ierr);
1063 	nnz    = 0;
1064 	iia[0] = 0;
1065 	for (i=0; i<na; i++) { /* fill adjacency */
1066 	  cnt = 0;
1067 	  len = ia[i+1] - ia[i];
1068 	  row = ja + ia[i];
1069 	  for (j=0; j<len; j++) {
1070 	    if (row[j] != i) { /* if not diagonal */
1071 	      jja[nnz+cnt++] = row[j];
1072 	    }
1073 	  }
1074 	  nnz += cnt;
1075 	  iia[i+1] = nnz;
1076 	}
1077 	/* Partitioning of the adjacency matrix */
1078 	ierr = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,PETSC_NULL,&adj);CHKERRQ(ierr);
1079 	ierr = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr);
1080 	ierr = MatPartitioningSetNParts(mpart,n);CHKERRQ(ierr);
1081 	ierr = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr);
1082 	ierr = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr);
1083 	ierr = MatDestroy(adj);CHKERRQ(ierr);
1084 	foundpart = PETSC_TRUE;
1085       }
1086       ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1087     }
1088     ierr = MatPartitioningDestroy(mpart);CHKERRQ(ierr);
1089   }
1090   if (iscopy) {ierr = MatDestroy(Ad);CHKERRQ(ierr);}
1091 
1092   ierr = PetscMalloc(n*sizeof(IS),&is);CHKERRQ(ierr);
1093   *outis = is;
1094 
1095   if (!foundpart) {
1096 
1097     /* Partitioning by contiguous chunks of rows */
1098 
1099     PetscInt mbs   = (rend-rstart)/bs;
1100     PetscInt start = rstart;
1101     for (i=0; i<n; i++) {
1102       PetscInt count = (mbs/n + ((mbs % n) > i)) * bs;
1103       ierr   = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr);
1104       start += count;
1105     }
1106 
1107   } else {
1108 
1109     /* Partitioning by adjacency of diagonal block  */
1110 
1111     const PetscInt *numbering;
1112     PetscInt       *count,nidx,*indices,*newidx,start=0;
1113     /* Get node count in each partition */
1114     ierr = PetscMalloc(n*sizeof(PetscInt),&count);CHKERRQ(ierr);
1115     ierr = ISPartitioningCount(ispart,n,count);CHKERRQ(ierr);
1116     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1117       for (i=0; i<n; i++) count[i] *= bs;
1118     }
1119     /* Build indices from node numbering */
1120     ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr);
1121     ierr = PetscMalloc(nidx*sizeof(PetscInt),&indices);CHKERRQ(ierr);
1122     for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
1123     ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr);
1124     ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr);
1125     ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr);
1126     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1127       ierr = PetscMalloc(nidx*bs*sizeof(PetscInt),&newidx);CHKERRQ(ierr);
1128       for (i=0; i<nidx; i++)
1129 	for (j=0; j<bs; j++)
1130 	  newidx[i*bs+j] = indices[i]*bs + j;
1131       ierr = PetscFree(indices);CHKERRQ(ierr);
1132       nidx   *= bs;
1133       indices = newidx;
1134     }
1135     /* Shift to get global indices */
1136     for (i=0; i<nidx; i++) indices[i] += rstart;
1137 
1138     /* Build the index sets for each block */
1139     for (i=0; i<n; i++) {
1140       ierr   = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],&is[i]);CHKERRQ(ierr);
1141       ierr   = ISSort(is[i]);CHKERRQ(ierr);
1142       start += count[i];
1143     }
1144 
1145     ierr = PetscFree(count);
1146     ierr = PetscFree(indices);
1147     ierr = ISDestroy(isnumb);CHKERRQ(ierr);
1148     ierr = ISDestroy(ispart);CHKERRQ(ierr);
1149 
1150   }
1151 
1152   PetscFunctionReturn(0);
1153 }
1154 
1155 #undef __FUNCT__
1156 #define __FUNCT__ "PCASMDestroySubdomains"
1157 /*@C
1158    PCASMDestroySubdomains - Destroys the index sets created with
1159    PCASMCreateSubdomains(). Should be called after setting subdomains
1160    with PCASMSetLocalSubdomains().
1161 
1162    Collective
1163 
1164    Input Parameters:
1165 +  n - the number of index sets
1166 .  is - the array of index sets
1167 -  is_local - the array of local index sets, can be PETSC_NULL
1168 
1169    Level: advanced
1170 
1171 .keywords: PC, ASM, additive Schwarz, create, subdomains, unstructured grid
1172 
1173 .seealso: PCASMCreateSubdomains(), PCASMSetLocalSubdomains()
1174 @*/
1175 PetscErrorCode PETSCKSP_DLLEXPORT PCASMDestroySubdomains(PetscInt n, IS is[], IS is_local[])
1176 {
1177   PetscInt       i;
1178   PetscErrorCode ierr;
1179   PetscFunctionBegin;
1180   if (n <= 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"n must be > 0: n = %D",n);
1181   PetscValidPointer(is,2);
1182   for (i=0; i<n; i++) { ierr = ISDestroy(is[i]);CHKERRQ(ierr); }
1183   ierr = PetscFree(is);CHKERRQ(ierr);
1184   if (is_local) {
1185     PetscValidPointer(is_local,3);
1186     for (i=0; i<n; i++) { ierr = ISDestroy(is_local[i]);CHKERRQ(ierr); }
1187     ierr = PetscFree(is_local);CHKERRQ(ierr);
1188   }
1189   PetscFunctionReturn(0);
1190 }
1191 
1192 #undef __FUNCT__
1193 #define __FUNCT__ "PCASMCreateSubdomains2D"
1194 /*@
1195    PCASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1196    preconditioner for a two-dimensional problem on a regular grid.
1197 
1198    Not Collective
1199 
1200    Input Parameters:
1201 +  m, n - the number of mesh points in the x and y directions
1202 .  M, N - the number of subdomains in the x and y directions
1203 .  dof - degrees of freedom per node
1204 -  overlap - overlap in mesh lines
1205 
1206    Output Parameters:
1207 +  Nsub - the number of subdomains created
1208 -  is - the array of index sets defining the subdomains
1209 
1210    Note:
1211    Presently PCAMSCreateSubdomains2d() is valid only for sequential
1212    preconditioners.  More general related routines are
1213    PCASMSetTotalSubdomains() and PCASMSetLocalSubdomains().
1214 
1215    Level: advanced
1216 
1217 .keywords: PC, ASM, additive Schwarz, create, subdomains, 2D, regular grid
1218 
1219 .seealso: PCASMSetTotalSubdomains(), PCASMSetLocalSubdomains(), PCASMGetSubKSP(),
1220           PCASMSetOverlap()
1221 @*/
1222 PetscErrorCode PETSCKSP_DLLEXPORT PCASMCreateSubdomains2D(PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt dof,PetscInt overlap,PetscInt *Nsub,IS **is)
1223 {
1224   PetscInt       i,j,height,width,ystart,xstart,yleft,yright,xleft,xright,loc_outter;
1225   PetscErrorCode ierr;
1226   PetscInt       nidx,*idx,loc,ii,jj,count;
1227 
1228   PetscFunctionBegin;
1229   if (dof != 1) SETERRQ(PETSC_ERR_SUP," ");
1230 
1231   *Nsub = N*M;
1232   ierr = PetscMalloc((*Nsub)*sizeof(IS *),is);CHKERRQ(ierr);
1233   ystart = 0;
1234   loc_outter = 0;
1235   for (i=0; i<N; i++) {
1236     height = n/N + ((n % N) > i); /* height of subdomain */
1237     if (height < 2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Too many N subdomains for mesh dimension n");
1238     yleft  = ystart - overlap; if (yleft < 0) yleft = 0;
1239     yright = ystart + height + overlap; if (yright > n) yright = n;
1240     xstart = 0;
1241     for (j=0; j<M; j++) {
1242       width = m/M + ((m % M) > j); /* width of subdomain */
1243       if (width < 2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Too many M subdomains for mesh dimension m");
1244       xleft  = xstart - overlap; if (xleft < 0) xleft = 0;
1245       xright = xstart + width + overlap; if (xright > m) xright = m;
1246       nidx   = (xright - xleft)*(yright - yleft);
1247       ierr = PetscMalloc(nidx*sizeof(PetscInt),&idx);CHKERRQ(ierr);
1248       loc    = 0;
1249       for (ii=yleft; ii<yright; ii++) {
1250         count = m*ii + xleft;
1251         for (jj=xleft; jj<xright; jj++) {
1252           idx[loc++] = count++;
1253         }
1254       }
1255       ierr = ISCreateGeneral(PETSC_COMM_SELF,nidx,idx,(*is)+loc_outter++);CHKERRQ(ierr);
1256       ierr = PetscFree(idx);CHKERRQ(ierr);
1257       xstart += width;
1258     }
1259     ystart += height;
1260   }
1261   for (i=0; i<*Nsub; i++) { ierr = ISSort((*is)[i]);CHKERRQ(ierr); }
1262   PetscFunctionReturn(0);
1263 }
1264 
1265 #undef __FUNCT__
1266 #define __FUNCT__ "PCASMGetLocalSubdomains"
1267 /*@C
1268     PCASMGetLocalSubdomains - Gets the local subdomains (for this processor
1269     only) for the additive Schwarz preconditioner.
1270 
1271     Collective on PC
1272 
1273     Input Parameter:
1274 .   pc - the preconditioner context
1275 
1276     Output Parameters:
1277 +   n - the number of subdomains for this processor (default value = 1)
1278 .   is - the index sets that define the subdomains for this processor
1279 -   is_local - the index sets that define the local part of the subdomains for this processor (can be PETSC_NULL)
1280 
1281 
1282     Notes:
1283     The IS numbering is in the parallel, global numbering of the vector.
1284 
1285     Level: advanced
1286 
1287 .keywords: PC, ASM, set, local, subdomains, additive Schwarz
1288 
1289 .seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
1290           PCASMCreateSubdomains2D(), PCASMSetLocalSubdomains(), PCASMGetLocalSubmatrices()
1291 @*/
1292 PetscErrorCode PETSCKSP_DLLEXPORT PCASMGetLocalSubdomains(PC pc,PetscInt *n,IS *is[],IS *is_local[])
1293 {
1294   PC_ASM         *osm;
1295   PetscErrorCode ierr;
1296   PetscTruth     match;
1297 
1298   PetscFunctionBegin;
1299   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
1300   PetscValidIntPointer(n,2);
1301   if (is) PetscValidPointer(is,3);
1302   ierr = PetscTypeCompare((PetscObject)pc,PCASM,&match);CHKERRQ(ierr);
1303   if (!match) {
1304     if (n)  *n  = 0;
1305     if (is) *is = PETSC_NULL;
1306   } else {
1307     osm = (PC_ASM*)pc->data;
1308     if (n)  *n  = osm->n_local_true;
1309     if (is) *is = osm->is;
1310     if (is_local) *is_local = osm->is_local;
1311   }
1312   PetscFunctionReturn(0);
1313 }
1314 
1315 #undef __FUNCT__
1316 #define __FUNCT__ "PCASMGetLocalSubmatrices"
1317 /*@C
1318     PCASMGetLocalSubmatrices - Gets the local submatrices (for this processor
1319     only) for the additive Schwarz preconditioner.
1320 
1321     Collective on PC
1322 
1323     Input Parameter:
1324 .   pc - the preconditioner context
1325 
1326     Output Parameters:
1327 +   n - the number of matrices for this processor (default value = 1)
1328 -   mat - the matrices
1329 
1330 
1331     Level: advanced
1332 
1333 .keywords: PC, ASM, set, local, subdomains, additive Schwarz, block Jacobi
1334 
1335 .seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
1336           PCASMCreateSubdomains2D(), PCASMSetLocalSubdomains(), PCASMGetLocalSubdomains()
1337 @*/
1338 PetscErrorCode PETSCKSP_DLLEXPORT PCASMGetLocalSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1339 {
1340   PC_ASM         *osm;
1341   PetscErrorCode ierr;
1342   PetscTruth     match;
1343 
1344   PetscFunctionBegin;
1345   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
1346   PetscValidIntPointer(n,2);
1347   if (mat) PetscValidPointer(mat,3);
1348   if (!pc->setupcalled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUP() or PCSetUp().");
1349   ierr = PetscTypeCompare((PetscObject)pc,PCASM,&match);CHKERRQ(ierr);
1350   if (!match) {
1351     if (n)   *n   = 0;
1352     if (mat) *mat = PETSC_NULL;
1353   } else {
1354     osm = (PC_ASM*)pc->data;
1355     if (n)   *n   = osm->n_local_true;
1356     if (mat) *mat = osm->pmat;
1357   }
1358   PetscFunctionReturn(0);
1359 }
1360