xref: /petsc/src/ksp/pc/impls/asm/asm.c (revision 7a4fe282d1b349e95b3be72d69d8dd3d3bcd7bc6)
1 #define PETSCKSP_DLL
2 
3 /*
4   This file defines an additive Schwarz preconditioner for any Mat implementation.
5 
6   Note that each processor may have any number of subdomains. But in order to
7   deal easily with the VecScatter(), we treat each processor as if it has the
8   same number of subdomains.
9 
10        n - total number of true subdomains on all processors
11        n_local_true - actual number of subdomains on this processor
12        n_local = maximum over all processors of n_local_true
13 */
14 #include "private/pcimpl.h"     /*I "petscpc.h" I*/
15 
/*
   Private context for the additive Schwarz (ASM) preconditioner.

   Array sizing (see PCSetUp_ASM()): scat, x and y are allocated with
   n_local entries (the global maximum, so every process participates in
   the same number of collective scatters), while ksp, is and pmat hold
   n_local_true entries (only the blocks this process actually owns).
*/
typedef struct {
  PetscInt   n,n_local,n_local_true; /* total blocks over all processes; max blocks on any process; blocks on this process */
  PetscInt   overlap;             /* overlap requested by user */
  KSP        *ksp;                /* linear solvers for each block */
  VecScatter *scat;               /* mapping to subregion */
  Vec        *x,*y;               /* work vectors */
  IS         *is;                 /* index set that defines each subdomain */
  Mat        *mat,*pmat;          /* mat is not currently used */
  PCASMType  type;                /* use reduced interpolation, restriction or both */
  PetscTruth type_set;            /* if user set this value (so won't change it for symmetric problems) */
  PetscTruth same_local_solves;   /* flag indicating whether all local solvers are same */
  PetscTruth inplace;             /* indicates that the sub-matrices are deleted after
                                     PCSetUpOnBlocks() is done. Similar to inplace
                                     factorization in the case of LU and ILU */
} PC_ASM;
31 
#undef __FUNCT__
#define __FUNCT__ "PCView_ASM"
/*
   PCView_ASM - Prints the state of the ASM preconditioner (block counts,
   overlap, restriction/interpolation type and the local block solvers)
   to an ASCII or string viewer.  Collective on the viewer's communicator
   because of the GetSingleton/RestoreSingleton and synchronized prints.
*/
static PetscErrorCode PCView_ASM(PC pc,PetscViewer viewer)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       i,bsz;
  PetscTruth     iascii,isstring;
  PetscViewer    sviewer;


  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_STRING,&isstring);CHKERRQ(ierr);
  if (iascii) {
    /* osm->n may still be PETSC_DECIDE if PCSetUp() has not run yet */
    if (osm->n > 0) {
      ierr = PetscViewerASCIIPrintf(viewer,"  Additive Schwarz: total subdomain blocks = %D, amount of overlap = %D\n",osm->n,osm->overlap);CHKERRQ(ierr);
    } else {
      ierr = PetscViewerASCIIPrintf(viewer,"  Additive Schwarz: total subdomain blocks not yet set, amount of overlap = %D\n",osm->overlap);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPrintf(viewer,"  Additive Schwarz: restriction/interpolation type - %s\n",PCASMTypes[osm->type]);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
    if (osm->same_local_solves) {
      /* all blocks share the same solver configuration, so only rank 0
         prints the first block's KSP as representative */
      if (osm->ksp) {
        ierr = PetscViewerASCIIPrintf(viewer,"  Local solve is same for all blocks, in the following KSP and PC objects:\n");CHKERRQ(ierr);
        ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
        if (!rank) {
          ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
          ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);
          ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        }
        ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
      }
    } else {
      /* solvers may differ per block; every process prints each of its own
         blocks.  The loop runs to n_local (the max over processes) so that
         the Get/RestoreSingleton calls stay matched on every process even
         when the block counts differ. */
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] number of local blocks = %D\n",(int)rank,osm->n_local_true);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  Local solve info for each block is in the following KSP and PC objects:\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
      for (i=0; i<osm->n_local; i++) {
        ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
        if (i < osm->n_local_true) {
          ierr = ISGetLocalSize(osm->is[i],&bsz);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"[%d] local block number %D, size = %D\n",(int)rank,i,bsz);CHKERRQ(ierr);
          ierr = KSPView(osm->ksp[i],sviewer);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"- - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
        }
        ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
      }
      ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    }
  } else if (isstring) {
    ierr = PetscViewerStringSPrintf(viewer," blocks=%D, overlap=%D, type=%s",osm->n,osm->overlap,PCASMTypes[osm->type]);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (osm->ksp) {ierr = KSPView(osm->ksp[0],sviewer);CHKERRQ(ierr);}
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for PCASM",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}
95 
96 #undef __FUNCT__
97 #define __FUNCT__ "PCASMPrintSubdomains"
98 static PetscErrorCode PCASMPrintSubdomains(PC pc)
99 {
100   PC_ASM         *osm  = (PC_ASM*)pc->data;
101   const char     *prefix;
102   char           fname[PETSC_MAX_PATH_LEN+1];
103   PetscViewer    viewer;
104   PetscInt       i,j,nidx;
105   const PetscInt *idx;
106   PetscErrorCode ierr;
107   PetscFunctionBegin;
108   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
109   ierr = PetscOptionsGetString(prefix,"-pc_asm_print_subdomains",
110 			       fname,PETSC_MAX_PATH_LEN,PETSC_NULL);CHKERRQ(ierr);
111   if (fname[0] == 0) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); };
112   ierr = PetscViewerASCIIOpen(((PetscObject)pc)->comm,fname,&viewer);CHKERRQ(ierr);
113   for (i=0;i<osm->n_local_true;i++) {
114     ierr = ISGetLocalSize(osm->is[i],&nidx);CHKERRQ(ierr);
115     ierr = ISGetIndices(osm->is[i],&idx);CHKERRQ(ierr);
116     for (j=0; j<nidx; j++) {
117       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"%D ",idx[j]);CHKERRQ(ierr);
118     }
119     ierr = ISRestoreIndices(osm->is[i],&idx);CHKERRQ(ierr);
120     ierr = PetscViewerASCIISynchronizedPrintf(viewer,"\n");CHKERRQ(ierr);
121   }
122   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
123   ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
124   PetscFunctionReturn(0);
125 }
126 
#undef __FUNCT__
#define __FUNCT__ "PCSetUp_ASM"
/*
   PCSetUp_ASM - Builds the ASM data structures.

   First call: decides the subdomain counts, creates/extends the index
   sets, allocates work vectors, scatters and one KSP per local block.
   Every call: extracts the overlapping submatrices and passes them to
   the block solvers (reusing them when the nonzero pattern is unchanged).
*/
static PetscErrorCode PCSetUp_ASM(PC pc)
{
  PC_ASM         *osm  = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscTruth     symset,flg;
  PetscInt       i,m;
  PetscMPIInt    size;
  MatReuse       scall = MAT_REUSE_MATRIX;
  IS             isl;
  KSP            ksp;
  PC             subpc;
  const char     *prefix,*pprefix;
  Vec            vec;

  PetscFunctionBegin;
  if (!pc->setupcalled) {

    /* if the matrix is known symmetric and the user did not force a type,
       switch to PC_ASM_BASIC (presumably to keep the preconditioner
       symmetric -- see the same logic in PCSetFromOptions_ASM()) */
    if (!osm->type_set) {
      ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
      if (symset && flg) { osm->type = PC_ASM_BASIC; }
    }

    if (osm->n == PETSC_DECIDE && osm->n_local_true < 1) {
      /* no subdomains given, use one per processor */
      osm->n_local = osm->n_local_true = 1;
      ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
      osm->n = size;
    } else if (osm->n == PETSC_DECIDE) {
      /* determine global number of subdomains: PetscMaxSum_Op on MPIU_2INT
         yields the maximum of entry 0 and the sum of entry 1 */
      PetscInt inwork[2],outwork[2];
      inwork[0] = inwork[1] = osm->n_local_true;
      ierr = MPI_Allreduce(inwork,outwork,1,MPIU_2INT,PetscMaxSum_Op,((PetscObject)pc)->comm);CHKERRQ(ierr);
      osm->n_local = outwork[0];
      osm->n       = outwork[1];
    }

    if (!osm->is){ /* create the index sets */
      ierr = PCASMCreateSubdomains(pc->pmat,osm->n_local_true,&osm->is);CHKERRQ(ierr);
    }
    ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
    ierr = PetscOptionsHasName(prefix,"-pc_asm_print_subdomains",&flg);CHKERRQ(ierr);
    if (flg) { ierr = PCASMPrintSubdomains(pc);CHKERRQ(ierr); }

    /*  Extend the "overlapping" regions by a number of steps  */
    ierr = MatIncreaseOverlap(pc->pmat,osm->n_local_true,osm->is,osm->overlap);CHKERRQ(ierr);
    for (i=0; i<osm->n_local_true; i++) {
      ierr = ISSort(osm->is[i]);CHKERRQ(ierr);
    }

    /* Create the local work vectors and scatter contexts; these arrays are
       sized n_local (not n_local_true) so that every process performs the
       same number of (collective) scatter creations below */
    ierr = MatGetVecs(pc->pmat,&vec,0);CHKERRQ(ierr);
    ierr = PetscMalloc(osm->n_local*sizeof(VecScatter *),&osm->scat);CHKERRQ(ierr);
    ierr = PetscMalloc(osm->n_local*sizeof(Vec *),&osm->x);CHKERRQ(ierr);
    ierr = PetscMalloc(osm->n_local*sizeof(Vec *),&osm->y);CHKERRQ(ierr);
    for (i=0; i<osm->n_local_true; i++) {
      ierr = ISGetLocalSize(osm->is[i],&m);CHKERRQ(ierr);
      ierr = VecCreateSeq(PETSC_COMM_SELF,m,&osm->x[i]);CHKERRQ(ierr);
      ierr = VecDuplicate(osm->x[i],&osm->y[i]);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,m,0,1,&isl);CHKERRQ(ierr);
      ierr = VecScatterCreate(vec,osm->is[i],osm->x[i],isl,&osm->scat[i]);CHKERRQ(ierr);
      ierr = ISDestroy(isl);CHKERRQ(ierr);
    }
    /* pad with empty vectors/scatters for the "phantom" blocks this process
       does not own, up to the global maximum block count */
    for (i=osm->n_local_true; i<osm->n_local; i++) {
      ierr = VecCreateSeq(PETSC_COMM_SELF,0,&osm->x[i]);CHKERRQ(ierr);
      ierr = VecDuplicate(osm->x[i],&osm->y[i]);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&isl);CHKERRQ(ierr);
      ierr = VecScatterCreate(vec,isl,osm->x[i],isl,&osm->scat[i]);CHKERRQ(ierr);
      ierr = ISDestroy(isl);CHKERRQ(ierr);
    }
    ierr = VecDestroy(vec);CHKERRQ(ierr);

    /* Create the local solvers: one sequential KSP per true local block,
       defaulting to preonly and inheriting the PC's prefix plus "sub_" */
    ierr = PetscMalloc(osm->n_local_true*sizeof(KSP *),&osm->ksp);CHKERRQ(ierr);
    for (i=0; i<osm->n_local_true; i++) {
      ierr = KSPCreate(PETSC_COMM_SELF,&ksp);CHKERRQ(ierr);
      ierr = PetscLogObjectParent(pc,ksp);CHKERRQ(ierr);
      ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr);
      ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
      ierr = KSPGetPC(ksp,&subpc);CHKERRQ(ierr);
      ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
      ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr);
      ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr);
      osm->ksp[i] = ksp;
    }
    scall = MAT_INITIAL_MATRIX;

  } else {
    /*
       Destroy the blocks from the previous iteration
    */
    if (pc->flag == DIFFERENT_NONZERO_PATTERN || osm->inplace) {
      /* in the inplace case the submatrices were already freed in
         PCSetUpOnBlocks_ASM(), so only the reuse flag needs resetting */
      if (!osm->inplace) {
	ierr = MatDestroyMatrices(osm->n_local_true,&osm->pmat);CHKERRQ(ierr);
      }
      scall = MAT_INITIAL_MATRIX;
    }
  }

  /*
     Extract out the submatrices
  */
  ierr = MatGetSubMatrices(pc->pmat,osm->n_local_true,osm->is,osm->is,scall,&osm->pmat);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    /* newly created submatrices inherit the global matrix's options prefix */
    ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr);
    for (i=0; i<osm->n_local_true; i++) {
      ierr = PetscLogObjectParent(pc,osm->pmat[i]);CHKERRQ(ierr);
      ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr);
    }
  }

  /* Return control to the user so that the submatrices can be modified (e.g., to apply
     different boundary conditions for the submatrices than for the global problem) */
  ierr = PCModifySubMatrices(pc,osm->n_local_true,osm->is,osm->is,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr);

  /*
     Loop over subdomains putting them into local ksp
  */
  for (i=0; i<osm->n_local_true; i++) {
    ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i],pc->flag);CHKERRQ(ierr);
    if (!pc->setupcalled) {
      ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr);
    }
  }

  PetscFunctionReturn(0);
}
255 
256 #undef __FUNCT__
257 #define __FUNCT__ "PCSetUpOnBlocks_ASM"
258 static PetscErrorCode PCSetUpOnBlocks_ASM(PC pc)
259 {
260   PC_ASM         *osm = (PC_ASM*)pc->data;
261   PetscErrorCode ierr;
262   PetscInt       i;
263 
264   PetscFunctionBegin;
265   for (i=0; i<osm->n_local_true; i++) {
266     ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr);
267   }
268   /*
269      If inplace flag is set, then destroy the matrix after the setup
270      on blocks is done.
271   */
272   if (osm->inplace && osm->n_local_true > 0) {
273     ierr = MatDestroyMatrices(osm->n_local_true,&osm->pmat);CHKERRQ(ierr);
274   }
275   PetscFunctionReturn(0);
276 }
277 
#undef __FUNCT__
#define __FUNCT__ "PCApply_ASM"
/*
   PCApply_ASM - Applies the ASM preconditioner: y = sum_i R_i^T A_i^{-1} R_i x.

   The scatters are pipelined: all forward (restriction) scatters are
   started first, then each block finishes its gather, solves, and starts
   its reverse (interpolation) scatter; the reverse scatters are completed
   last.  Phantom blocks (indices >= n_local_true) participate in the
   scatters only, so the collective pattern matches on every process.
*/
static PetscErrorCode PCApply_ASM(PC pc,Vec x,Vec y)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i,n_local = osm->n_local,n_local_true = osm->n_local_true;
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

  PetscFunctionBegin;
  /*
     Support for limiting the restriction or interpolation to only local
     subdomain values (leaving the other values 0).
  */
  if (!(osm->type & PC_ASM_RESTRICT)) {
    forward = SCATTER_FORWARD_LOCAL;
    /* have to zero the work RHS since scatter may leave some slots empty */
    for (i=0; i<n_local_true; i++) {
      ierr = VecSet(osm->x[i],0.0);CHKERRQ(ierr);
    }
  }
  if (!(osm->type & PC_ASM_INTERPOLATE)) {
    reverse = SCATTER_REVERSE_LOCAL;
  }

  /* start all restrictions x -> x_i */
  for (i=0; i<n_local; i++) {
    ierr = VecScatterBegin(osm->scat[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  /* zero y because the block results are accumulated with ADD_VALUES */
  ierr = VecSet(y,0.0);CHKERRQ(ierr);
  /* do the local solves */
  for (i=0; i<n_local_true; i++) {
    ierr = VecScatterEnd(osm->scat[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
    ierr = VecScatterBegin(osm->scat[i],osm->y[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  /* handle the rest of the scatters that do not have local solves */
  for (i=n_local_true; i<n_local; i++) {
    ierr = VecScatterEnd(osm->scat[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    ierr = VecScatterBegin(osm->scat[i],osm->y[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  /* complete all interpolations y_i -> y */
  for (i=0; i<n_local; i++) {
    ierr = VecScatterEnd(osm->scat[i],osm->y[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
323 
#undef __FUNCT__
#define __FUNCT__ "PCApplyTranspose_ASM"
/*
   PCApplyTranspose_ASM - Applies the transpose of the ASM preconditioner.
   Mirrors PCApply_ASM() with KSPSolveTranspose() for the block solves and
   the RESTRICT/INTERPOLATE roles of the scatter modes swapped.
*/
static PetscErrorCode PCApplyTranspose_ASM(PC pc,Vec x,Vec y)
{
  PC_ASM         *osm = (PC_ASM*)pc->data;
  PetscErrorCode ierr;
  PetscInt       i,n_local = osm->n_local,n_local_true = osm->n_local_true;
  ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

  PetscFunctionBegin;
  /*
     Support for limiting the restriction or interpolation to only local
     subdomain values (leaving the other values 0).

     Note: these are reversed from the PCApply_ASM() because we are applying the
     transpose of the three terms
  */
  if (!(osm->type & PC_ASM_INTERPOLATE)) {
    forward = SCATTER_FORWARD_LOCAL;
    /* have to zero the work RHS since scatter may leave some slots empty */
    for (i=0; i<n_local_true; i++) {
      ierr = VecSet(osm->x[i],0.0);CHKERRQ(ierr);
    }
  }
  if (!(osm->type & PC_ASM_RESTRICT)) {
    reverse = SCATTER_REVERSE_LOCAL;
  }

  /* start all restrictions x -> x_i */
  for (i=0; i<n_local; i++) {
    ierr = VecScatterBegin(osm->scat[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
  }
  /* zero y because the block results are accumulated with ADD_VALUES */
  ierr = VecSet(y,0.0);CHKERRQ(ierr);
  /* do the local solves */
  for (i=0; i<n_local_true; i++) {
    ierr = VecScatterEnd(osm->scat[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
    ierr = VecScatterBegin(osm->scat[i],osm->y[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  /* handle the rest of the scatters that do not have local solves */
  for (i=n_local_true; i<n_local; i++) {
    ierr = VecScatterEnd(osm->scat[i],x,osm->x[i],INSERT_VALUES,forward);CHKERRQ(ierr);
    ierr = VecScatterBegin(osm->scat[i],osm->y[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  /* complete all interpolations y_i -> y */
  for (i=0; i<n_local; i++) {
    ierr = VecScatterEnd(osm->scat[i],osm->y[i],y,ADD_VALUES,reverse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
372 
373 #undef __FUNCT__
374 #define __FUNCT__ "PCDestroy_ASM"
375 static PetscErrorCode PCDestroy_ASM(PC pc)
376 {
377   PC_ASM         *osm = (PC_ASM*)pc->data;
378   PetscErrorCode ierr;
379   PetscInt       i;
380 
381   PetscFunctionBegin;
382   if (osm->ksp) {
383     for (i=0; i<osm->n_local_true; i++) {
384       ierr = KSPDestroy(osm->ksp[i]);CHKERRQ(ierr);
385     }
386     ierr = PetscFree(osm->ksp);CHKERRQ(ierr);
387   }
388   if (osm->pmat && !osm->inplace) {
389     if (osm->n_local_true > 0) {
390       ierr = MatDestroyMatrices(osm->n_local_true,&osm->pmat);CHKERRQ(ierr);
391     }
392   }
393   if (osm->scat) {
394     for (i=0; i<osm->n_local; i++) {
395       ierr = VecScatterDestroy(osm->scat[i]);CHKERRQ(ierr);
396       ierr = VecDestroy(osm->x[i]);CHKERRQ(ierr);
397       ierr = VecDestroy(osm->y[i]);CHKERRQ(ierr);
398     }
399     ierr = PetscFree(osm->scat);CHKERRQ(ierr);
400     ierr = PetscFree(osm->x);CHKERRQ(ierr);
401     ierr = PetscFree(osm->y);CHKERRQ(ierr);
402   }
403   if (osm->is) {
404     ierr = PCASMDestroySubdomains(osm->n_local_true,osm->is);CHKERRQ(ierr);
405   }
406   ierr = PetscFree(osm);CHKERRQ(ierr);
407   PetscFunctionReturn(0);
408 }
409 
410 #undef __FUNCT__
411 #define __FUNCT__ "PCSetFromOptions_ASM"
412 static PetscErrorCode PCSetFromOptions_ASM(PC pc)
413 {
414   PC_ASM         *osm = (PC_ASM*)pc->data;
415   PetscErrorCode ierr;
416   PetscInt       blocks,ovl;
417   PetscTruth     symset,flg;
418   PCASMType      asmtype;
419 
420   PetscFunctionBegin;
421   /* set the type to symmetric if matrix is symmetric */
422   if (!osm->type_set && pc->pmat) {
423     ierr = MatIsSymmetricKnown(pc->pmat,&symset,&flg);CHKERRQ(ierr);
424     if (symset && flg) { osm->type = PC_ASM_BASIC; }
425   }
426   ierr = PetscOptionsHead("Additive Schwarz options");CHKERRQ(ierr);
427     ierr = PetscOptionsInt("-pc_asm_blocks","Number of subdomains","PCASMSetTotalSubdomains",
428 			   osm->n,&blocks,&flg);CHKERRQ(ierr);
429     if (flg) {ierr = PCASMSetTotalSubdomains(pc,blocks,PETSC_NULL);CHKERRQ(ierr); }
430     ierr = PetscOptionsInt("-pc_asm_overlap","Number of grid points overlap","PCASMSetOverlap",
431 			   osm->overlap,&ovl,&flg);CHKERRQ(ierr);
432     if (flg) {ierr = PCASMSetOverlap(pc,ovl);CHKERRQ(ierr); }
433     ierr = PetscOptionsName("-pc_asm_in_place","Perform matrix factorization inplace","PCASMSetUseInPlace",&flg);CHKERRQ(ierr);
434     if (flg) {ierr = PCASMSetUseInPlace(pc);CHKERRQ(ierr); }
435     ierr = PetscOptionsEnum("-pc_asm_type","Type of restriction/extension","PCASMSetType",
436 			    PCASMTypes,(PetscEnum)osm->type,(PetscEnum*)&asmtype,&flg);CHKERRQ(ierr);
437     if (flg) {ierr = PCASMSetType(pc,asmtype);CHKERRQ(ierr); }
438   ierr = PetscOptionsTail();CHKERRQ(ierr);
439   PetscFunctionReturn(0);
440 }
441 
442 /*------------------------------------------------------------------------------------*/
443 
444 EXTERN_C_BEGIN
445 #undef __FUNCT__
446 #define __FUNCT__ "PCASMSetLocalSubdomains_ASM"
447 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetLocalSubdomains_ASM(PC pc,PetscInt n,IS is[])
448 {
449   PC_ASM         *osm = (PC_ASM*)pc->data;
450   PetscErrorCode ierr;
451   PetscInt       i;
452 
453   PetscFunctionBegin;
454   if (n < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more blocks, n = %D",n);
455   if (pc->setupcalled && (n != osm->n_local_true || is)) {
456     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"PCASMSetLocalSubdomains() should be called before calling PCSetup().");
457   }
458   if (!pc->setupcalled) {
459     if (is) {
460       for (i=0; i<n; i++) {ierr = PetscObjectReference((PetscObject)is[i]);CHKERRQ(ierr);}
461     }
462     if (osm->is) {
463       ierr = PCASMDestroySubdomains(osm->n_local_true,osm->is);CHKERRQ(ierr);
464     }
465     osm->n_local_true = n;
466     osm->is           = 0;
467     if (is) {
468       ierr = PetscMalloc(n*sizeof(IS *),&osm->is);CHKERRQ(ierr);
469       for (i=0; i<n; i++) { osm->is[i] = is[i]; }
470     }
471   }
472   PetscFunctionReturn(0);
473 }
474 EXTERN_C_END
475 
476 EXTERN_C_BEGIN
477 #undef __FUNCT__
478 #define __FUNCT__ "PCASMSetTotalSubdomains_ASM"
479 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetTotalSubdomains_ASM(PC pc,PetscInt N,IS *is)
480 {
481   PC_ASM         *osm = (PC_ASM*)pc->data;
482   PetscErrorCode ierr;
483   PetscMPIInt    rank,size;
484   PetscInt       n;
485 
486   PetscFunctionBegin;
487   if (N < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Number of total blocks must be > 0, N = %D",N);
488   if (is) SETERRQ(PETSC_ERR_SUP,"Use PCASMSetLocalSubdomains() to set specific index sets\n\they cannot be set globally yet.");
489 
490   /*
491      Split the subdomains equally amoung all processors
492   */
493   ierr = MPI_Comm_rank(((PetscObject)pc)->comm,&rank);CHKERRQ(ierr);
494   ierr = MPI_Comm_size(((PetscObject)pc)->comm,&size);CHKERRQ(ierr);
495   n = N/size + ((N % size) > rank);
496   if (!n) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"Process %d must have at least one block: total processors %d total blocks %D",(int)rank,(int)size,N);
497   if (pc->setupcalled && n != osm->n_local_true) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"PCASMSetTotalSubdomains() should be called before PCSetup().");
498   if (!pc->setupcalled) {
499     if (osm->is) {
500       ierr = PCASMDestroySubdomains(osm->n_local_true,osm->is);CHKERRQ(ierr);
501     }
502     osm->n_local_true = n;
503     osm->is           = 0;
504   }
505   PetscFunctionReturn(0);
506 }
507 EXTERN_C_END
508 
509 EXTERN_C_BEGIN
510 #undef __FUNCT__
511 #define __FUNCT__ "PCASMSetOverlap_ASM"
512 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetOverlap_ASM(PC pc,PetscInt ovl)
513 {
514   PC_ASM *osm = (PC_ASM*)pc->data;
515 
516   PetscFunctionBegin;
517   if (ovl < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
518   if (pc->setupcalled && ovl != osm->overlap) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"PCASMSetOverlap() should be called before PCSetup().");
519   if (!pc->setupcalled) {
520     osm->overlap = ovl;
521   }
522   PetscFunctionReturn(0);
523 }
524 EXTERN_C_END
525 
526 EXTERN_C_BEGIN
527 #undef __FUNCT__
528 #define __FUNCT__ "PCASMSetType_ASM"
529 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetType_ASM(PC pc,PCASMType type)
530 {
531   PC_ASM *osm = (PC_ASM*)pc->data;
532 
533   PetscFunctionBegin;
534   osm->type     = type;
535   osm->type_set = PETSC_TRUE;
536   PetscFunctionReturn(0);
537 }
538 EXTERN_C_END
539 
540 EXTERN_C_BEGIN
541 #undef __FUNCT__
542 #define __FUNCT__ "PCASMGetSubKSP_ASM"
543 PetscErrorCode PETSCKSP_DLLEXPORT PCASMGetSubKSP_ASM(PC pc,PetscInt *n_local,PetscInt *first_local,KSP **ksp)
544 {
545   PC_ASM         *osm = (PC_ASM*)pc->data;
546   PetscErrorCode ierr;
547 
548   PetscFunctionBegin;
549   if (osm->n_local_true < 1) {
550     SETERRQ(PETSC_ERR_ORDER,"Need to call PCSetUP() on PC (or KSPSetUp() on the outer KSP object) before calling here");
551   }
552 
553   if (n_local) {
554     *n_local = osm->n_local_true;
555   }
556   if (first_local) {
557     ierr = MPI_Scan(&osm->n_local_true,first_local,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr);
558     *first_local -= osm->n_local_true;
559   }
560   if (ksp) {
561     /* Assume that local solves are now different; not necessarily
562        true though!  This flag is used only for PCView_ASM() */
563     *ksp                   = osm->ksp;
564     osm->same_local_solves = PETSC_FALSE;
565   }
566   PetscFunctionReturn(0);
567 }
568 EXTERN_C_END
569 
570 EXTERN_C_BEGIN
571 #undef __FUNCT__
572 #define __FUNCT__ "PCASMSetUseInPlace_ASM"
573 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetUseInPlace_ASM(PC pc)
574 {
575   PC_ASM *osm = (PC_ASM*)pc->data;
576 
577   PetscFunctionBegin;
578   osm->inplace = PETSC_TRUE;
579   PetscFunctionReturn(0);
580 }
581 EXTERN_C_END
582 
583 /*----------------------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "PCASMSetUseInPlace"
/*@
   PCASMSetUseInPlace - Tells the system to destroy the matrix after setup is done.

   Collective on PC

   Input Parameters:
.  pc - the preconditioner context

   Options Database Key:
.  -pc_asm_in_place - Activates in-place factorization

   Note:
   PCASMSetUseInPlace() can only be used with the KSP method KSPPREONLY, and
   when the original matrix is not required during the Solve process.
   This destroys the matrix early, thus saving on memory usage.

   Level: intermediate

.keywords: PC, set, factorization, direct, inplace, in-place, ASM

.seealso: PCFactorSetUseInPlace()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetUseInPlace(PC pc)
{
  PetscErrorCode ierr,(*f)(PC);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_COOKIE,1);
  /* dispatch to the type-specific implementation, if the PC type has one */
  ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetUseInPlace_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(pc);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
620 /*----------------------------------------------------------------------------*/
621 
622 #undef __FUNCT__
623 #define __FUNCT__ "PCASMSetLocalSubdomains"
624 /*@C
625     PCASMSetLocalSubdomains - Sets the local subdomains (for this processor
626     only) for the additive Schwarz preconditioner.
627 
628     Collective on PC
629 
630     Input Parameters:
631 +   pc - the preconditioner context
632 .   n - the number of subdomains for this processor (default value = 1)
633 -   is - the index sets that define the subdomains for this processor
634          (or PETSC_NULL for PETSc to determine subdomains)
635 
636     Notes:
637     The IS numbering is in the parallel, global numbering of the vector.
638 
639     By default the ASM preconditioner uses 1 block per processor.
640 
641     These index sets cannot be destroyed until after completion of the
642     linear solves for which the ASM preconditioner is being used.
643 
644     Use PCASMSetTotalSubdomains() to set the subdomains for all processors.
645 
646     Level: advanced
647 
648 .keywords: PC, ASM, set, local, subdomains, additive Schwarz
649 
650 .seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
651           PCASMCreateSubdomains2D(), PCASMGetLocalSubdomains()
652 @*/
653 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetLocalSubdomains(PC pc,PetscInt n,IS is[])
654 {
655   PetscErrorCode ierr,(*f)(PC,PetscInt,IS[]);
656 
657   PetscFunctionBegin;
658   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
659   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetLocalSubdomains_C",(void (**)(void))&f);CHKERRQ(ierr);
660   if (f) {
661     ierr = (*f)(pc,n,is);CHKERRQ(ierr);
662   }
663   PetscFunctionReturn(0);
664 }
665 
666 #undef __FUNCT__
667 #define __FUNCT__ "PCASMSetTotalSubdomains"
668 /*@C
669     PCASMSetTotalSubdomains - Sets the subdomains for all processor for the
670     additive Schwarz preconditioner.  Either all or no processors in the
671     PC communicator must call this routine, with the same index sets.
672 
673     Collective on PC
674 
675     Input Parameters:
676 +   pc - the preconditioner context
677 .   n - the number of subdomains for all processors
678 -   is - the index sets that define the subdomains for all processor
679          (or PETSC_NULL for PETSc to determine subdomains)
680 
681     Options Database Key:
682     To set the total number of subdomain blocks rather than specify the
683     index sets, use the option
684 .    -pc_asm_blocks <blks> - Sets total blocks
685 
686     Notes:
687     Currently you cannot use this to set the actual subdomains with the argument is.
688 
689     By default the ASM preconditioner uses 1 block per processor.
690 
691     These index sets cannot be destroyed until after completion of the
692     linear solves for which the ASM preconditioner is being used.
693 
694     Use PCASMSetLocalSubdomains() to set local subdomains.
695 
696     Level: advanced
697 
698 .keywords: PC, ASM, set, total, global, subdomains, additive Schwarz
699 
700 .seealso: PCASMSetLocalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
701           PCASMCreateSubdomains2D()
702 @*/
703 PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetTotalSubdomains(PC pc,PetscInt N,IS *is)
704 {
705   PetscErrorCode ierr,(*f)(PC,PetscInt,IS *);
706 
707   PetscFunctionBegin;
708   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
709   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetTotalSubdomains_C",(void (**)(void))&f);CHKERRQ(ierr);
710   if (f) {
711     ierr = (*f)(pc,N,is);CHKERRQ(ierr);
712   }
713   PetscFunctionReturn(0);
714 }
715 
#undef __FUNCT__
#define __FUNCT__ "PCASMSetOverlap"
/*@
    PCASMSetOverlap - Sets the overlap between a pair of subdomains for the
    additive Schwarz preconditioner.  Either all or no processors in the
    PC communicator must call this routine.

    Collective on PC

    Input Parameters:
+   pc  - the preconditioner context
-   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 1)

    Options Database Key:
.   -pc_asm_overlap <ovl> - Sets overlap

    Notes:
    By default the ASM preconditioner uses 1 block per processor.  To use
    multiple blocks per processor, see PCASMSetTotalSubdomains() and
    PCASMSetLocalSubdomains() (and the option -pc_asm_blocks <blks>).

    The overlap defaults to 1, so if one desires that no additional
    overlap be computed beyond what may have been set with a call to
    PCASMSetTotalSubdomains() or PCASMSetLocalSubdomains(), then ovl
    must be set to be 0.  In particular, if one does not explicitly set
    the subdomains in an application code, then all overlap would be computed
    internally by PETSc, and using an overlap of 0 would result in an ASM
    variant that is equivalent to the block Jacobi preconditioner.

    Note that one can define initial index sets with any overlap via
    PCASMSetTotalSubdomains() or PCASMSetLocalSubdomains(); the routine
    PCASMSetOverlap() merely allows PETSc to extend that overlap further
    if desired.

    Level: intermediate

.keywords: PC, ASM, set, overlap

.seealso: PCASMSetTotalSubdomains(), PCASMSetLocalSubdomains(), PCASMGetSubKSP(),
          PCASMCreateSubdomains2D(), PCASMGetLocalSubdomains()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetOverlap(PC pc,PetscInt ovl)
{
  PetscErrorCode ierr,(*f)(PC,PetscInt);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_COOKIE,1);
  /* dispatch to the type-specific implementation, if the PC type has one */
  ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetOverlap_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(pc,ovl);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
769 
#undef __FUNCT__
#define __FUNCT__ "PCASMSetType"
/*@
    PCASMSetType - Sets the type of restriction and interpolation used
    for local problems in the additive Schwarz method.

    Collective on PC

    Input Parameters:
+   pc  - the preconditioner context
-   type - variant of ASM, one of
.vb
      PC_ASM_BASIC       - full interpolation and restriction
      PC_ASM_RESTRICT    - full restriction, local processor interpolation
      PC_ASM_INTERPOLATE - full interpolation, local processor restriction
      PC_ASM_NONE        - local processor restriction and interpolation
.ve

    Options Database Key:
.   -pc_asm_type [basic,restrict,interpolate,none] - Sets ASM type

    Level: intermediate

.keywords: PC, ASM, set, type

.seealso: PCASMSetTotalSubdomains(), PCASMSetLocalSubdomains(), PCASMGetSubKSP(),
          PCASMCreateSubdomains2D()
@*/
PetscErrorCode PETSCKSP_DLLEXPORT PCASMSetType(PC pc,PCASMType type)
{
  PetscErrorCode ierr,(*f)(PC,PCASMType);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_COOKIE,1);
  /* dispatch to the type-specific implementation, if the PC type has one */
  ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMSetType_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(pc,type);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
810 
811 #undef __FUNCT__
812 #define __FUNCT__ "PCASMGetSubKSP"
813 /*@C
814    PCASMGetSubKSP - Gets the local KSP contexts for all blocks on
815    this processor.
816 
817    Collective on PC iff first_local is requested
818 
819    Input Parameter:
820 .  pc - the preconditioner context
821 
822    Output Parameters:
823 +  n_local - the number of blocks on this processor or PETSC_NULL
824 .  first_local - the global number of the first block on this processor or PETSC_NULL,
825                  all processors must request or all must pass PETSC_NULL
826 -  ksp - the array of KSP contexts
827 
828    Note:
829    After PCASMGetSubKSP() the array of KSPes is not to be freed
830 
831    Currently for some matrix implementations only 1 block per processor
832    is supported.
833 
834    You must call KSPSetUp() before calling PCASMGetSubKSP().
835 
836    Level: advanced
837 
838 .keywords: PC, ASM, additive Schwarz, get, sub, KSP, context
839 
840 .seealso: PCASMSetTotalSubdomains(), PCASMSetTotalSubdomains(), PCASMSetOverlap(),
841           PCASMCreateSubdomains2D(),
842 @*/
843 PetscErrorCode PETSCKSP_DLLEXPORT PCASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
844 {
845   PetscErrorCode ierr,(*f)(PC,PetscInt*,PetscInt*,KSP **);
846 
847   PetscFunctionBegin;
848   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
849   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCASMGetSubKSP_C",(void (**)(void))&f);CHKERRQ(ierr);
850   if (f) {
851     ierr = (*f)(pc,n_local,first_local,ksp);CHKERRQ(ierr);
852   } else {
853     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot get subksp for this type of PC");
854   }
855 
856  PetscFunctionReturn(0);
857 }
858 
859 /* -------------------------------------------------------------------------------------*/
860 /*MC
861    PCASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
862            its own KSP object.
863 
864    Options Database Keys:
865 +  -pc_asm_truelocal - Activates PCASMSetUseTrueLocal()
866 .  -pc_asm_in_place - Activates in-place factorization
867 .  -pc_asm_blocks <blks> - Sets total blocks
868 .  -pc_asm_overlap <ovl> - Sets overlap
869 -  -pc_asm_type [basic,restrict,interpolate,none] - Sets ASM type
870 
871      IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
872       will get a different convergence rate due to the default option of -pc_asm_type restrict. Use
873       -pc_asm_type basic to use the standard ASM.
874 
875    Notes: Each processor can have one or more blocks, but a block cannot be shared by more
876      than one processor. Defaults to one block per processor.
877 
878      To set options on the solvers for each block append -sub_ to all the KSP, and PC
879         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
880 
881      To set the options on the solvers separate for each block call PCASMGetSubKSP()
882          and set the options directly on the resulting KSP object (you can access its PC
883          with KSPGetPC())
884 
885 
886    Level: beginner
887 
888    Concepts: additive Schwarz method
889 
890     References:
891     An additive variant of the Schwarz alternating method for the case of many subregions
892     M Dryja, OB Widlund - Courant Institute, New York University Technical report
893 
894     Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
895     Barry Smith, Petter Bjorstad, and William Gropp, Cambridge University Press, ISBN 0-521-49589-X.
896 
897 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
898            PCBJACOBI, PCASMSetUseTrueLocal(), PCASMGetSubKSP(), PCASMSetLocalSubdomains(),
899            PCASMSetTotalSubdomains(), PCSetModifySubmatrices(), PCASMSetOverlap(), PCASMSetType(),
900            PCASMSetUseInPlace()
901 M*/
902 
903 EXTERN_C_BEGIN
904 #undef __FUNCT__
905 #define __FUNCT__ "PCCreate_ASM"
906 PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_ASM(PC pc)
907 {
908   PetscErrorCode ierr;
909   PC_ASM         *osm;
910 
911   PetscFunctionBegin;
912   ierr = PetscNewLog(pc,PC_ASM,&osm);CHKERRQ(ierr);
913   osm->n                 = PETSC_DECIDE;
914   osm->n_local           = 0;
915   osm->n_local_true      = 0;
916   osm->overlap           = 1;
917   osm->ksp               = 0;
918   osm->scat              = 0;
919   osm->x                 = 0;
920   osm->y                 = 0;
921   osm->is                = 0;
922   osm->mat               = 0;
923   osm->pmat              = 0;
924   osm->type              = PC_ASM_RESTRICT;
925   osm->same_local_solves = PETSC_TRUE;
926   osm->inplace           = PETSC_FALSE;
927 
928   pc->data                   = (void*)osm;
929   pc->ops->apply             = PCApply_ASM;
930   pc->ops->applytranspose    = PCApplyTranspose_ASM;
931   pc->ops->setup             = PCSetUp_ASM;
932   pc->ops->destroy           = PCDestroy_ASM;
933   pc->ops->setfromoptions    = PCSetFromOptions_ASM;
934   pc->ops->setuponblocks     = PCSetUpOnBlocks_ASM;
935   pc->ops->view              = PCView_ASM;
936   pc->ops->applyrichardson   = 0;
937 
938   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetLocalSubdomains_C","PCASMSetLocalSubdomains_ASM",
939                     PCASMSetLocalSubdomains_ASM);CHKERRQ(ierr);
940   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetTotalSubdomains_C","PCASMSetTotalSubdomains_ASM",
941                     PCASMSetTotalSubdomains_ASM);CHKERRQ(ierr);
942   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetOverlap_C","PCASMSetOverlap_ASM",
943                     PCASMSetOverlap_ASM);CHKERRQ(ierr);
944   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetType_C","PCASMSetType_ASM",
945                     PCASMSetType_ASM);CHKERRQ(ierr);
946   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMGetSubKSP_C","PCASMGetSubKSP_ASM",
947                     PCASMGetSubKSP_ASM);CHKERRQ(ierr);
948   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASMSetUseInPlace_C","PCASMSetUseInPlace_ASM",
949                     PCASMSetUseInPlace_ASM);CHKERRQ(ierr);
950   PetscFunctionReturn(0);
951 }
952 EXTERN_C_END
953 
954 
955 #undef __FUNCT__
956 #define __FUNCT__ "PCASMCreateSubdomains"
957 /*@C
958    PCASMCreateSubdomains - Creates the index sets for the overlapping Schwarz
959    preconditioner for a any problem on a general grid.
960 
961    Collective
962 
963    Input Parameters:
964 +  A - The global matrix operator
965 -  n - the number of local blocks
966 
967    Output Parameters:
968 .  outis - the array of index sets defining the subdomains
969 
970    Level: advanced
971 
972    Note: this generates nonoverlapping subdomains; the PCASM will generate the overlap
973     from these if you use PCASMSetLocalSubdomains()
974 
975     In the Fortran version you must provide the array outis[] already allocated of length n.
976 
977 .keywords: PC, ASM, additive Schwarz, create, subdomains, unstructured grid
978 
979 .seealso: PCASMSetLocalSubdomains(), PCASMDestroySubdomains()
980 @*/
981 PetscErrorCode PETSCKSP_DLLEXPORT PCASMCreateSubdomains(Mat A, PetscInt n, IS* outis[])
982 {
983   MatPartitioning           mpart;
984   const char                *prefix;
985   PetscErrorCode            (*f)(Mat,PetscTruth*,MatReuse,Mat*);
986   PetscMPIInt               size;
987   PetscInt                  i,j,rstart,rend,bs;
988   PetscTruth                iscopy = PETSC_FALSE,isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
989   Mat                       Ad = PETSC_NULL, adj;
990   IS                        ispart,isnumb,*is;
991   PetscErrorCode            ierr;
992 
993   PetscFunctionBegin;
994   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
995   PetscValidPointer(outis,3);
996   if (n < 1) {SETERRQ1(PETSC_ERR_ARG_WRONG,"number of local blocks must be > 0, n = %D",n);}
997 
998   /* Get prefix, row distribution, and block size */
999   ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr);
1000   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1001   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
1002   if (rstart/bs*bs != rstart || rend/bs*bs != rend) {
1003     SETERRQ3(PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);
1004   }
1005   /* Get diagonal block from matrix if possible */
1006   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1007   ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",(void (**)(void))&f);CHKERRQ(ierr);
1008   if (f) {
1009     ierr = (*f)(A,&iscopy,MAT_INITIAL_MATRIX,&Ad);CHKERRQ(ierr);
1010   } else if (size == 1) {
1011     iscopy = PETSC_FALSE; Ad = A;
1012   } else {
1013     iscopy = PETSC_FALSE; Ad = PETSC_NULL;
1014   }
1015   if (Ad) {
1016     ierr = PetscTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
1017     if (!isbaij) {ierr = PetscTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);}
1018   }
1019   if (Ad && n > 1) {
1020     PetscTruth match,done;
1021     /* Try to setup a good matrix partitioning if available */
1022     ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr);
1023     ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
1024     ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
1025     ierr = PetscTypeCompare((PetscObject)mpart,MAT_PARTITIONING_CURRENT,&match);CHKERRQ(ierr);
1026     if (!match) {
1027       ierr = PetscTypeCompare((PetscObject)mpart,MAT_PARTITIONING_SQUARE,&match);CHKERRQ(ierr);
1028     }
1029     if (!match) { /* assume a "good" partitioner is available */
1030       PetscInt na,*ia,*ja;
1031       ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1032       if (done) {
1033 	/* Build adjacency matrix by hand. Unfortunately a call to
1034 	   MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1035 	   remove the block-aij structure and we cannot expect
1036 	   MatPartitioning to split vertices as we need */
1037 	PetscInt i,j,*row,len,nnz,cnt,*iia=0,*jja=0;
1038 	nnz = 0;
1039 	for (i=0; i<na; i++) { /* count number of nonzeros */
1040 	  len = ia[i+1] - ia[i];
1041 	  row = ja + ia[i];
1042 	  for (j=0; j<len; j++) {
1043 	    if (row[j] == i) { /* don't count diagonal */
1044 	      len--; break;
1045 	    }
1046 	  }
1047 	  nnz += len;
1048 	}
1049 	ierr = PetscMalloc((na+1)*sizeof(PetscInt),&iia);CHKERRQ(ierr);
1050 	ierr = PetscMalloc((nnz)*sizeof(PetscInt),&jja);CHKERRQ(ierr);
1051 	nnz    = 0;
1052 	iia[0] = 0;
1053 	for (i=0; i<na; i++) { /* fill adjacency */
1054 	  cnt = 0;
1055 	  len = ia[i+1] - ia[i];
1056 	  row = ja + ia[i];
1057 	  for (j=0; j<len; j++) {
1058 	    if (row[j] != i) { /* if not diagonal */
1059 	      jja[nnz+cnt++] = row[j];
1060 	    }
1061 	  }
1062 	  nnz += cnt;
1063 	  iia[i+1] = nnz;
1064 	}
1065 	/* Partitioning of the adjacency matrix */
1066 	ierr = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,PETSC_NULL,&adj);CHKERRQ(ierr);
1067 	ierr = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr);
1068 	ierr = MatPartitioningSetNParts(mpart,n);CHKERRQ(ierr);
1069 	ierr = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr);
1070 	ierr = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr);
1071 	ierr = MatDestroy(adj);CHKERRQ(ierr);
1072 	foundpart = PETSC_TRUE;
1073       }
1074       ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1075     }
1076     ierr = MatPartitioningDestroy(mpart);CHKERRQ(ierr);
1077   }
1078   if (iscopy) {ierr = MatDestroy(Ad);CHKERRQ(ierr);}
1079 
1080   ierr = PetscMalloc(n*sizeof(IS),&is);CHKERRQ(ierr);
1081   *outis = is;
1082 
1083   if (!foundpart) {
1084 
1085     /* Partitioning by contiguous chunks of rows */
1086 
1087     PetscInt mbs   = (rend-rstart)/bs;
1088     PetscInt start = rstart;
1089     for (i=0; i<n; i++) {
1090       PetscInt count = (mbs/n + ((mbs % n) > i)) * bs;
1091       ierr   = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr);
1092       start += count;
1093     }
1094 
1095   } else {
1096 
1097     /* Partitioning by adjacency of diagonal block  */
1098 
1099     const PetscInt *numbering;
1100     PetscInt       *count,nidx,*indices,*newidx,start=0;
1101     /* Get node count in each partition */
1102     ierr = PetscMalloc(n*sizeof(PetscInt),&count);CHKERRQ(ierr);
1103     ierr = ISPartitioningCount(ispart,n,count);CHKERRQ(ierr);
1104     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1105       for (i=0; i<n; i++) count[i] *= bs;
1106     }
1107     /* Build indices from node numbering */
1108     ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr);
1109     ierr = PetscMalloc(nidx*sizeof(PetscInt),&indices);CHKERRQ(ierr);
1110     for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
1111     ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr);
1112     ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr);
1113     ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr);
1114     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1115       ierr = PetscMalloc(nidx*bs*sizeof(PetscInt),&newidx);CHKERRQ(ierr);
1116       for (i=0; i<nidx; i++)
1117 	for (j=0; j<bs; j++)
1118 	  newidx[i*bs+j] = indices[i]*bs + j;
1119       ierr = PetscFree(indices);CHKERRQ(ierr);
1120       nidx   *= bs;
1121       indices = newidx;
1122     }
1123     /* Shift to get global indices */
1124     for (i=0; i<nidx; i++) indices[i] += rstart;
1125 
1126     /* Build the index sets for each block */
1127     for (i=0; i<n; i++) {
1128       ierr   = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],&is[i]);CHKERRQ(ierr);
1129       ierr   = ISSort(is[i]);CHKERRQ(ierr);
1130       start += count[i];
1131     }
1132 
1133     ierr = PetscFree(count);
1134     ierr = PetscFree(indices);
1135     ierr = ISDestroy(isnumb);CHKERRQ(ierr);
1136     ierr = ISDestroy(ispart);CHKERRQ(ierr);
1137 
1138   }
1139 
1140   PetscFunctionReturn(0);
1141 }
1142 
1143 #undef __FUNCT__
1144 #define __FUNCT__ "PCASMDestroySubdomains"
1145 /*@C
1146    PCASMDestroySubdomains - Destroys the index sets created with
1147    PCASMCreateSubdomains(). Should be called after setting subdomains
1148    with PCASMSetLocalSubdomains().
1149 
1150    Collective
1151 
1152    Input Parameters:
1153 +  n - the number of index sets
1154 -  is - the array of index sets
1155 
1156    Level: advanced
1157 
1158 .keywords: PC, ASM, additive Schwarz, create, subdomains, unstructured grid
1159 
1160 .seealso: PCASMCreateSubdomains(), PCASMSetLocalSubdomains()
1161 @*/
1162 PetscErrorCode PETSCKSP_DLLEXPORT PCASMDestroySubdomains(PetscInt n, IS is[])
1163 {
1164   PetscInt       i;
1165   PetscErrorCode ierr;
1166   PetscFunctionBegin;
1167   if (n <= 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"n must be > 0: n = %D",n);
1168   PetscValidPointer(is,2);
1169   for (i=0; i<n; i++) { ierr = ISDestroy(is[i]);CHKERRQ(ierr); }
1170   ierr = PetscFree(is);CHKERRQ(ierr);
1171   PetscFunctionReturn(0);
1172 }
1173 
1174 #undef __FUNCT__
1175 #define __FUNCT__ "PCASMCreateSubdomains2D"
1176 /*@
1177    PCASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1178    preconditioner for a two-dimensional problem on a regular grid.
1179 
1180    Not Collective
1181 
1182    Input Parameters:
1183 +  m, n - the number of mesh points in the x and y directions
1184 .  M, N - the number of subdomains in the x and y directions
1185 .  dof - degrees of freedom per node
1186 -  overlap - overlap in mesh lines
1187 
1188    Output Parameters:
1189 +  Nsub - the number of subdomains created
1190 -  is - the array of index sets defining the subdomains
1191 
1192    Note:
1193    Presently PCAMSCreateSubdomains2d() is valid only for sequential
1194    preconditioners.  More general related routines are
1195    PCASMSetTotalSubdomains() and PCASMSetLocalSubdomains().
1196 
1197    Level: advanced
1198 
1199 .keywords: PC, ASM, additive Schwarz, create, subdomains, 2D, regular grid
1200 
1201 .seealso: PCASMSetTotalSubdomains(), PCASMSetLocalSubdomains(), PCASMGetSubKSP(),
1202           PCASMSetOverlap()
1203 @*/
1204 PetscErrorCode PETSCKSP_DLLEXPORT PCASMCreateSubdomains2D(PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt dof,PetscInt overlap,PetscInt *Nsub,IS **is)
1205 {
1206   PetscInt       i,j,height,width,ystart,xstart,yleft,yright,xleft,xright,loc_outter;
1207   PetscErrorCode ierr;
1208   PetscInt       nidx,*idx,loc,ii,jj,count;
1209 
1210   PetscFunctionBegin;
1211   if (dof != 1) SETERRQ(PETSC_ERR_SUP," ");
1212 
1213   *Nsub = N*M;
1214   ierr = PetscMalloc((*Nsub)*sizeof(IS *),is);CHKERRQ(ierr);
1215   ystart = 0;
1216   loc_outter = 0;
1217   for (i=0; i<N; i++) {
1218     height = n/N + ((n % N) > i); /* height of subdomain */
1219     if (height < 2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Too many N subdomains for mesh dimension n");
1220     yleft  = ystart - overlap; if (yleft < 0) yleft = 0;
1221     yright = ystart + height + overlap; if (yright > n) yright = n;
1222     xstart = 0;
1223     for (j=0; j<M; j++) {
1224       width = m/M + ((m % M) > j); /* width of subdomain */
1225       if (width < 2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Too many M subdomains for mesh dimension m");
1226       xleft  = xstart - overlap; if (xleft < 0) xleft = 0;
1227       xright = xstart + width + overlap; if (xright > m) xright = m;
1228       nidx   = (xright - xleft)*(yright - yleft);
1229       ierr = PetscMalloc(nidx*sizeof(PetscInt),&idx);CHKERRQ(ierr);
1230       loc    = 0;
1231       for (ii=yleft; ii<yright; ii++) {
1232         count = m*ii + xleft;
1233         for (jj=xleft; jj<xright; jj++) {
1234           idx[loc++] = count++;
1235         }
1236       }
1237       ierr = ISCreateGeneral(PETSC_COMM_SELF,nidx,idx,(*is)+loc_outter++);CHKERRQ(ierr);
1238       ierr = PetscFree(idx);CHKERRQ(ierr);
1239       xstart += width;
1240     }
1241     ystart += height;
1242   }
1243   for (i=0; i<*Nsub; i++) { ierr = ISSort((*is)[i]);CHKERRQ(ierr); }
1244   PetscFunctionReturn(0);
1245 }
1246 
1247 #undef __FUNCT__
1248 #define __FUNCT__ "PCASMGetLocalSubdomains"
1249 /*@C
1250     PCASMGetLocalSubdomains - Gets the local subdomains (for this processor
1251     only) for the additive Schwarz preconditioner.
1252 
1253     Collective on PC
1254 
1255     Input Parameter:
1256 .   pc - the preconditioner context
1257 
1258     Output Parameters:
1259 +   n - the number of subdomains for this processor (default value = 1)
1260 -   is - the index sets that define the subdomains for this processor
1261 
1262 
1263     Notes:
1264     The IS numbering is in the parallel, global numbering of the vector.
1265 
1266     Level: advanced
1267 
1268 .keywords: PC, ASM, set, local, subdomains, additive Schwarz
1269 
1270 .seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
1271           PCASMCreateSubdomains2D(), PCASMSetLocalSubdomains(), PCASMGetLocalSubmatrices()
1272 @*/
1273 PetscErrorCode PETSCKSP_DLLEXPORT PCASMGetLocalSubdomains(PC pc,PetscInt *n,IS *is[])
1274 {
1275   PC_ASM         *osm;
1276   PetscErrorCode ierr;
1277   PetscTruth     match;
1278 
1279   PetscFunctionBegin;
1280   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
1281   PetscValidIntPointer(n,2);
1282   if (is) PetscValidPointer(is,3);
1283   ierr = PetscTypeCompare((PetscObject)pc,PCASM,&match);CHKERRQ(ierr);
1284   if (!match) {
1285     if (n)  *n  = 0;
1286     if (is) *is = PETSC_NULL;
1287   } else {
1288     osm = (PC_ASM*)pc->data;
1289     if (n)  *n  = osm->n_local_true;
1290     if (is) *is = osm->is;
1291   }
1292   PetscFunctionReturn(0);
1293 }
1294 
1295 #undef __FUNCT__
1296 #define __FUNCT__ "PCASMGetLocalSubmatrices"
1297 /*@C
1298     PCASMGetLocalSubmatrices - Gets the local submatrices (for this processor
1299     only) for the additive Schwarz preconditioner.
1300 
1301     Collective on PC
1302 
1303     Input Parameter:
1304 .   pc - the preconditioner context
1305 
1306     Output Parameters:
1307 +   n - the number of matrices for this processor (default value = 1)
1308 -   mat - the matrices
1309 
1310 
1311     Level: advanced
1312 
1313 .keywords: PC, ASM, set, local, subdomains, additive Schwarz, block Jacobi
1314 
1315 .seealso: PCASMSetTotalSubdomains(), PCASMSetOverlap(), PCASMGetSubKSP(),
1316           PCASMCreateSubdomains2D(), PCASMSetLocalSubdomains(), PCASMGetLocalSubdomains()
1317 @*/
1318 PetscErrorCode PETSCKSP_DLLEXPORT PCASMGetLocalSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1319 {
1320   PC_ASM         *osm;
1321   PetscErrorCode ierr;
1322   PetscTruth     match;
1323 
1324   PetscFunctionBegin;
1325   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
1326   PetscValidIntPointer(n,2);
1327   if (mat) PetscValidPointer(mat,3);
1328   if (!pc->setupcalled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUP() or PCSetUp().");
1329   ierr = PetscTypeCompare((PetscObject)pc,PCASM,&match);CHKERRQ(ierr);
1330   if (!match) {
1331     if (n)   *n   = 0;
1332     if (mat) *mat = PETSC_NULL;
1333   } else {
1334     osm = (PC_ASM*)pc->data;
1335     if (n)   *n   = osm->n_local_true;
1336     if (mat) *mat = osm->pmat;
1337   }
1338   PetscFunctionReturn(0);
1339 }
1340