xref: /petsc/src/ksp/pc/impls/gamg/agg.c (revision 5f042095cd6794de959e41580753d6ff4dec299c)
1 /*
2  GAMG geometric-algebric multiogrid PC - Mark Adams 2011
3  */
4 
5 #include <../src/ksp/pc/impls/gamg/gamg.h>        /*I "petscpc.h" I*/
6 #include <petsc-private/kspimpl.h>
7 
8 #include <assert.h>
9 #include <petscblaslapack.h>
10 
11 typedef struct {
12   PetscInt nsmooths;
13   PetscBool sym_graph;
14   PetscBool square_graph;
15 }PC_GAMG_AGG;
16 
17 #undef __FUNCT__
18 #define __FUNCT__ "PCGAMGSetNSmooths"
19 /*@
20    PCGAMGSetNSmooths - Set number of smoothing steps (1 is typical)
21 
22    Not Collective on PC
23 
24    Input Parameters:
25 .  pc - the preconditioner context
26 
27    Options Database Key:
28 .  -pc_gamg_agg_nsmooths
29 
30    Level: intermediate
31 
32    Concepts: Aggregation AMG preconditioner
33 
34 .seealso: ()
35 @*/
36 PetscErrorCode PCGAMGSetNSmooths(PC pc, PetscInt n)
37 {
38   PetscErrorCode ierr;
39 
40   PetscFunctionBegin;
41   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
42   ierr = PetscTryMethod(pc,"PCGAMGSetNSmooths_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
43   PetscFunctionReturn(0);
44 }
45 
46 EXTERN_C_BEGIN
47 #undef __FUNCT__
48 #define __FUNCT__ "PCGAMGSetNSmooths_GAMG"
49 PetscErrorCode PCGAMGSetNSmooths_GAMG(PC pc, PetscInt n)
50 {
51   PC_MG           *mg = (PC_MG*)pc->data;
52   PC_GAMG         *pc_gamg = (PC_GAMG*)mg->innerctx;
53   PC_GAMG_AGG      *pc_gamg_agg = (PC_GAMG_AGG*)pc_gamg->subctx;
54 
55   PetscFunctionBegin;
56   pc_gamg_agg->nsmooths = n;
57   PetscFunctionReturn(0);
58 }
59 EXTERN_C_END
60 
61 #undef __FUNCT__
62 #define __FUNCT__ "PCGAMGSetSymGraph"
63 /*@
64    PCGAMGSetSymGraph -
65 
66    Not Collective on PC
67 
68    Input Parameters:
69 .  pc - the preconditioner context
70 
71    Options Database Key:
72 .  -pc_gamg_sym_graph
73 
74    Level: intermediate
75 
76    Concepts: Aggregation AMG preconditioner
77 
78 .seealso: ()
79 @*/
80 PetscErrorCode PCGAMGSetSymGraph(PC pc, PetscBool n)
81 {
82   PetscErrorCode ierr;
83 
84   PetscFunctionBegin;
85   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
86   ierr = PetscTryMethod(pc,"PCGAMGSetSymGraph_C",(PC,PetscBool),(pc,n));CHKERRQ(ierr);
87   PetscFunctionReturn(0);
88 }
89 
90 EXTERN_C_BEGIN
91 #undef __FUNCT__
92 #define __FUNCT__ "PCGAMGSetSymGraph_GAMG"
93 PetscErrorCode PCGAMGSetSymGraph_GAMG(PC pc, PetscBool n)
94 {
95   PC_MG           *mg = (PC_MG*)pc->data;
96   PC_GAMG         *pc_gamg = (PC_GAMG*)mg->innerctx;
97   PC_GAMG_AGG      *pc_gamg_agg = (PC_GAMG_AGG*)pc_gamg->subctx;
98 
99   PetscFunctionBegin;
100   pc_gamg_agg->sym_graph = n;
101   PetscFunctionReturn(0);
102 }
103 EXTERN_C_END
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "PCGAMGSetSquareGraph"
107 /*@
108    PCGAMGSetSquareGraph -
109 
110    Not Collective on PC
111 
112    Input Parameters:
113 .  pc - the preconditioner context
114 
115    Options Database Key:
116 .  -pc_gamg_square_graph
117 
118    Level: intermediate
119 
120    Concepts: Aggregation AMG preconditioner
121 
122 .seealso: ()
123 @*/
124 PetscErrorCode PCGAMGSetSquareGraph(PC pc, PetscBool n)
125 {
126   PetscErrorCode ierr;
127 
128   PetscFunctionBegin;
129   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
130   ierr = PetscTryMethod(pc,"PCGAMGSetSquareGraph_C",(PC,PetscBool),(pc,n));CHKERRQ(ierr);
131   PetscFunctionReturn(0);
132 }
133 
134 EXTERN_C_BEGIN
135 #undef __FUNCT__
136 #define __FUNCT__ "PCGAMGSetSquareGraph_GAMG"
137 PetscErrorCode PCGAMGSetSquareGraph_GAMG(PC pc, PetscBool n)
138 {
139   PC_MG           *mg = (PC_MG*)pc->data;
140   PC_GAMG         *pc_gamg = (PC_GAMG*)mg->innerctx;
141   PC_GAMG_AGG      *pc_gamg_agg = (PC_GAMG_AGG*)pc_gamg->subctx;
142 
143   PetscFunctionBegin;
144   pc_gamg_agg->square_graph = n;
145   PetscFunctionReturn(0);
146 }
147 EXTERN_C_END
148 
149 /* -------------------------------------------------------------------------- */
150 /*
151    PCSetFromOptions_GAMG_AGG
152 
153   Input Parameter:
154    . pc -
155 */
156 #undef __FUNCT__
157 #define __FUNCT__ "PCSetFromOptions_GAMG_AGG"
158 PetscErrorCode PCSetFromOptions_GAMG_AGG( PC pc )
159 {
160   PetscErrorCode  ierr;
161   PC_MG           *mg = (PC_MG*)pc->data;
162   PC_GAMG         *pc_gamg = (PC_GAMG*)mg->innerctx;
163   PC_GAMG_AGG      *pc_gamg_agg = (PC_GAMG_AGG*)pc_gamg->subctx;
164   PetscBool        flag;
165 
166   PetscFunctionBegin;
167   /* call base class */
168   ierr = PCSetFromOptions_GAMG( pc ); CHKERRQ(ierr);
169 
170   ierr = PetscOptionsHead("GAMG-AGG options"); CHKERRQ(ierr);
171   {
172     /* -pc_gamg_agg_nsmooths */
173     pc_gamg_agg->nsmooths = 0;
174     ierr = PetscOptionsInt("-pc_gamg_agg_nsmooths",
175                            "smoothing steps for smoothed aggregation, usually 1 (0)",
176                            "PCGAMGSetNSmooths",
177                            pc_gamg_agg->nsmooths,
178                            &pc_gamg_agg->nsmooths,
179                            &flag);
180     CHKERRQ(ierr);
181 
182     /* -pc_gamg_sym_graph */
183     pc_gamg_agg->sym_graph = PETSC_FALSE;
184     ierr = PetscOptionsBool("-pc_gamg_sym_graph",
185                             "Set for asymmetric matrices",
186                             "PCGAMGSetSymGraph",
187                             pc_gamg_agg->sym_graph,
188                             &pc_gamg_agg->sym_graph,
189                             &flag);
190     CHKERRQ(ierr);
191 
192     /* -pc_gamg_square_graph */
193     pc_gamg_agg->square_graph = PETSC_TRUE;
194     ierr = PetscOptionsBool("-pc_gamg_square_graph",
195                             "For faster coarsening and lower coarse grid complexity",
196                             "PCGAMGSetSquareGraph",
197                             pc_gamg_agg->square_graph,
198                             &pc_gamg_agg->square_graph,
199                             &flag);
200     CHKERRQ(ierr);
201   }
202   ierr = PetscOptionsTail();CHKERRQ(ierr);
203 
204   PetscFunctionReturn(0);
205 }
206 
207 /* -------------------------------------------------------------------------- */
208 /*
209    PCDestroy_AGG
210 
211   Input Parameter:
212    . pc -
213 */
214 #undef __FUNCT__
215 #define __FUNCT__ "PCDestroy_AGG"
216 PetscErrorCode PCDestroy_AGG( PC pc )
217 {
218   PetscErrorCode  ierr;
219   PC_MG           *mg = (PC_MG*)pc->data;
220   PC_GAMG         *pc_gamg = (PC_GAMG*)mg->innerctx;
221   PC_GAMG_AGG      *pc_gamg_agg = (PC_GAMG_AGG*)pc_gamg->subctx;
222 
223   PetscFunctionBegin;
224   if( pc_gamg_agg ) {
225     ierr = PetscFree(pc_gamg_agg);CHKERRQ(ierr);
226     pc_gamg_agg = 0;
227   }
228 
229   /* call base class */
230   ierr = PCDestroy_GAMG( pc );CHKERRQ(ierr);
231 
232   PetscFunctionReturn(0);
233 }
234 
235 /* -------------------------------------------------------------------------- */
236 /*
237    PCSetCoordinates_AGG
238 
239    Input Parameter:
240    .  pc - the preconditioner context
241 */
242 EXTERN_C_BEGIN
243 #undef __FUNCT__
244 #define __FUNCT__ "PCSetCoordinates_AGG"
245 PetscErrorCode PCSetCoordinates_AGG( PC pc, PetscInt ndm, PetscReal *coords )
246 {
247   PC_MG          *mg = (PC_MG*)pc->data;
248   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
249   PetscErrorCode ierr;
250   PetscInt       arrsz,bs,my0,kk,ii,jj,nloc,Iend;
251   Mat            Amat = pc->pmat;
252 
253   PetscFunctionBegin;
254   PetscValidHeaderSpecific( Amat, MAT_CLASSID, 1 );
255   ierr  = MatGetBlockSize( Amat, &bs );               CHKERRQ( ierr );
256   ierr  = MatGetOwnershipRange( Amat, &my0, &Iend ); CHKERRQ(ierr);
257   nloc = (Iend-my0)/bs;
258   if((Iend-my0)%bs!=0) SETERRQ1(((PetscObject)Amat)->comm,PETSC_ERR_ARG_WRONG, "Bad local size %d.",nloc);
259 
260   /* SA: null space vectors */
261   if( coords && bs==1 ) pc_gamg->data_cell_cols = 1; /* scalar w/ coords and SA (not needed) */
262   else if( coords ) pc_gamg->data_cell_cols = (ndm==2 ? 3 : 6); /* elasticity */
263   else pc_gamg->data_cell_cols = bs; /* no data, force SA with constant null space vectors */
264   pc_gamg->data_cell_rows = bs;
265 
266   arrsz = nloc*pc_gamg->data_cell_rows*pc_gamg->data_cell_cols;
267 
268   /* create data - syntactic sugar that should be refactored at some point */
269   if (pc_gamg->data==0 || (pc_gamg->data_sz != arrsz)) {
270     ierr = PetscFree( pc_gamg->data );  CHKERRQ(ierr);
271     ierr = PetscMalloc(arrsz*sizeof(PetscReal), &pc_gamg->data ); CHKERRQ(ierr);
272   }
273   /* copy data in - column oriented */
274   for(kk=0;kk<nloc;kk++){
275     const PetscInt M = Iend - my0;
276     PetscReal *data = &pc_gamg->data[kk*bs];
277     if( pc_gamg->data_cell_cols==1 ) *data = 1.0;
278     else {
279       for(ii=0;ii<bs;ii++)
280         for(jj=0;jj<bs;jj++)
281           if(ii==jj)data[ii*M + jj] = 1.0; /* translational modes */
282           else data[ii*M + jj] = 0.0;
283       if( coords ) {
284         if( ndm == 2 ){ /* rotational modes */
285           data += 2*M;
286           data[0] = -coords[2*kk+1];
287           data[1] =  coords[2*kk];
288         }
289         else {
290           data += 3*M;
291           data[0] = 0.0;               data[M+0] =  coords[3*kk+2]; data[2*M+0] = -coords[3*kk+1];
292           data[1] = -coords[3*kk+2]; data[M+1] = 0.0;               data[2*M+1] =  coords[3*kk];
293           data[2] =  coords[3*kk+1]; data[M+2] = -coords[3*kk];   data[2*M+2] = 0.0;
294         }
295       }
296     }
297   }
298 
299   pc_gamg->data_sz = arrsz;
300 
301   PetscFunctionReturn(0);
302 }
303 EXTERN_C_END
304 
305 typedef PetscInt NState;
306 static const NState NOT_DONE=-2;
307 static const NState DELETED=-1;
308 static const NState REMOVED=-3;
309 #define IS_SELECTED(s) (s!=DELETED && s!=NOT_DONE && s!=REMOVED)
310 
311 /* -------------------------------------------------------------------------- */
312 /*
313    smoothAggs - greedy grab of with G1 (unsquared graph) -- AIJ specific
314      - AGG-MG specific: clears singletons out of 'selected_2'
315 
316    Input Parameter:
317    . Gmat_2 - glabal matrix of graph (data not defined)
318    . Gmat_1 - base graph to grab with
319    Input/Output Parameter:
320    . aggs_2 - linked list of aggs with gids )
321 */
322 #undef __FUNCT__
323 #define __FUNCT__ "smoothAggs"
324 static PetscErrorCode smoothAggs( const Mat Gmat_2, /* base (squared) graph */
325                                   const Mat Gmat_1, /* base graph */
326                                   /* const IS selected_2, [nselected local] selected vertices */
327                                   PetscCoarsenData *aggs_2 /* [nselected local] global ID of aggregate */
328                                   )
329 {
330   PetscErrorCode ierr;
331   PetscBool      isMPI;
332   Mat_SeqAIJ    *matA_1, *matB_1=0, *matA_2, *matB_2=0;
333   MPI_Comm       wcomm = ((PetscObject)Gmat_2)->comm;
334   PetscMPIInt    mype,npe;
335   PetscInt       lid,*ii,*idx,ix,Iend,my0,kk,n,j;
336   Mat_MPIAIJ    *mpimat_2 = 0, *mpimat_1=0;
337   const PetscInt nloc = Gmat_2->rmap->n;
338   PetscScalar   *cpcol_1_state,*cpcol_2_state,*cpcol_2_par_orig,*lid_parent_gid;
339   PetscInt      *lid_cprowID_1;
340   NState        *lid_state;
341   Vec            ghost_par_orig2;
342 
343   PetscFunctionBegin;
344   ierr = MPI_Comm_rank( wcomm, &mype );   CHKERRQ(ierr);
345   ierr = MPI_Comm_size( wcomm, &npe );   CHKERRQ(ierr);
346   ierr = MatGetOwnershipRange(Gmat_1,&my0,&Iend);  CHKERRQ(ierr);
347 
348   if( PETSC_FALSE ) {
349     PetscViewer viewer; char fname[32]; static int llev=0;
350     sprintf(fname,"Gmat2_%d.m",llev++);
351     PetscViewerASCIIOpen(wcomm,fname,&viewer);
352     ierr = PetscViewerSetFormat( viewer, PETSC_VIEWER_ASCII_MATLAB);  CHKERRQ(ierr);
353     ierr = MatView(Gmat_2, viewer ); CHKERRQ(ierr);
354     ierr = PetscViewerDestroy( &viewer );
355   }
356 
357   /* get submatrices */
358   ierr = PetscTypeCompare( (PetscObject)Gmat_1, MATMPIAIJ, &isMPI ); CHKERRQ(ierr);
359   if(isMPI) {
360     /* grab matrix objects */
361     mpimat_2 = (Mat_MPIAIJ*)Gmat_2->data;
362     mpimat_1 = (Mat_MPIAIJ*)Gmat_1->data;
363     matA_1 = (Mat_SeqAIJ*)mpimat_1->A->data;
364     matB_1 = (Mat_SeqAIJ*)mpimat_1->B->data;
365     matA_2 = (Mat_SeqAIJ*)mpimat_2->A->data;
366     matB_2 = (Mat_SeqAIJ*)mpimat_2->B->data;
367 
368     /* force compressed row storage for B matrix in AuxMat */
369     matB_1->compressedrow.check = PETSC_TRUE;
370     ierr = MatCheckCompressedRow(mpimat_1->B,&matB_1->compressedrow,matB_1->i,Gmat_1->rmap->n,-1.0);
371     CHKERRQ(ierr);
372 
373     ierr = PetscMalloc( nloc*sizeof(PetscInt), &lid_cprowID_1 ); CHKERRQ(ierr);
374     for( lid = 0 ; lid < nloc ; lid++ ) lid_cprowID_1[lid] = -1;
375     for (ix=0; ix<matB_1->compressedrow.nrows; ix++) {
376       PetscInt lid = matB_1->compressedrow.rindex[ix];
377       lid_cprowID_1[lid] = ix;
378     }
379   }
380   else {
381     matA_1 = (Mat_SeqAIJ*)Gmat_1->data;
382     matA_2 = (Mat_SeqAIJ*)Gmat_2->data;
383     lid_cprowID_1 = PETSC_NULL;
384   }
385   assert( matA_1 && !matA_1->compressedrow.use );
386   assert( matB_1==0 || matB_1->compressedrow.use );
387   assert( matA_2 && !matA_2->compressedrow.use );
388   assert( matB_2==0 || matB_2->compressedrow.use );
389 
390   /* get state of locals and selected gid for deleted */
391   ierr = PetscMalloc( nloc*sizeof(NState), &lid_state ); CHKERRQ(ierr);
392   ierr = PetscMalloc( nloc*sizeof(PetscScalar), &lid_parent_gid ); CHKERRQ(ierr);
393   for( lid = 0 ; lid < nloc ; lid++ ) {
394     lid_parent_gid[lid] = -1.0;
395     lid_state[lid] = DELETED;
396   }
397 
398   /* set lid_state */
399   for( lid = 0 ; lid < nloc ; lid++ ) {
400     PetscCDPos pos;
401     if( (pos=PetscCDGetHeadPos(aggs_2,lid)) ) {
402       PetscInt gid1 = LLNGetID(pos); assert(gid1==lid+my0);
403       lid_state[lid] = gid1;
404     }
405   }
406 
407   /* map local to selected local, DELETED means a ghost owns it */
408   for(lid=kk=0;lid<nloc;lid++){
409     NState state = lid_state[lid];
410     if( IS_SELECTED(state) ){
411       PetscCDPos pos;
412       for( pos=PetscCDGetHeadPos(aggs_2,lid) ; pos ; pos=PetscCDGetNextPos(aggs_2,lid,pos)){
413         PetscInt gid1 = LLNGetID(pos);
414         if( gid1 >= my0 && gid1 < Iend ){
415           lid_parent_gid[gid1-my0] = (PetscScalar)(lid + my0);
416         }
417       }
418     }
419   }
420   /* get 'cpcol_1/2_state' & cpcol_2_par_orig - uses mpimat_1/2->lvec for temp space */
421   if (isMPI) {
422     Vec          tempVec;
423     /* get 'cpcol_1_state' */
424     ierr = MatGetVecs( Gmat_1, &tempVec, 0 );         CHKERRQ(ierr);
425     for(kk=0,j=my0;kk<nloc;kk++,j++){
426       PetscScalar v = (PetscScalar)lid_state[kk];
427       ierr = VecSetValues( tempVec, 1, &j, &v, INSERT_VALUES );  CHKERRQ(ierr);
428     }
429     ierr = VecAssemblyBegin( tempVec ); CHKERRQ(ierr);
430     ierr = VecAssemblyEnd( tempVec ); CHKERRQ(ierr);
431     ierr = VecScatterBegin(mpimat_1->Mvctx,tempVec, mpimat_1->lvec,INSERT_VALUES,SCATTER_FORWARD);
432     CHKERRQ(ierr);
433     ierr =   VecScatterEnd(mpimat_1->Mvctx,tempVec, mpimat_1->lvec,INSERT_VALUES,SCATTER_FORWARD);
434     CHKERRQ(ierr);
435     ierr = VecGetArray( mpimat_1->lvec, &cpcol_1_state ); CHKERRQ(ierr);
436     /* get 'cpcol_2_state' */
437     ierr = VecScatterBegin(mpimat_2->Mvctx,tempVec, mpimat_2->lvec,INSERT_VALUES,SCATTER_FORWARD);
438     CHKERRQ(ierr);
439     ierr =   VecScatterEnd(mpimat_2->Mvctx,tempVec, mpimat_2->lvec,INSERT_VALUES,SCATTER_FORWARD);
440     CHKERRQ(ierr);
441     ierr = VecGetArray( mpimat_2->lvec, &cpcol_2_state ); CHKERRQ(ierr);
442     /* get 'cpcol_2_par_orig' */
443     for(kk=0,j=my0;kk<nloc;kk++,j++){
444       PetscScalar v = (PetscScalar)lid_parent_gid[kk];
445       ierr = VecSetValues( tempVec, 1, &j, &v, INSERT_VALUES );  CHKERRQ(ierr);
446     }
447     ierr = VecAssemblyBegin( tempVec ); CHKERRQ(ierr);
448     ierr = VecAssemblyEnd( tempVec ); CHKERRQ(ierr);
449     ierr = VecDuplicate( mpimat_2->lvec, &ghost_par_orig2 ); CHKERRQ(ierr);
450     ierr = VecScatterBegin(mpimat_2->Mvctx,tempVec, ghost_par_orig2,INSERT_VALUES,SCATTER_FORWARD);
451     CHKERRQ(ierr);
452     ierr =   VecScatterEnd(mpimat_2->Mvctx,tempVec, ghost_par_orig2,INSERT_VALUES,SCATTER_FORWARD);
453     CHKERRQ(ierr);
454     ierr = VecGetArray( ghost_par_orig2, &cpcol_2_par_orig ); CHKERRQ(ierr);
455 
456     ierr = VecDestroy( &tempVec ); CHKERRQ(ierr);
457   } /* ismpi */
458 
459   /* doit */
460   for(lid=0;lid<nloc;lid++){
461     NState state = lid_state[lid];
462     if( IS_SELECTED(state) ) {
463       /* steal locals */
464       ii = matA_1->i; n = ii[lid+1] - ii[lid];
465       idx = matA_1->j + ii[lid];
466       for (j=0; j<n; j++) {
467         PetscInt lidj = idx[j], sgid;
468         NState statej = lid_state[lidj];
469         if (statej==DELETED && (sgid=(PetscInt)PetscRealPart(lid_parent_gid[lidj])) != lid+my0) { /* steal local */
470           lid_parent_gid[lidj] = (PetscScalar)(lid+my0); /* send this if sgid is not local */
471           if( sgid >= my0 && sgid < Iend ){       /* I'm stealing this local from a local sgid */
472             PetscInt hav=0,slid=sgid-my0,gidj=lidj+my0;
473             PetscCDPos pos,last=PETSC_NULL;
474             /* looking for local from local so id_llist_2 works */
475             for( pos=PetscCDGetHeadPos(aggs_2,slid) ; pos ; pos=PetscCDGetNextPos(aggs_2,slid,pos)){
476               PetscInt gid = LLNGetID(pos);
477               if( gid == gidj ) {
478                 assert(last);
479                 ierr = PetscCDRemoveNextNode( aggs_2, slid, last ); CHKERRQ(ierr);
480                 ierr = PetscCDAppendNode( aggs_2, lid, pos );       CHKERRQ(ierr);
481                 hav = 1;
482                 break;
483               }
484               else last = pos;
485             }
486             if(hav!=1){
487               if(hav==0)SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"failed to find adj in 'selected' lists - structurally unsymmetric matrix");
488               SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"found node %d times???",hav);
489             }
490           }
491           else{            /* I'm stealing this local, owned by a ghost */
492             assert(sgid==-1);
493             ierr = PetscCDAppendID( aggs_2, lid, lidj+my0 );      CHKERRQ(ierr);
494           }
495         }
496       } /* local neighbors */
497     }
498     else if( state == DELETED && lid_cprowID_1 ) {
499       PetscInt sgidold = (PetscInt)PetscRealPart(lid_parent_gid[lid]);
500       /* see if I have a selected ghost neighbor that will steal me */
501       if( (ix=lid_cprowID_1[lid]) != -1 ){
502         ii = matB_1->compressedrow.i; n = ii[ix+1] - ii[ix];
503         idx = matB_1->j + ii[ix];
504         for( j=0 ; j<n ; j++ ) {
505           PetscInt cpid = idx[j];
506           NState statej = (NState)PetscRealPart(cpcol_1_state[cpid]);
507           if( IS_SELECTED(statej) && sgidold != (PetscInt)statej ) { /* ghost will steal this, remove from my list */
508             lid_parent_gid[lid] = (PetscScalar)statej; /* send who selected */
509             if( sgidold>=my0 && sgidold<Iend ) { /* this was mine */
510               PetscInt hav=0,oldslidj=sgidold-my0;
511               PetscCDPos pos,last=PETSC_NULL;
512               /* remove from 'oldslidj' list */
513               for( pos=PetscCDGetHeadPos(aggs_2,oldslidj) ; pos ; pos=PetscCDGetNextPos(aggs_2,oldslidj,pos)){
514                 PetscInt gid = LLNGetID(pos);
515                 if( lid+my0 == gid ) {
516                   /* id_llist_2[lastid] = id_llist_2[flid];   /\* remove lid from oldslidj list *\/ */
517                   assert(last);
518                   ierr = PetscCDRemoveNextNode( aggs_2, oldslidj, last ); CHKERRQ(ierr);
519                   /* ghost (PetscScalar)statej will add this later */
520                   hav = 1;
521                   break;
522                 }
523                 else last = pos;
524               }
525               if(hav!=1){
526                 if(hav==0)SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"failed to find adj in 'selected' lists - structurally unsymmetric matrix");
527                 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"found node %d times???",hav);
528               }
529             }
530             else {
531               /* ghosts remove this later */
532             }
533           }
534         }
535       }
536     } /* selected/deleted */
537   } /* node loop */
538 
539   if( isMPI ) {
540     PetscScalar *cpcol_2_parent,*cpcol_2_gid;
541     Vec          tempVec,ghostgids2,ghostparents2;
542     PetscInt     cpid,nghost_2;
543     GAMGHashTable gid_cpid;
544 
545     ierr = VecGetSize( mpimat_2->lvec, &nghost_2 );   CHKERRQ(ierr);
546     ierr = MatGetVecs( Gmat_2, &tempVec, 0 );         CHKERRQ(ierr);
547 
548     /* get 'cpcol_2_parent' */
549     for(kk=0,j=my0;kk<nloc;kk++,j++){
550       ierr = VecSetValues( tempVec, 1, &j, &lid_parent_gid[kk], INSERT_VALUES );  CHKERRQ(ierr);
551     }
552     ierr = VecAssemblyBegin( tempVec ); CHKERRQ(ierr);
553     ierr = VecAssemblyEnd( tempVec ); CHKERRQ(ierr);
554     ierr = VecDuplicate( mpimat_2->lvec, &ghostparents2 ); CHKERRQ(ierr);
555     ierr = VecScatterBegin(mpimat_2->Mvctx,tempVec, ghostparents2,INSERT_VALUES,SCATTER_FORWARD);
556     CHKERRQ(ierr);
557     ierr =   VecScatterEnd(mpimat_2->Mvctx,tempVec, ghostparents2,INSERT_VALUES,SCATTER_FORWARD);
558     CHKERRQ(ierr);
559     ierr = VecGetArray( ghostparents2, &cpcol_2_parent ); CHKERRQ(ierr);
560 
561     /* get 'cpcol_2_gid' */
562     for(kk=0,j=my0;kk<nloc;kk++,j++){
563       PetscScalar v = (PetscScalar)j;
564       ierr = VecSetValues( tempVec, 1, &j, &v, INSERT_VALUES );  CHKERRQ(ierr);
565     }
566     ierr = VecAssemblyBegin( tempVec ); CHKERRQ(ierr);
567     ierr = VecAssemblyEnd( tempVec ); CHKERRQ(ierr);
568     ierr = VecDuplicate( mpimat_2->lvec, &ghostgids2 ); CHKERRQ(ierr);
569     ierr = VecScatterBegin(mpimat_2->Mvctx,tempVec, ghostgids2,INSERT_VALUES,SCATTER_FORWARD);
570     CHKERRQ(ierr);
571     ierr =   VecScatterEnd(mpimat_2->Mvctx,tempVec, ghostgids2,INSERT_VALUES,SCATTER_FORWARD);
572     CHKERRQ(ierr);
573     ierr = VecGetArray( ghostgids2, &cpcol_2_gid ); CHKERRQ(ierr);
574 
575     ierr = VecDestroy( &tempVec ); CHKERRQ(ierr);
576 
577     /* look for deleted ghosts and add to table */
578     ierr = GAMGTableCreate( 2*nghost_2, &gid_cpid ); CHKERRQ(ierr);
579     for( cpid = 0 ; cpid < nghost_2 ; cpid++ ) {
580       NState state = (NState)PetscRealPart(cpcol_2_state[cpid]);
581       if( state==DELETED ) {
582         PetscInt sgid_new = (PetscInt)PetscRealPart(cpcol_2_parent[cpid]);
583         PetscInt sgid_old = (PetscInt)PetscRealPart(cpcol_2_par_orig[cpid]);
584         if( sgid_old == -1 && sgid_new != -1 ) {
585           PetscInt gid = (PetscInt)PetscRealPart(cpcol_2_gid[cpid]);
586           ierr = GAMGTableAdd( &gid_cpid, gid, cpid ); CHKERRQ(ierr);
587         }
588       }
589     }
590 
591     /* look for deleted ghosts and see if they moved - remove it */
592     for(lid=0;lid<nloc;lid++){
593       NState state = lid_state[lid];
594       if( IS_SELECTED(state) ){
595         PetscCDPos pos,last=PETSC_NULL;
596         /* look for deleted ghosts and see if they moved */
597         for( pos=PetscCDGetHeadPos(aggs_2,lid) ; pos ; pos=PetscCDGetNextPos(aggs_2,lid,pos)){
598           PetscInt gid = LLNGetID(pos);
599           if( gid < my0 || gid >= Iend ) {
600             ierr = GAMGTableFind( &gid_cpid, gid, &cpid ); CHKERRQ(ierr);
601             if( cpid != -1 ) {
602               /* a moved ghost - */
603               /* id_llist_2[lastid] = id_llist_2[flid];    /\* remove 'flid' from list *\/ */
604               ierr = PetscCDRemoveNextNode( aggs_2, lid, last ); CHKERRQ(ierr);
605             }
606             else last = pos;
607           }
608           else last = pos;
609         } /* loop over list of deleted */
610       } /* selected */
611     }
612     ierr = GAMGTableDestroy( &gid_cpid ); CHKERRQ(ierr);
613 
614     /* look at ghosts, see if they changed - and it */
615     for( cpid = 0 ; cpid < nghost_2 ; cpid++ ){
616       PetscInt sgid_new = (PetscInt)PetscRealPart(cpcol_2_parent[cpid]);
617       if( sgid_new >= my0 && sgid_new < Iend ) { /* this is mine */
618         PetscInt gid = (PetscInt)PetscRealPart(cpcol_2_gid[cpid]);
619         PetscInt slid_new=sgid_new-my0,hav=0;
620         PetscCDPos pos;
621         /* search for this gid to see if I have it */
622         for( pos=PetscCDGetHeadPos(aggs_2,slid_new) ; pos ; pos=PetscCDGetNextPos(aggs_2,slid_new,pos) ) {
623           PetscInt gidj = LLNGetID(pos);
624           if( gidj == gid ) { hav = 1; break; }
625         }
626         if( hav != 1 ){
627           /* id_llist_2[flidj] = id_llist_2[slid_new]; id_llist_2[slid_new] = flidj; /\* insert 'flidj' into head of llist *\/ */
628           ierr = PetscCDAppendID( aggs_2, slid_new, gid );      CHKERRQ(ierr);
629         }
630       }
631     }
632 
633     ierr = VecRestoreArray( mpimat_1->lvec, &cpcol_1_state ); CHKERRQ(ierr);
634     ierr = VecRestoreArray( mpimat_2->lvec, &cpcol_2_state ); CHKERRQ(ierr);
635     ierr = VecRestoreArray( ghostparents2, &cpcol_2_parent ); CHKERRQ(ierr);
636     ierr = VecRestoreArray( ghostgids2, &cpcol_2_gid ); CHKERRQ(ierr);
637     ierr = PetscFree( lid_cprowID_1 );  CHKERRQ(ierr);
638     ierr = VecDestroy( &ghostgids2 ); CHKERRQ(ierr);
639     ierr = VecDestroy( &ghostparents2 ); CHKERRQ(ierr);
640     ierr = VecDestroy( &ghost_par_orig2 ); CHKERRQ(ierr);
641   }
642 
643   ierr = PetscFree( lid_parent_gid );  CHKERRQ(ierr);
644   ierr = PetscFree( lid_state );  CHKERRQ(ierr);
645 
646   PetscFunctionReturn(0);
647 }
648 
649 /* -------------------------------------------------------------------------- */
650 /*
651    PCSetData_AGG
652 
653   Input Parameter:
654    . pc -
655 */
656 #undef __FUNCT__
657 #define __FUNCT__ "PCSetData_AGG"
658 PetscErrorCode PCSetData_AGG( PC pc )
659 {
660   PetscErrorCode  ierr;
661   PetscFunctionBegin;
662   ierr = PCSetCoordinates_AGG( pc, -1, PETSC_NULL ); CHKERRQ(ierr);
663   PetscFunctionReturn(0);
664 }
665 
666 /* -------------------------------------------------------------------------- */
667 /*
668  formProl0
669 
670    Input Parameter:
671    . agg_llists - list of arrays with aggregates
672    . bs - block size
673    . nSAvec - column bs of new P
674    . my0crs - global index of start of locals
675    . data_stride - bs*(nloc nodes + ghost nodes)
676    . data_in[data_stride*nSAvec] - local data on fine grid
677    . flid_fgid[data_stride/bs] - make local to global IDs, includes ghosts in 'locals_llist'
678   Output Parameter:
679    . a_data_out - in with fine grid data (w/ghosts), out with coarse grid data
680    . a_Prol - prolongation operator
681 */
682 #undef __FUNCT__
683 #define __FUNCT__ "formProl0"
684 static PetscErrorCode formProl0(const PetscCoarsenData *agg_llists,/* list from selected vertices of aggregate unselected vertices */
685                                 const PetscInt bs,          /* (row) block size */
686                                 const PetscInt nSAvec,      /* column bs */
687                                 const PetscInt my0crs,      /* global index of start of locals */
688                                 const PetscInt data_stride, /* (nloc+nghost)*bs */
689                                 PetscReal      data_in[],   /* [data_stride][nSAvec] */
690                                 const PetscInt flid_fgid[], /* [data_stride/bs] */
691                                 PetscReal **a_data_out,
692                                 Mat a_Prol /* prolongation operator (output)*/
693                                 )
694 {
695   PetscErrorCode ierr;
696   PetscInt  Istart,my0,Iend,nloc,clid,flid,aggID,kk,jj,ii,mm,ndone,nSelected,minsz,nghosts,out_data_stride;
697   MPI_Comm       wcomm = ((PetscObject)a_Prol)->comm;
698   PetscMPIInt    mype, npe;
699   PetscReal      *out_data;
700   PetscCDPos         pos;
701   GAMGHashTable  fgid_flid;
702 
703 /* #define OUT_AGGS */
704 #ifdef OUT_AGGS
705   static PetscInt llev = 0; char fname[32]; FILE *file; PetscInt pM;
706 #endif
707 
708   PetscFunctionBegin;
709   ierr = MPI_Comm_rank(wcomm,&mype);CHKERRQ(ierr);
710   ierr = MPI_Comm_size(wcomm,&npe);CHKERRQ(ierr);
711   ierr = MatGetOwnershipRange( a_Prol, &Istart, &Iend );    CHKERRQ(ierr);
712   nloc = (Iend-Istart)/bs; my0 = Istart/bs; assert((Iend-Istart)%bs==0);
713   Iend /= bs;
714   nghosts = data_stride/bs - nloc;
715 
716   ierr = GAMGTableCreate( 2*nghosts, &fgid_flid ); CHKERRQ(ierr);
717   for(kk=0;kk<nghosts;kk++) {
718     ierr = GAMGTableAdd( &fgid_flid, flid_fgid[nloc+kk], nloc+kk ); CHKERRQ(ierr);
719   }
720 
721 #ifdef OUT_AGGS
722   sprintf(fname,"aggs_%d_%d.m",llev++,mype);
723   if(llev==1) {
724     file = fopen(fname,"w");
725   }
726   MatGetSize( a_Prol, &pM, &jj );
727 #endif
728 
729   /* count selected -- same as number of cols of P */
730   for(nSelected=mm=0;mm<nloc;mm++) {
731     if( PetscCDSizeAt(agg_llists, mm) > 0 ) nSelected++;
732   }
733   ierr = MatGetOwnershipRangeColumn( a_Prol, &ii, &jj ); CHKERRQ(ierr);
734   assert((ii/nSAvec)==my0crs); assert(nSelected==(jj-ii)/nSAvec);
735 
736   /* aloc space for coarse point data (output) */
737   out_data_stride = nSelected*nSAvec;
738   ierr = PetscMalloc( out_data_stride*nSAvec*sizeof(PetscReal), &out_data ); CHKERRQ(ierr);
739   for(ii=0;ii<out_data_stride*nSAvec;ii++) {
740     out_data[ii]=1.e300;
741   }
742   *a_data_out = out_data; /* output - stride nSelected*nSAvec */
743 
744   /* find points and set prolongation */
745   minsz = 100;
746   ndone = 0;
747   for( mm = clid = 0 ; mm < nloc ; mm++ ){
748     if( (jj=PetscCDSizeAt(agg_llists,mm)) > 0 ) {
749       const PetscInt lid = mm, cgid = my0crs + clid;
750       PetscInt cids[100]; /* max bs */
751       PetscBLASInt asz=jj,M=asz*bs,N=nSAvec,INFO;
752       PetscBLASInt   Mdata=M+((N-M>0)?N-M:0),LDA=Mdata,LWORK=N*bs;
753       PetscScalar    *qqc,*qqr,*TAU,*WORK;
754       PetscInt       *fids;
755 
756       /* count agg */
757       if( asz<minsz ) minsz = asz;
758 
759       /* get block */
760       ierr = PetscMalloc( (Mdata*N)*sizeof(PetscScalar), &qqc ); CHKERRQ(ierr);
761       ierr = PetscMalloc( (M*N)*sizeof(PetscScalar), &qqr ); CHKERRQ(ierr);
762       ierr = PetscMalloc( N*sizeof(PetscScalar), &TAU ); CHKERRQ(ierr);
763       ierr = PetscMalloc( LWORK*sizeof(PetscScalar), &WORK ); CHKERRQ(ierr);
764       ierr = PetscMalloc( M*sizeof(PetscInt), &fids ); CHKERRQ(ierr);
765 
766       aggID = 0;
767       for( pos=PetscCDGetHeadPos(agg_llists,lid) ;
768            pos ;
769            pos=PetscCDGetNextPos(agg_llists,lid,pos)) {
770         PetscInt gid1 = LLNGetID(pos);
771         if( gid1 >= my0 && gid1 < Iend ) flid = gid1 - my0;
772         else {
773           ierr = GAMGTableFind( &fgid_flid, gid1, &flid ); CHKERRQ(ierr);
774           assert(flid>=0);
775         }
776         /* copy in B_i matrix - column oriented */
777         PetscReal *data = &data_in[flid*bs];
778         for( kk = ii = 0; ii < bs ; ii++ ) {
779           for( jj = 0; jj < N ; jj++ ) {
780             PetscReal d = data[jj*data_stride + ii];
781             qqc[jj*Mdata + aggID*bs + ii] = d;
782           }
783         }
784 #ifdef OUT_AGGS
785         if(llev==1) {
786           char str[] = "plot(%e,%e,'r*'), hold on,\n", col[] = "rgbkmc", sim[] = "*os+h>d<vx^";
787           PetscInt MM,pi,pj;
788           str[12] = col[(clid+17*mype)%6]; str[13] = sim[(clid+17*mype)%11];
789           MM = (PetscInt)(PetscSqrtScalar((PetscScalar)pM));
790           pj = gid1/MM; pi = gid1%MM;
791           fprintf(file,str,(double)pi,(double)pj);
792           /* fprintf(file,str,data[2*data_stride+1],-data[2*data_stride]); */
793         }
794 #endif
795         /* set fine IDs */
796         for(kk=0;kk<bs;kk++) fids[aggID*bs + kk] = flid_fgid[flid]*bs + kk;
797 
798         aggID++;
799       }
800 
801       /* pad with zeros */
802       for( ii = asz*bs; ii < Mdata ; ii++ ) {
803 	for( jj = 0; jj < N ; jj++, kk++ ) {
804 	  qqc[jj*Mdata + ii] = .0;
805 	}
806       }
807 
808       ndone += aggID;
809       /* QR */
810       LAPACKgeqrf_( &Mdata, &N, qqc, &LDA, TAU, WORK, &LWORK, &INFO );
811       if( INFO != 0 ) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"xGEQRS error");
812       /* get R - column oriented - output B_{i+1} */
813       {
814         PetscReal *data = &out_data[clid*nSAvec];
815         for( jj = 0; jj < nSAvec ; jj++ ) {
816           for( ii = 0; ii < nSAvec ; ii++ ) {
817             assert(data[jj*out_data_stride + ii] == 1.e300);
818             if( ii <= jj ) data[jj*out_data_stride + ii] = PetscRealPart(qqc[jj*Mdata + ii]);
819 	    else data[jj*out_data_stride + ii] = 0.;
820           }
821         }
822       }
823 
824       /* get Q - row oriented */
825       LAPACKungqr_( &Mdata, &N, &N, qqc, &LDA, TAU, WORK, &LWORK, &INFO );
826       if( INFO != 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"xORGQR error arg %d",-INFO);
827 
828       for( ii = 0 ; ii < M ; ii++ ){
829         for( jj = 0 ; jj < N ; jj++ ) {
830           qqr[N*ii + jj] = qqc[jj*Mdata + ii];
831         }
832       }
833 
834       /* add diagonal block of P0 */
835       for(kk=0;kk<N;kk++) {
836         cids[kk] = N*cgid + kk; /* global col IDs in P0 */
837       }
838       ierr = MatSetValues(a_Prol,M,fids,N,cids,qqr,INSERT_VALUES); CHKERRQ(ierr);
839 
840       ierr = PetscFree( qqc );  CHKERRQ(ierr);
841       ierr = PetscFree( qqr );  CHKERRQ(ierr);
842       ierr = PetscFree( TAU );  CHKERRQ(ierr);
843       ierr = PetscFree( WORK );  CHKERRQ(ierr);
844       ierr = PetscFree( fids );  CHKERRQ(ierr);
845       clid++;
846     } /* coarse agg */
847   } /* for all fine nodes */
848   ierr = MatAssemblyBegin(a_Prol,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
849   ierr = MatAssemblyEnd(a_Prol,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
850 
851 /* ierr = MPI_Allreduce( &ndone, &ii, 1, MPIU_INT, MPIU_SUM, wcomm ); */
852 /* MatGetSize( a_Prol, &kk, &jj ); */
853 /* ierr = MPI_Allreduce( &minsz, &jj, 1, MPIU_INT, MPIU_MIN, wcomm ); */
854 /* PetscPrintf(PETSC_COMM_WORLD," **** [%d]%s %d total done, %d nodes (%d local done), min agg. size = %d\n",mype,__FUNCT__,ii,kk/bs,ndone,jj); */
855 
856 #ifdef OUT_AGGS
857   if(llev==1) fclose(file);
858 #endif
859   ierr = GAMGTableDestroy( &fgid_flid ); CHKERRQ(ierr);
860 
861   PetscFunctionReturn(0);
862 }
863 
864 /* -------------------------------------------------------------------------- */
865 /*
866    PCGAMGgraph_AGG
867 
868   Input Parameter:
869    . pc - this
870    . Amat - matrix on this fine level
871   Output Parameter:
872    . a_Gmat -
873 */
874 #undef __FUNCT__
875 #define __FUNCT__ "PCGAMGgraph_AGG"
876 PetscErrorCode PCGAMGgraph_AGG( PC pc,
877                                 const Mat Amat,
878                                 Mat *a_Gmat
879                                 )
880 {
881   PetscErrorCode ierr;
882   PC_MG          *mg = (PC_MG*)pc->data;
883   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
884   const PetscInt verbose = pc_gamg->verbose;
885   const PetscReal vfilter = pc_gamg->threshold;
886   PC_GAMG_AGG    *pc_gamg_agg = (PC_GAMG_AGG*)pc_gamg->subctx;
887   PetscMPIInt    mype,npe;
888   Mat            Gmat;
889   MPI_Comm       wcomm = ((PetscObject)Amat)->comm;
890   PetscBool  set,flg,symm;
891 
892   PetscFunctionBegin;
893 #if defined PETSC_USE_LOG
894   ierr = PetscLogEventBegin(PC_GAMGGgraph_AGG,0,0,0,0);CHKERRQ(ierr);
895 #endif
896   ierr = MPI_Comm_rank( wcomm, &mype);  CHKERRQ(ierr);
897   ierr = MPI_Comm_size( wcomm, &npe);   CHKERRQ(ierr);
898 
899   ierr = MatIsSymmetricKnown(Amat, &set, &flg);        CHKERRQ(ierr);
900   symm = (PetscBool)(pc_gamg_agg->sym_graph || !(set && flg));
901 
902   ierr  = PCGAMGCreateSimpleGraph( Amat, &Gmat ); CHKERRQ( ierr );
903   ierr  = PCGAMGScaleFilterGraph( &Gmat, vfilter, symm, verbose ); CHKERRQ( ierr );
904 
905   *a_Gmat = Gmat;
906 
907 #if defined PETSC_USE_LOG
908   ierr = PetscLogEventEnd(PC_GAMGGgraph_AGG,0,0,0,0);CHKERRQ(ierr);
909 #endif
910   PetscFunctionReturn(0);
911 }
912 
913 /* -------------------------------------------------------------------------- */
914 /*
915    PCGAMGCoarsen_AGG
916 
917   Input Parameter:
918    . a_pc - this
919   Input/Output Parameter:
920    . a_Gmat1 - graph on this fine level - coarsening can change this (squares it)
921   Output Parameter:
922    . agg_lists - list of aggregates
923 */
924 #undef __FUNCT__
925 #define __FUNCT__ "PCGAMGCoarsen_AGG"
926 PetscErrorCode PCGAMGCoarsen_AGG( PC a_pc,
927                                   Mat *a_Gmat1,
928                                   PetscCoarsenData **agg_lists
929                                   )
930 {
931   PetscErrorCode ierr;
932   PC_MG          *mg = (PC_MG*)a_pc->data;
933   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
934   PC_GAMG_AGG    *pc_gamg_agg = (PC_GAMG_AGG*)pc_gamg->subctx;
935   Mat             mat,Gmat2, Gmat1 = *a_Gmat1; /* squared graph */
936   IS              perm;
937   PetscInt        Ii,nloc,bs,n,m;
938   PetscInt *permute;
939   PetscBool *bIndexSet;
940   MatCoarsen crs;
941   MPI_Comm        wcomm = ((PetscObject)Gmat1)->comm;
942   PetscMPIInt     mype,npe;
943 
944   PetscFunctionBegin;
945 #if defined PETSC_USE_LOG
946   ierr = PetscLogEventBegin(PC_GAMGCoarsen_AGG,0,0,0,0);CHKERRQ(ierr);
947 #endif
948   ierr = MPI_Comm_rank( wcomm, &mype);  CHKERRQ(ierr);
949   ierr = MPI_Comm_size( wcomm, &npe);   CHKERRQ(ierr);
950   ierr = MatGetLocalSize( Gmat1, &n, &m ); CHKERRQ(ierr);
951   ierr = MatGetBlockSize( Gmat1, &bs ); CHKERRQ(ierr); assert(bs==1);
952   nloc = n/bs;
953 
954   if( pc_gamg_agg->square_graph ) {
955     ierr = MatTransposeMatMult( Gmat1, Gmat1, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Gmat2 );
956     CHKERRQ(ierr);
957   }
958   else Gmat2 = Gmat1;
959 
960   /* get MIS aggs */
961   /* randomize */
962   ierr = PetscMalloc( nloc*sizeof(PetscInt), &permute ); CHKERRQ(ierr);
963   ierr = PetscMalloc( nloc*sizeof(PetscBool), &bIndexSet ); CHKERRQ(ierr);
964   for ( Ii = 0; Ii < nloc ; Ii++ ){
965     bIndexSet[Ii] = PETSC_FALSE;
966     permute[Ii] = Ii;
967   }
968   srand(1); /* make deterministic */
969   for ( Ii = 0; Ii < nloc ; Ii++ ) {
970     PetscInt iSwapIndex = rand()%nloc;
971     if (!bIndexSet[iSwapIndex] && iSwapIndex != Ii) {
972       PetscInt iTemp = permute[iSwapIndex];
973       permute[iSwapIndex] = permute[Ii];
974       permute[Ii] = iTemp;
975       bIndexSet[iSwapIndex] = PETSC_TRUE;
976     }
977   }
978   ierr = PetscFree( bIndexSet );  CHKERRQ(ierr);
979 
980   ierr = ISCreateGeneral(PETSC_COMM_SELF, nloc, permute, PETSC_USE_POINTER, &perm);
981   CHKERRQ(ierr);
982 #if defined PETSC_GAMG_USE_LOG
983   ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET4],0,0,0,0);CHKERRQ(ierr);
984 #endif
985   ierr = MatCoarsenCreate( wcomm, &crs ); CHKERRQ(ierr);
986   /* ierr = MatCoarsenSetType( crs, MATCOARSENMIS ); CHKERRQ(ierr); */
987   ierr = MatCoarsenSetFromOptions( crs ); CHKERRQ(ierr);
988   ierr = MatCoarsenSetGreedyOrdering( crs, perm ); CHKERRQ(ierr);
989   ierr = MatCoarsenSetAdjacency( crs, Gmat2 ); CHKERRQ(ierr);
990   ierr = MatCoarsenSetVerbose( crs, pc_gamg->verbose ); CHKERRQ(ierr);
991   ierr = MatCoarsenSetStrictAggs( crs, PETSC_TRUE ); CHKERRQ(ierr);
992   ierr = MatCoarsenApply( crs ); CHKERRQ(ierr);
993   ierr = MatCoarsenGetData( crs, agg_lists ); CHKERRQ(ierr); /* output */
994   ierr = MatCoarsenDestroy( &crs ); CHKERRQ(ierr);
995 
996   ierr = ISDestroy( &perm );                    CHKERRQ(ierr);
997   ierr = PetscFree( permute );  CHKERRQ(ierr);
998 #if defined PETSC_GAMG_USE_LOG
999   ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET4],0,0,0,0);CHKERRQ(ierr);
1000 #endif
1001   /* smooth aggs */
1002   if( Gmat2 != Gmat1 ) {
1003     const PetscCoarsenData *llist = *agg_lists;
1004     ierr = smoothAggs( Gmat2, Gmat1, *agg_lists ); CHKERRQ(ierr);
1005     ierr = MatDestroy( &Gmat1 );  CHKERRQ(ierr);
1006     *a_Gmat1 = Gmat2; /* output */
1007     ierr = PetscCDGetMat( llist, &mat );  CHKERRQ(ierr);
1008     if(mat) SETERRQ(wcomm,PETSC_ERR_ARG_WRONG, "Auxilary matrix with squared graph????");
1009   }
1010   else {
1011     const PetscCoarsenData *llist = *agg_lists;
1012     /* see if we have a matrix that takes pecedence (returned from MatCoarsenAppply) */
1013     ierr = PetscCDGetMat( llist, &mat );   CHKERRQ(ierr);
1014     if( mat ) {
1015       ierr = MatDestroy( &Gmat1 );  CHKERRQ(ierr);
1016       *a_Gmat1 = mat; /* output */
1017     }
1018   }
1019 #if defined PETSC_USE_LOG
1020   ierr = PetscLogEventEnd(PC_GAMGCoarsen_AGG,0,0,0,0);CHKERRQ(ierr);
1021 #endif
1022   PetscFunctionReturn(0);
1023 }
1024 
1025 /* -------------------------------------------------------------------------- */
1026 /*
1027  PCGAMGProlongator_AGG
1028 
1029  Input Parameter:
1030  . pc - this
1031  . Amat - matrix on this fine level
1032  . Graph - used to get ghost data for nodes in
1033  . agg_lists - list of aggregates
1034  Output Parameter:
1035  . a_P_out - prolongation operator to the next level
1036  */
1037 #undef __FUNCT__
1038 #define __FUNCT__ "PCGAMGProlongator_AGG"
1039 PetscErrorCode PCGAMGProlongator_AGG( PC pc,
1040                                       const Mat Amat,
1041                                       const Mat Gmat,
1042                                       PetscCoarsenData *agg_lists,
1043                                       Mat *a_P_out
1044                                       )
1045 {
1046   PC_MG          *mg = (PC_MG*)pc->data;
1047   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1048   const PetscInt verbose = pc_gamg->verbose;
1049   const PetscInt data_cols = pc_gamg->data_cell_cols;
1050   PetscErrorCode ierr;
1051   PetscInt       Istart,Iend,nloc,ii,jj,kk,my0,nLocalSelected,bs;
1052   Mat            Prol;
1053   PetscMPIInt    mype, npe;
1054   MPI_Comm       wcomm = ((PetscObject)Amat)->comm;
1055   const PetscInt col_bs=data_cols;
1056   PetscReal      *data_w_ghost;
1057   PetscInt       myCrs0, nbnodes=0, *flid_fgid;
1058 
1059   PetscFunctionBegin;
1060 #if defined PETSC_USE_LOG
1061   ierr = PetscLogEventBegin(PC_GAMGProlongator_AGG,0,0,0,0);CHKERRQ(ierr);
1062 #endif
1063   ierr = MPI_Comm_rank( wcomm, &mype);  CHKERRQ(ierr);
1064   ierr = MPI_Comm_size( wcomm, &npe);   CHKERRQ(ierr);
1065   ierr = MatGetOwnershipRange( Amat, &Istart, &Iend ); CHKERRQ(ierr);
1066   ierr  = MatGetBlockSize( Amat, &bs ); CHKERRQ( ierr );
1067   nloc = (Iend-Istart)/bs; my0 = Istart/bs; assert((Iend-Istart)%bs==0);
1068 
1069   /* get 'nLocalSelected' */
1070   for( ii=0, nLocalSelected = 0 ; ii < nloc ; ii++ ){
1071     /* filter out singletons 0 or 1? */
1072     if( PetscCDSizeAt(agg_lists, ii) > 0 ) nLocalSelected++;
1073   }
1074 
1075   /* create prolongator, create P matrix */
1076   ierr = MatCreateAIJ( wcomm,
1077                        nloc*bs, nLocalSelected*col_bs,
1078                        PETSC_DETERMINE, PETSC_DETERMINE,
1079                        data_cols, PETSC_NULL, data_cols, PETSC_NULL,
1080                        &Prol );
1081   CHKERRQ(ierr);
1082 
1083   /* can get all points "removed" */
1084   ierr =  MatGetSize( Prol, &kk, &ii ); CHKERRQ(ierr);
1085   if( ii==0 ) {
1086     if( verbose ) {
1087       PetscPrintf(wcomm,"[%d]%s no selected points on coarse grid\n",mype,__FUNCT__);
1088     }
1089     ierr = MatDestroy( &Prol );  CHKERRQ(ierr);
1090     *a_P_out = PETSC_NULL;  /* out */
1091     PetscFunctionReturn(0);
1092   }
1093   if( verbose ) {
1094     PetscPrintf(PETSC_COMM_WORLD,"\t\t[%d]%s New grid %d nodes\n",mype,__FUNCT__,ii/col_bs);
1095   }
1096   ierr = MatGetOwnershipRangeColumn( Prol, &myCrs0, &kk ); CHKERRQ(ierr);
1097 
1098   assert((kk-myCrs0)%col_bs==0);
1099   myCrs0 = myCrs0/col_bs;
1100   assert((kk/col_bs-myCrs0)==nLocalSelected);
1101 
1102   /* create global vector of data in 'data_w_ghost' */
1103 #if defined PETSC_GAMG_USE_LOG
1104   ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET7],0,0,0,0);CHKERRQ(ierr);
1105 #endif
1106   if (npe > 1) { /*  */
1107     PetscReal *tmp_gdata,*tmp_ldata,*tp2;
1108     ierr = PetscMalloc( nloc*sizeof(PetscReal), &tmp_ldata ); CHKERRQ(ierr);
1109     for( jj = 0 ; jj < data_cols ; jj++ ){
1110       for( kk = 0 ; kk < bs ; kk++) {
1111         PetscInt ii,nnodes;
1112         const PetscReal *tp = pc_gamg->data + jj*bs*nloc + kk;
1113         for( ii = 0 ; ii < nloc ; ii++, tp += bs ){
1114           tmp_ldata[ii] = *tp;
1115         }
1116         ierr = PCGAMGGetDataWithGhosts( Gmat, 1, tmp_ldata, &nnodes, &tmp_gdata );
1117         CHKERRQ(ierr);
1118         if(jj==0 && kk==0) { /* now I know how many todal nodes - allocate */
1119           ierr = PetscMalloc( nnodes*bs*data_cols*sizeof(PetscReal), &data_w_ghost ); CHKERRQ(ierr);
1120           nbnodes = bs*nnodes;
1121         }
1122         tp2 = data_w_ghost + jj*bs*nnodes + kk;
1123         for( ii = 0 ; ii < nnodes ; ii++, tp2 += bs ){
1124           *tp2 = tmp_gdata[ii];
1125         }
1126         ierr = PetscFree( tmp_gdata ); CHKERRQ(ierr);
1127       }
1128     }
1129     ierr = PetscFree( tmp_ldata ); CHKERRQ(ierr);
1130   }
1131   else {
1132     nbnodes = bs*nloc;
1133     data_w_ghost = (PetscReal*)pc_gamg->data;
1134   }
1135 
1136   /* get P0 */
1137   if( npe > 1 ){
1138     PetscReal *fid_glid_loc,*fiddata;
1139     PetscInt nnodes;
1140 
1141     ierr = PetscMalloc( nloc*sizeof(PetscReal), &fid_glid_loc ); CHKERRQ(ierr);
1142     for(kk=0;kk<nloc;kk++) fid_glid_loc[kk] = (PetscReal)(my0+kk);
1143     ierr = PCGAMGGetDataWithGhosts( Gmat, 1, fid_glid_loc, &nnodes, &fiddata );
1144     CHKERRQ(ierr);
1145     ierr = PetscMalloc( nnodes*sizeof(PetscInt), &flid_fgid ); CHKERRQ(ierr);
1146     for(kk=0;kk<nnodes;kk++) flid_fgid[kk] = (PetscInt)fiddata[kk];
1147     ierr = PetscFree( fiddata ); CHKERRQ(ierr);
1148     assert(nnodes==nbnodes/bs);
1149     ierr = PetscFree( fid_glid_loc ); CHKERRQ(ierr);
1150   }
1151   else {
1152     ierr = PetscMalloc( nloc*sizeof(PetscInt), &flid_fgid ); CHKERRQ(ierr);
1153     for(kk=0;kk<nloc;kk++) flid_fgid[kk] = my0 + kk;
1154   }
1155 #if defined PETSC_GAMG_USE_LOG
1156   ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET7],0,0,0,0);CHKERRQ(ierr);
1157   ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET8],0,0,0,0);CHKERRQ(ierr);
1158 #endif
1159   {
1160     PetscReal *data_out;
1161     ierr = formProl0( agg_lists, bs, data_cols, myCrs0, nbnodes,
1162                       data_w_ghost, flid_fgid, &data_out, Prol );
1163     CHKERRQ(ierr);
1164     ierr = PetscFree( pc_gamg->data ); CHKERRQ( ierr );
1165     pc_gamg->data = data_out;
1166     pc_gamg->data_cell_rows = data_cols;
1167     pc_gamg->data_sz = data_cols*data_cols*nLocalSelected;
1168   }
1169 #if defined PETSC_GAMG_USE_LOG
1170   ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET8],0,0,0,0);CHKERRQ(ierr);
1171 #endif
1172   if (npe > 1) ierr = PetscFree( data_w_ghost );      CHKERRQ(ierr);
1173   ierr = PetscFree( flid_fgid ); CHKERRQ(ierr);
1174 
1175   /* attach block size of columns */
1176   if( pc_gamg->col_bs_id == -1 ) {
1177     ierr = PetscObjectComposedDataRegister( &pc_gamg->col_bs_id ); assert(pc_gamg->col_bs_id != -1 );
1178   }
1179   ierr = PetscObjectComposedDataSetInt( (PetscObject)Prol, pc_gamg->col_bs_id, data_cols ); CHKERRQ(ierr);
1180 
1181   *a_P_out = Prol;  /* out */
1182 #if defined PETSC_USE_LOG
1183   ierr = PetscLogEventEnd(PC_GAMGProlongator_AGG,0,0,0,0);CHKERRQ(ierr);
1184 #endif
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 /* -------------------------------------------------------------------------- */
1189 /*
1190    PCGAMGOptprol_AGG
1191 
1192   Input Parameter:
1193    . pc - this
1194    . Amat - matrix on this fine level
1195  In/Output Parameter:
1196    . a_P_out - prolongation operator to the next level
1197 */
1198 #undef __FUNCT__
1199 #define __FUNCT__ "PCGAMGOptprol_AGG"
1200 PetscErrorCode PCGAMGOptprol_AGG( PC pc,
1201                                   const Mat Amat,
1202                                   Mat *a_P
1203                                   )
1204 {
1205   PetscErrorCode ierr;
1206   PC_MG          *mg = (PC_MG*)pc->data;
1207   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1208   const PetscInt verbose = pc_gamg->verbose;
1209   PC_GAMG_AGG    *pc_gamg_agg = (PC_GAMG_AGG*)pc_gamg->subctx;
1210   PetscInt       jj;
1211   PetscMPIInt    mype,npe;
1212   Mat            Prol = *a_P;
1213   MPI_Comm       wcomm = ((PetscObject)Amat)->comm;
1214 
1215   PetscFunctionBegin;
1216 #if defined PETSC_USE_LOG
1217   ierr = PetscLogEventBegin(PC_GAMGOptprol_AGG,0,0,0,0);CHKERRQ(ierr);
1218 #endif
1219   ierr = MPI_Comm_rank( wcomm, &mype);  CHKERRQ(ierr);
1220   ierr = MPI_Comm_size( wcomm, &npe);   CHKERRQ(ierr);
1221 
1222   /* smooth P0 */
1223   for( jj = 0 ; jj < pc_gamg_agg->nsmooths ; jj++ ){
1224     Mat tMat;
1225     Vec diag;
1226     PetscReal alpha, emax, emin;
1227 #if defined PETSC_GAMG_USE_LOG
1228     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET9],0,0,0,0);CHKERRQ(ierr);
1229 #endif
1230     if( jj == 0 ) {
1231       KSP eksp;
1232       Vec bb, xx;
1233       PC pc;
1234       ierr = MatGetVecs( Amat, &bb, 0 );         CHKERRQ(ierr);
1235       ierr = MatGetVecs( Amat, &xx, 0 );         CHKERRQ(ierr);
1236       {
1237         PetscRandom    rctx;
1238         ierr = PetscRandomCreate(wcomm,&rctx);CHKERRQ(ierr);
1239         ierr = PetscRandomSetFromOptions(rctx);CHKERRQ(ierr);
1240         ierr = VecSetRandom(bb,rctx);CHKERRQ(ierr);
1241         ierr = PetscRandomDestroy( &rctx ); CHKERRQ(ierr);
1242       }
1243       ierr = KSPCreate(wcomm,&eksp);                            CHKERRQ(ierr);
1244       ierr = KSPAppendOptionsPrefix( eksp, "est_");         CHKERRQ(ierr);
1245       ierr = KSPSetFromOptions( eksp );    CHKERRQ(ierr);
1246       ierr = KSPSetInitialGuessNonzero( eksp, PETSC_FALSE );    CHKERRQ(ierr);
1247       ierr = KSPSetOperators( eksp, Amat, Amat, SAME_NONZERO_PATTERN );
1248       CHKERRQ( ierr );
1249       ierr = KSPGetPC( eksp, &pc );                              CHKERRQ( ierr );
1250       ierr = PCSetType( pc, PCJACOBI ); CHKERRQ(ierr);  /* smoother */
1251       ierr = KSPSetTolerances(eksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,10);
1252       CHKERRQ(ierr);
1253       ierr = KSPSetNormType( eksp, KSP_NORM_NONE );                 CHKERRQ(ierr);
1254       ierr = KSPSetComputeSingularValues( eksp,PETSC_TRUE );        CHKERRQ(ierr);
1255 
1256       /* solve - keep stuff out of logging */
1257       ierr = PetscLogEventDeactivate(KSP_Solve);CHKERRQ(ierr);
1258       ierr = PetscLogEventDeactivate(PC_Apply);CHKERRQ(ierr);
1259       ierr = KSPSolve( eksp, bb, xx );                              CHKERRQ(ierr);
1260       ierr = PetscLogEventActivate(KSP_Solve);CHKERRQ(ierr);
1261       ierr = PetscLogEventActivate(PC_Apply);CHKERRQ(ierr);
1262 
1263       ierr = KSPComputeExtremeSingularValues( eksp, &emax, &emin ); CHKERRQ(ierr);
1264       if( verbose ) {
1265         PetscPrintf(wcomm,"\t\t\t%s smooth P0: max eigen=%e min=%e PC=%s\n",
1266                     __FUNCT__,emax,emin,PCJACOBI);
1267       }
1268       ierr = VecDestroy( &xx );       CHKERRQ(ierr);
1269       ierr = VecDestroy( &bb );       CHKERRQ(ierr);
1270       ierr = KSPDestroy( &eksp );     CHKERRQ(ierr);
1271 
1272       if( pc_gamg->emax_id == -1 ) {
1273         ierr = PetscObjectComposedDataRegister( &pc_gamg->emax_id );
1274         assert(pc_gamg->emax_id != -1 );
1275       }
1276       ierr = PetscObjectComposedDataSetScalar( (PetscObject)Amat, pc_gamg->emax_id, emax ); CHKERRQ(ierr);
1277     }
1278 
1279     /* smooth P1 := (I - omega/lam D^{-1}A)P0 */
1280     ierr = MatMatMult( Amat, Prol, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &tMat );   CHKERRQ(ierr);
1281     ierr = MatGetVecs( Amat, &diag, 0 );    CHKERRQ(ierr);
1282     ierr = MatGetDiagonal( Amat, diag );    CHKERRQ(ierr); /* effectively PCJACOBI */
1283     ierr = VecReciprocal( diag );         CHKERRQ(ierr);
1284     ierr = MatDiagonalScale( tMat, diag, 0 ); CHKERRQ(ierr);
1285     ierr = VecDestroy( &diag );           CHKERRQ(ierr);
1286     alpha = -1.5/emax;
1287     ierr = MatAYPX( tMat, alpha, Prol, SUBSET_NONZERO_PATTERN );           CHKERRQ(ierr);
1288     ierr = MatDestroy( &Prol );  CHKERRQ(ierr);
1289     Prol = tMat;
1290 #if defined PETSC_GAMG_USE_LOG
1291     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET9],0,0,0,0);CHKERRQ(ierr);
1292 #endif
1293   }
1294 #if defined PETSC_USE_LOG
1295   ierr = PetscLogEventEnd(PC_GAMGOptprol_AGG,0,0,0,0);CHKERRQ(ierr);
1296 #endif
1297   *a_P = Prol;
1298 
1299   PetscFunctionReturn(0);
1300 }
1301 
1302 /* -------------------------------------------------------------------------- */
1303 /*
1304    PCCreateGAMG_AGG
1305 
1306   Input Parameter:
1307    . pc -
1308 */
1309 #undef __FUNCT__
1310 #define __FUNCT__ "PCCreateGAMG_AGG"
1311 PetscErrorCode  PCCreateGAMG_AGG( PC pc )
1312 {
1313   PetscErrorCode  ierr;
1314   PC_MG           *mg = (PC_MG*)pc->data;
1315   PC_GAMG         *pc_gamg = (PC_GAMG*)mg->innerctx;
1316   PC_GAMG_AGG      *pc_gamg_agg;
1317 
1318   PetscFunctionBegin;
1319   /* create sub context for SA */
1320   ierr = PetscNewLog( pc, PC_GAMG_AGG, &pc_gamg_agg ); CHKERRQ(ierr);
1321   assert(!pc_gamg->subctx);
1322   pc_gamg->subctx = pc_gamg_agg;
1323 
1324   pc->ops->setfromoptions = PCSetFromOptions_GAMG_AGG;
1325   pc->ops->destroy        = PCDestroy_AGG;
1326   /* reset does not do anything; setup not virtual */
1327 
1328   /* set internal function pointers */
1329   pc_gamg->graph = PCGAMGgraph_AGG;
1330   pc_gamg->coarsen = PCGAMGCoarsen_AGG;
1331   pc_gamg->prolongator = PCGAMGProlongator_AGG;
1332   pc_gamg->optprol = PCGAMGOptprol_AGG;
1333 
1334   pc_gamg->createdefaultdata = PCSetData_AGG;
1335 
1336   ierr = PetscObjectComposeFunctionDynamic( (PetscObject)pc,
1337                                             "PCSetCoordinates_C",
1338                                             "PCSetCoordinates_AGG",
1339                                             PCSetCoordinates_AGG);
1340   PetscFunctionReturn(0);
1341 }
1342