xref: /petsc/src/ksp/pc/impls/gamg/agg.c (revision c3b5f7ba6bc5ce25a01a67bb37ba5d34b02bbbd7)
1 /*
 GAMG geometric-algebraic multigrid PC - Mark Adams 2011
3  */
4 
5 #include <../src/ksp/pc/impls/gamg/gamg.h>        /*I "petscpc.h" I*/
6 #include <petscblaslapack.h>
7 #include <petscdm.h>
8 #include <petsc/private/kspimpl.h>
9 
/* Private context for the smoothed-aggregation (AGG) variant of PCGAMG */
typedef struct {
  PetscInt  nsmooths;     /* number of prolongation-smoothing steps (see PCGAMGSetNSmooths()) */
  PetscBool sym_graph;    /* symmetrize the graph before aggregation (see PCGAMGSetSymGraph()) */
  PetscInt  square_graph; /* number of levels on which to square the graph before aggregation (see PCGAMGSetSquareGraph()) */
} PC_GAMG_AGG;
15 
16 /*@
17    PCGAMGSetNSmooths - Set number of smoothing steps (1 is typical)
18 
19    Logically Collective on PC
20 
   Input Parameters:
+  pc - the preconditioner context
-  n - the number of smoothing steps
23 
24    Options Database Key:
25 .  -pc_gamg_agg_nsmooths <nsmooth, default=1> - number of smoothing steps to use with smooth aggregation
26 
27    Level: intermediate
28 
.seealso: PCGAMGSetSymGraph(), PCGAMGSetSquareGraph()
30 @*/
PetscErrorCode PCGAMGSetNSmooths(PC pc, PetscInt n)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveInt(pc,n,2); /* n must be the same on every rank */
  /* dispatch to the AGG implementation if composed; silently a no-op otherwise */
  PetscTryMethod(pc,"PCGAMGSetNSmooths_C",(PC,PetscInt),(pc,n));
  PetscFunctionReturn(0);
}
39 
40 static PetscErrorCode PCGAMGSetNSmooths_AGG(PC pc, PetscInt n)
41 {
42   PC_MG       *mg          = (PC_MG*)pc->data;
43   PC_GAMG     *pc_gamg     = (PC_GAMG*)mg->innerctx;
44   PC_GAMG_AGG *pc_gamg_agg = (PC_GAMG_AGG*)pc_gamg->subctx;
45 
46   PetscFunctionBegin;
47   pc_gamg_agg->nsmooths = n;
48   PetscFunctionReturn(0);
49 }
50 
51 /*@
52    PCGAMGSetSymGraph - Symmetrize the graph before computing the aggregation. Some algorithms require the graph be symmetric
53 
54    Logically Collective on PC
55 
56    Input Parameters:
57 +  pc - the preconditioner context
58 -  n - PETSC_TRUE or PETSC_FALSE
59 
60    Options Database Key:
61 .  -pc_gamg_sym_graph <true,default=false> - symmetrize the graph before computing the aggregation
62 
63    Level: intermediate
64 
65 .seealso: PCGAMGSetSquareGraph()
66 @*/
PetscErrorCode PCGAMGSetSymGraph(PC pc, PetscBool n)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveBool(pc,n,2); /* flag must be the same on every rank */
  /* dispatch to the AGG implementation if composed; silently a no-op otherwise */
  PetscTryMethod(pc,"PCGAMGSetSymGraph_C",(PC,PetscBool),(pc,n));
  PetscFunctionReturn(0);
}
75 
76 static PetscErrorCode PCGAMGSetSymGraph_AGG(PC pc, PetscBool n)
77 {
78   PC_MG       *mg          = (PC_MG*)pc->data;
79   PC_GAMG     *pc_gamg     = (PC_GAMG*)mg->innerctx;
80   PC_GAMG_AGG *pc_gamg_agg = (PC_GAMG_AGG*)pc_gamg->subctx;
81 
82   PetscFunctionBegin;
83   pc_gamg_agg->sym_graph = n;
84   PetscFunctionReturn(0);
85 }
86 
87 /*@
88    PCGAMGSetSquareGraph -  Square the graph, ie. compute A'*A before aggregating it
89 
90    Logically Collective on PC
91 
92    Input Parameters:
93 +  pc - the preconditioner context
94 -  n - 0, 1 or more
95 
96    Options Database Key:
97 .  -pc_gamg_square_graph <n,default = 1> - number of levels to square the graph on before aggregating it
98 
99    Notes:
   Squaring the graph increases the rate of coarsening (aggressive coarsening) and thereby reduces the complexity of the coarse grids, but generally results in slower solver convergence rates. Reducing coarse grid complexity also reduces the cost of Galerkin coarse grid construction considerably.
101 
102    Level: intermediate
103 
104 .seealso: PCGAMGSetSymGraph(), PCGAMGSetThreshold()
105 @*/
PetscErrorCode PCGAMGSetSquareGraph(PC pc, PetscInt n)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveInt(pc,n,2); /* n must be the same on every rank */
  /* dispatch to the AGG implementation if composed; silently a no-op otherwise */
  PetscTryMethod(pc,"PCGAMGSetSquareGraph_C",(PC,PetscInt),(pc,n));
  PetscFunctionReturn(0);
}
114 
115 static PetscErrorCode PCGAMGSetSquareGraph_AGG(PC pc, PetscInt n)
116 {
117   PC_MG       *mg          = (PC_MG*)pc->data;
118   PC_GAMG     *pc_gamg     = (PC_GAMG*)mg->innerctx;
119   PC_GAMG_AGG *pc_gamg_agg = (PC_GAMG_AGG*)pc_gamg->subctx;
120 
121   PetscFunctionBegin;
122   pc_gamg_agg->square_graph = n;
123   PetscFunctionReturn(0);
124 }
125 
/* PCSetFromOptions_GAMG_AGG - process the AGG-specific options database keys,
   writing the results directly into the PC_GAMG_AGG subcontext */
static PetscErrorCode PCSetFromOptions_GAMG_AGG(PetscOptionItems *PetscOptionsObject,PC pc)
{
  PC_MG          *mg          = (PC_MG*)pc->data;
  PC_GAMG        *pc_gamg     = (PC_GAMG*)mg->innerctx;
  PC_GAMG_AGG    *pc_gamg_agg = (PC_GAMG_AGG*)pc_gamg->subctx;

  PetscFunctionBegin;
  PetscCall(PetscOptionsHead(PetscOptionsObject,"GAMG-AGG options"));
  {
    /* current field values serve as the defaults shown in -help output */
    PetscCall(PetscOptionsInt("-pc_gamg_agg_nsmooths","smoothing steps for smoothed aggregation, usually 1","PCGAMGSetNSmooths",pc_gamg_agg->nsmooths,&pc_gamg_agg->nsmooths,NULL));
    PetscCall(PetscOptionsBool("-pc_gamg_sym_graph","Set for asymmetric matrices","PCGAMGSetSymGraph",pc_gamg_agg->sym_graph,&pc_gamg_agg->sym_graph,NULL));
    PetscCall(PetscOptionsInt("-pc_gamg_square_graph","Number of levels to square graph for faster coarsening and lower coarse grid complexity","PCGAMGSetSquareGraph",pc_gamg_agg->square_graph,&pc_gamg_agg->square_graph,NULL));
  }
  PetscCall(PetscOptionsTail());
  PetscFunctionReturn(0);
}
142 
143 /* -------------------------------------------------------------------------- */
/* PCDestroy_GAMG_AGG - free the AGG subcontext and remove the composed
   coordinate hook so a later PCSetType() starts clean */
static PetscErrorCode PCDestroy_GAMG_AGG(PC pc)
{
  PC_MG          *mg          = (PC_MG*)pc->data;
  PC_GAMG        *pc_gamg     = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  PetscCall(PetscFree(pc_gamg->subctx));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCSetCoordinates_C",NULL));
  PetscFunctionReturn(0);
}
154 
155 /* -------------------------------------------------------------------------- */
156 /*
157    PCSetCoordinates_AGG
158      - collective
159 
160    Input Parameter:
161    . pc - the preconditioner context
   . ndm - dimension of data (used for dof/vertex for Stokes)
163    . a_nloc - number of vertices local
164    . coords - [a_nloc][ndm] - interleaved coordinate data: {x_0, y_0, z_0, x_1, y_1, ...}
165 */
166 
/* PCSetCoordinates_AGG - build the default near-null-space data for smoothed
   aggregation from vertex coordinates: translational modes plus, when
   coordinates are given and the block size permits, rotational (rigid body)
   modes.  Data is stored column-oriented in pc_gamg->data with column stride
   nloc*data_cell_rows. */
static PetscErrorCode PCSetCoordinates_AGG(PC pc, PetscInt ndm, PetscInt a_nloc, PetscReal *coords)
{
  PC_MG          *mg      = (PC_MG*)pc->data;
  PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
  PetscInt       arrsz,kk,ii,jj,nloc,ndatarows,ndf;
  Mat            mat = pc->pmat;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  nloc = a_nloc;

  /* SA: null space vectors */
  PetscCall(MatGetBlockSize(mat, &ndf)); /* this does not work for Stokes */
  if (coords && ndf==1) pc_gamg->data_cell_cols = 1; /* scalar w/ coords and SA (not needed) */
  else if (coords) {
    PetscCheckFalse(ndm > ndf,PETSC_COMM_SELF,PETSC_ERR_PLIB,"degrees of motion %D > block size %D",ndm,ndf);
    pc_gamg->data_cell_cols = (ndm==2 ? 3 : 6); /* displacement elasticity: 2D has 3 rigid body modes, 3D has 6 */
    if (ndm != ndf) {
      PetscCheckFalse(pc_gamg->data_cell_cols != ndf,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Don't know how to create null space for ndm=%D, ndf=%D.  Use MatSetNearNullSpace().",ndm,ndf);
    }
  } else pc_gamg->data_cell_cols = ndf; /* no data, force SA with constant null space vectors */
  pc_gamg->data_cell_rows = ndatarows = ndf;
  PetscCheckFalse(pc_gamg->data_cell_cols <= 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"pc_gamg->data_cell_cols %D <= 0",pc_gamg->data_cell_cols);
  arrsz = nloc*pc_gamg->data_cell_rows*pc_gamg->data_cell_cols;

  /* reallocate only when the required size changed */
  if (!pc_gamg->data || (pc_gamg->data_sz != arrsz)) {
    PetscCall(PetscFree(pc_gamg->data));
    /* +1 so the allocation is nonzero even when arrsz==0 -- NOTE(review): presumably; confirm */
    PetscCall(PetscMalloc1(arrsz+1, &pc_gamg->data));
  }
  /* copy data in - column oriented */
  for (kk=0; kk<nloc; kk++) {
    const PetscInt M     = nloc*pc_gamg->data_cell_rows; /* stride into data */
    PetscReal      *data = &pc_gamg->data[kk*ndatarows]; /* start of cell */
    if (pc_gamg->data_cell_cols==1) *data = 1.0; /* single constant mode */
    else {
      /* translational modes: ndf x ndf identity block in the first columns */
      for (ii=0;ii<ndatarows;ii++) {
        for (jj=0;jj<ndatarows;jj++) {
          if (ii==jj)data[ii*M + jj] = 1.0;
          else data[ii*M + jj] = 0.0;
        }
      }

      /* rotational modes: one mode (column 2) in 2D, three (columns 3-5) in 3D */
      if (coords) {
        if (ndm == 2) {
          data   += 2*M;
          data[0] = -coords[2*kk+1];
          data[1] =  coords[2*kk];
        } else {
          data   += 3*M;
          data[0] = 0.0;             data[M+0] =  coords[3*kk+2]; data[2*M+0] = -coords[3*kk+1];
          data[1] = -coords[3*kk+2]; data[M+1] = 0.0;             data[2*M+1] =  coords[3*kk];
          data[2] =  coords[3*kk+1]; data[M+2] = -coords[3*kk];   data[2*M+2] = 0.0;
        }
      }
    }
  }
  pc_gamg->data_sz = arrsz;
  PetscFunctionReturn(0);
}
229 
230 typedef PetscInt NState;
231 static const NState NOT_DONE=-2;
232 static const NState DELETED =-1;
233 static const NState REMOVED =-3;
234 #define IS_SELECTED(s) (s!=DELETED && s!=NOT_DONE && s!=REMOVED)
235 
236 /* -------------------------------------------------------------------------- */
237 /*
   smoothAggs - greedy grab of neighbor vertices with G1 (unsquared graph) -- AIJ specific
239      - AGG-MG specific: clears singletons out of 'selected_2'
240 
241    Input Parameter:
242    . Gmat_2 - global matrix of graph (data not defined)   base (squared) graph
243    . Gmat_1 - base graph to grab with                 base graph
244    Input/Output Parameter:
245    . aggs_2 - linked list of aggs with gids)
246 */
/* smoothAggs - post-process the aggregates built on the squared graph (aggs_2)
   using the one-hop connectivity of the unsquared graph Gmat_1: selected
   vertices "steal" adjacent deleted vertices, and ghost ownership changes are
   reconciled across ranks with vector scatters. */
static PetscErrorCode smoothAggs(PC pc,Mat Gmat_2, Mat Gmat_1,PetscCoarsenData *aggs_2)
{
  PetscBool      isMPI;
  Mat_SeqAIJ     *matA_1, *matB_1=NULL;
  MPI_Comm       comm;
  PetscInt       lid,*ii,*idx,ix,Iend,my0,kk,n,j;
  Mat_MPIAIJ     *mpimat_2 = NULL, *mpimat_1=NULL;
  const PetscInt nloc      = Gmat_2->rmap->n;
  PetscScalar    *cpcol_1_state,*cpcol_2_state,*cpcol_2_par_orig,*lid_parent_gid;
  PetscInt       *lid_cprowID_1; /* local row -> compressed-row index of B (off-diagonal) part; -1 if no off-process entries */
  NState         *lid_state;
  Vec            ghost_par_orig2;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Gmat_2,&comm));
  PetscCall(MatGetOwnershipRange(Gmat_1,&my0,&Iend));

  /* get submatrices */
  PetscCall(PetscStrbeginswith(((PetscObject)Gmat_1)->type_name,MATMPIAIJ,&isMPI));
  if (isMPI) {
    /* grab matrix objects */
    mpimat_2 = (Mat_MPIAIJ*)Gmat_2->data;
    mpimat_1 = (Mat_MPIAIJ*)Gmat_1->data;
    matA_1   = (Mat_SeqAIJ*)mpimat_1->A->data;
    matB_1   = (Mat_SeqAIJ*)mpimat_1->B->data;

    /* force compressed row storage for B matrix in AuxMat */
    PetscCall(MatCheckCompressedRow(mpimat_1->B,matB_1->nonzerorowcnt,&matB_1->compressedrow,matB_1->i,Gmat_1->rmap->n,-1.0));

    /* build the local-row -> compressed-row map for the off-diagonal block */
    PetscCall(PetscMalloc1(nloc, &lid_cprowID_1));
    for (lid = 0; lid < nloc; lid++) lid_cprowID_1[lid] = -1;
    for (ix=0; ix<matB_1->compressedrow.nrows; ix++) {
      PetscInt lid = matB_1->compressedrow.rindex[ix];
      lid_cprowID_1[lid] = ix;
    }
  } else {
    PetscBool isAIJ;
    PetscCall(PetscStrbeginswith(((PetscObject)Gmat_1)->type_name,MATSEQAIJ,&isAIJ));
    PetscCheck(isAIJ,PETSC_COMM_SELF,PETSC_ERR_USER,"Require AIJ matrix.");
    matA_1        = (Mat_SeqAIJ*)Gmat_1->data;
    lid_cprowID_1 = NULL; /* sequential: no ghosts */
  }
  if (nloc>0) {
    PetscCheckFalse(matB_1 && !matB_1->compressedrow.use,PETSC_COMM_SELF,PETSC_ERR_PLIB,"matB_1 && !matB_1->compressedrow.use: PETSc bug???");
  }
  /* get state of locals and selected gid for deleted */
  PetscCall(PetscMalloc2(nloc, &lid_state,nloc, &lid_parent_gid));
  for (lid = 0; lid < nloc; lid++) {
    lid_parent_gid[lid] = -1.0; /* -1: no (local) parent known yet */
    lid_state[lid]      = DELETED;
  }

  /* set lid_state: a vertex with a non-empty list in aggs_2 heads its own
     aggregate, i.e. it is selected; its state is its own gid */
  for (lid = 0; lid < nloc; lid++) {
    PetscCDIntNd *pos;
    PetscCall(PetscCDGetHeadPos(aggs_2,lid,&pos));
    if (pos) {
      PetscInt gid1;

      PetscCall(PetscCDIntNdGetID(pos, &gid1));
      PetscCheckFalse(gid1 != lid+my0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"gid1 %D != lid %D + my0 %D",gid1,lid,my0);
      lid_state[lid] = gid1;
    }
  }

  /* map local to selected local, DELETED means a ghost owns it */
  for (lid=kk=0; lid<nloc; lid++) {
    NState state = lid_state[lid];
    if (IS_SELECTED(state)) {
      PetscCDIntNd *pos;
      PetscCall(PetscCDGetHeadPos(aggs_2,lid,&pos));
      while (pos) {
        PetscInt gid1;
        PetscCall(PetscCDIntNdGetID(pos, &gid1));
        PetscCall(PetscCDGetNextPos(aggs_2,lid,&pos));
        if (gid1 >= my0 && gid1 < Iend) lid_parent_gid[gid1-my0] = (PetscScalar)(lid + my0);
      }
    }
  }
  /* get 'cpcol_1/2_state' & cpcol_2_par_orig - uses mpimat_1/2->lvec for temp space */
  if (isMPI) {
    Vec tempVec;
    /* get 'cpcol_1_state': states of this rank's vertices as seen by ghost columns of Gmat_1 */
    PetscCall(MatCreateVecs(Gmat_1, &tempVec, NULL));
    for (kk=0,j=my0; kk<nloc; kk++,j++) {
      PetscScalar v = (PetscScalar)lid_state[kk];
      PetscCall(VecSetValues(tempVec, 1, &j, &v, INSERT_VALUES));
    }
    PetscCall(VecAssemblyBegin(tempVec));
    PetscCall(VecAssemblyEnd(tempVec));
    PetscCall(VecScatterBegin(mpimat_1->Mvctx,tempVec, mpimat_1->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mpimat_1->Mvctx,tempVec, mpimat_1->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecGetArray(mpimat_1->lvec, &cpcol_1_state));
    /* get 'cpcol_2_state' */
    PetscCall(VecScatterBegin(mpimat_2->Mvctx,tempVec, mpimat_2->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mpimat_2->Mvctx,tempVec, mpimat_2->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecGetArray(mpimat_2->lvec, &cpcol_2_state));
    /* get 'cpcol_2_par_orig': parent gids BEFORE the stealing pass below */
    for (kk=0,j=my0; kk<nloc; kk++,j++) {
      PetscScalar v = (PetscScalar)lid_parent_gid[kk];
      PetscCall(VecSetValues(tempVec, 1, &j, &v, INSERT_VALUES));
    }
    PetscCall(VecAssemblyBegin(tempVec));
    PetscCall(VecAssemblyEnd(tempVec));
    PetscCall(VecDuplicate(mpimat_2->lvec, &ghost_par_orig2));
    PetscCall(VecScatterBegin(mpimat_2->Mvctx,tempVec, ghost_par_orig2,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mpimat_2->Mvctx,tempVec, ghost_par_orig2,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecGetArray(ghost_par_orig2, &cpcol_2_par_orig));

    PetscCall(VecDestroy(&tempVec));
  } /* ismpi */
  /* main pass: selected vertices steal adjacent (in Gmat_1) deleted locals;
     deleted locals may instead be claimed by a selected ghost neighbor */
  for (lid=0; lid<nloc; lid++) {
    NState state = lid_state[lid];
    if (IS_SELECTED(state)) {
      /* steal locals */
      ii  = matA_1->i; n = ii[lid+1] - ii[lid];
      idx = matA_1->j + ii[lid];
      for (j=0; j<n; j++) {
        PetscInt lidj   = idx[j], sgid;
        NState   statej = lid_state[lidj];
        if (statej==DELETED && (sgid=(PetscInt)PetscRealPart(lid_parent_gid[lidj])) != lid+my0) { /* steal local */
          lid_parent_gid[lidj] = (PetscScalar)(lid+my0); /* send this if sgid is not local */
          if (sgid >= my0 && sgid < Iend) {       /* I'm stealing this local from a local sgid */
            PetscInt     hav=0,slid=sgid-my0,gidj=lidj+my0;
            PetscCDIntNd *pos,*last=NULL;
            /* looking for local from local so id_llist_2 works */
            PetscCall(PetscCDGetHeadPos(aggs_2,slid,&pos));
            while (pos) {
              PetscInt gid;
              PetscCall(PetscCDIntNdGetID(pos, &gid));
              if (gid == gidj) {
                /* move the node from the old owner's list to mine */
                PetscCheck(last,PETSC_COMM_SELF,PETSC_ERR_PLIB,"last cannot be null");
                PetscCall(PetscCDRemoveNextNode(aggs_2, slid, last));
                PetscCall(PetscCDAppendNode(aggs_2, lid, pos));
                hav  = 1;
                break;
              } else last = pos;
              PetscCall(PetscCDGetNextPos(aggs_2,slid,&pos));
            }
            if (hav != 1) {
              PetscCheck(hav,PETSC_COMM_SELF,PETSC_ERR_PLIB,"failed to find adj in 'selected' lists - structurally unsymmetric matrix");
              SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"found node %D times???",hav);
            }
          } else {            /* I'm stealing this local, owned by a ghost */
            PetscCheckFalse(sgid != -1,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Mat has an un-symmetric graph. Use '-%spc_gamg_sym_graph true' to symmetrize the graph or '-%spc_gamg_threshold -1' if the matrix is structurally symmetric.",((PetscObject)pc)->prefix ? ((PetscObject)pc)->prefix : "",((PetscObject)pc)->prefix ? ((PetscObject)pc)->prefix : "");
            PetscCall(PetscCDAppendID(aggs_2, lid, lidj+my0));
          }
        }
      } /* local neighbors */
    } else if (state == DELETED && lid_cprowID_1) {
      PetscInt sgidold = (PetscInt)PetscRealPart(lid_parent_gid[lid]);
      /* see if I have a selected ghost neighbor that will steal me */
      if ((ix=lid_cprowID_1[lid]) != -1) {
        ii  = matB_1->compressedrow.i; n = ii[ix+1] - ii[ix];
        idx = matB_1->j + ii[ix];
        for (j=0; j<n; j++) {
          PetscInt cpid   = idx[j];
          NState   statej = (NState)PetscRealPart(cpcol_1_state[cpid]);
          if (IS_SELECTED(statej) && sgidold != (PetscInt)statej) { /* ghost will steal this, remove from my list */
            lid_parent_gid[lid] = (PetscScalar)statej; /* send who selected */
            if (sgidold>=my0 && sgidold<Iend) { /* this was mine */
              PetscInt     hav=0,oldslidj=sgidold-my0;
              PetscCDIntNd *pos,*last=NULL;
              /* remove from 'oldslidj' list */
              PetscCall(PetscCDGetHeadPos(aggs_2,oldslidj,&pos));
              while (pos) {
                PetscInt gid;
                PetscCall(PetscCDIntNdGetID(pos, &gid));
                if (lid+my0 == gid) {
                  /* id_llist_2[lastid] = id_llist_2[flid];   /\* remove lid from oldslidj list *\/ */
                  PetscCheck(last,PETSC_COMM_SELF,PETSC_ERR_PLIB,"last cannot be null");
                  PetscCall(PetscCDRemoveNextNode(aggs_2, oldslidj, last));
                  /* ghost (PetscScalar)statej will add this later */
                  hav = 1;
                  break;
                } else last = pos;
                PetscCall(PetscCDGetNextPos(aggs_2,oldslidj,&pos));
              }
              if (hav != 1) {
                PetscCheck(hav,PETSC_COMM_SELF,PETSC_ERR_PLIB,"failed to find adj in 'selected' lists - structurally unsymmetric matrix");
                SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"found node %D times???",hav);
              }
            } else {
              /* TODO: ghosts remove this later */
            }
          }
        }
      }
    } /* selected/deleted */
  } /* node loop */

  /* parallel reconciliation: exchange the updated parent gids and fix up the
     aggregate lists for vertices whose ownership moved across ranks */
  if (isMPI) {
    PetscScalar     *cpcol_2_parent,*cpcol_2_gid;
    Vec             tempVec,ghostgids2,ghostparents2;
    PetscInt        cpid,nghost_2;
    PCGAMGHashTable gid_cpid;

    PetscCall(VecGetSize(mpimat_2->lvec, &nghost_2));
    PetscCall(MatCreateVecs(Gmat_2, &tempVec, NULL));

    /* get 'cpcol_2_parent': parent gids AFTER the stealing pass */
    for (kk=0,j=my0; kk<nloc; kk++,j++) {
      PetscCall(VecSetValues(tempVec, 1, &j, &lid_parent_gid[kk], INSERT_VALUES));
    }
    PetscCall(VecAssemblyBegin(tempVec));
    PetscCall(VecAssemblyEnd(tempVec));
    PetscCall(VecDuplicate(mpimat_2->lvec, &ghostparents2));
    PetscCall(VecScatterBegin(mpimat_2->Mvctx,tempVec, ghostparents2,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mpimat_2->Mvctx,tempVec, ghostparents2,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecGetArray(ghostparents2, &cpcol_2_parent));

    /* get 'cpcol_2_gid': global ids of the ghost columns */
    for (kk=0,j=my0; kk<nloc; kk++,j++) {
      PetscScalar v = (PetscScalar)j;
      PetscCall(VecSetValues(tempVec, 1, &j, &v, INSERT_VALUES));
    }
    PetscCall(VecAssemblyBegin(tempVec));
    PetscCall(VecAssemblyEnd(tempVec));
    PetscCall(VecDuplicate(mpimat_2->lvec, &ghostgids2));
    PetscCall(VecScatterBegin(mpimat_2->Mvctx,tempVec, ghostgids2,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mpimat_2->Mvctx,tempVec, ghostgids2,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecGetArray(ghostgids2, &cpcol_2_gid));
    PetscCall(VecDestroy(&tempVec));

    /* look for deleted ghosts and add to table: those that gained a parent
       during this pass (old parent -1, new parent set) */
    PetscCall(PCGAMGHashTableCreate(2*nghost_2+1, &gid_cpid));
    for (cpid = 0; cpid < nghost_2; cpid++) {
      NState state = (NState)PetscRealPart(cpcol_2_state[cpid]);
      if (state==DELETED) {
        PetscInt sgid_new = (PetscInt)PetscRealPart(cpcol_2_parent[cpid]);
        PetscInt sgid_old = (PetscInt)PetscRealPart(cpcol_2_par_orig[cpid]);
        if (sgid_old == -1 && sgid_new != -1) {
          PetscInt gid = (PetscInt)PetscRealPart(cpcol_2_gid[cpid]);
          PetscCall(PCGAMGHashTableAdd(&gid_cpid, gid, cpid));
        }
      }
    }

    /* look for deleted ghosts and see if they moved - remove it */
    for (lid=0; lid<nloc; lid++) {
      NState state = lid_state[lid];
      if (IS_SELECTED(state)) {
        PetscCDIntNd *pos,*last=NULL;
        /* look for deleted ghosts and see if they moved */
        PetscCall(PetscCDGetHeadPos(aggs_2,lid,&pos));
        while (pos) {
          PetscInt gid;
          PetscCall(PetscCDIntNdGetID(pos, &gid));

          if (gid < my0 || gid >= Iend) {
            PetscCall(PCGAMGHashTableFind(&gid_cpid, gid, &cpid));
            if (cpid != -1) {
              /* a moved ghost - */
              /* id_llist_2[lastid] = id_llist_2[flid];    /\* remove 'flid' from list *\/ */
              PetscCall(PetscCDRemoveNextNode(aggs_2, lid, last));
            } else last = pos;
          } else last = pos;

          PetscCall(PetscCDGetNextPos(aggs_2,lid,&pos));
        } /* loop over list of deleted */
      } /* selected */
    }
    PetscCall(PCGAMGHashTableDestroy(&gid_cpid));

    /* look at ghosts, see if they changed - and it */
    for (cpid = 0; cpid < nghost_2; cpid++) {
      PetscInt sgid_new = (PetscInt)PetscRealPart(cpcol_2_parent[cpid]);
      if (sgid_new >= my0 && sgid_new < Iend) { /* this is mine */
        PetscInt     gid     = (PetscInt)PetscRealPart(cpcol_2_gid[cpid]);
        PetscInt     slid_new=sgid_new-my0,hav=0;
        PetscCDIntNd *pos;

        /* search for this gid to see if I have it */
        PetscCall(PetscCDGetHeadPos(aggs_2,slid_new,&pos));
        while (pos) {
          PetscInt gidj;
          PetscCall(PetscCDIntNdGetID(pos, &gidj));
          PetscCall(PetscCDGetNextPos(aggs_2,slid_new,&pos));

          if (gidj == gid) { hav = 1; break; }
        }
        if (hav != 1) {
          /* insert 'flidj' into head of llist */
          PetscCall(PetscCDAppendID(aggs_2, slid_new, gid));
        }
      }
    }

    PetscCall(VecRestoreArray(mpimat_1->lvec, &cpcol_1_state));
    PetscCall(VecRestoreArray(mpimat_2->lvec, &cpcol_2_state));
    PetscCall(VecRestoreArray(ghostparents2, &cpcol_2_parent));
    PetscCall(VecRestoreArray(ghostgids2, &cpcol_2_gid));
    PetscCall(PetscFree(lid_cprowID_1));
    PetscCall(VecDestroy(&ghostgids2));
    PetscCall(VecDestroy(&ghostparents2));
    PetscCall(VecDestroy(&ghost_par_orig2));
  }

  PetscCall(PetscFree2(lid_state,lid_parent_gid));
  PetscFunctionReturn(0);
}
548 
549 /* -------------------------------------------------------------------------- */
550 /*
551    PCSetData_AGG - called if data is not set with PCSetCoordinates.
552       Looks in Mat for near null space.
553       Does not work for Stokes
554 
555   Input Parameter:
556    . pc -
557    . a_A - matrix to get (near) null space out of.
558 */
/* PCSetData_AGG - supply GAMG's near-null-space data when the user did not
   call PCSetCoordinates(): try the Mat's near null space, then a DM field's
   composed "nearnullspace"/"nullspace", and finally fall back to constant
   vectors via PCSetCoordinates_AGG() with NULL coordinates. */
static PetscErrorCode PCSetData_AGG(PC pc, Mat a_A)
{
  PC_MG          *mg      = (PC_MG*)pc->data;
  PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
  MatNullSpace   mnull;

  PetscFunctionBegin;
  PetscCall(MatGetNearNullSpace(a_A, &mnull));
  if (!mnull) {
    /* no near null space on the Mat: look for one attached to a DM field */
    DM dm;
    PetscCall(PCGetDM(pc, &dm));
    if (!dm) {
      PetscCall(MatGetDM(a_A, &dm));
    }
    if (dm) {
      PetscObject deformation;
      PetscInt    Nf;

      PetscCall(DMGetNumFields(dm, &Nf));
      if (Nf) {
        PetscCall(DMGetField(dm, 0, NULL, &deformation));
        PetscCall(PetscObjectQuery((PetscObject)deformation,"nearnullspace",(PetscObject*)&mnull));
        if (!mnull) {
          PetscCall(PetscObjectQuery((PetscObject)deformation,"nullspace",(PetscObject*)&mnull));
        }
      }
    }
  }

  if (!mnull) {
    /* nothing found: NULL coords generate constant (translational) modes only */
    PetscInt bs,NN,MM;
    PetscCall(MatGetBlockSize(a_A, &bs));
    PetscCall(MatGetLocalSize(a_A, &MM, &NN));
    PetscCheckFalse(MM % bs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MM %D must be divisible by bs %D",MM,bs);
    PetscCall(PCSetCoordinates_AGG(pc, bs, MM/bs, NULL));
  } else {
    /* copy the null-space vectors (plus an optional constant vector in the
       first column) into a dense column-major array owned by pc_gamg */
    PetscReal         *nullvec;
    PetscBool         has_const;
    PetscInt          i,j,mlocal,nvec,bs;
    const Vec         *vecs;
    const PetscScalar *v;

    PetscCall(MatGetLocalSize(a_A,&mlocal,NULL));
    PetscCall(MatNullSpaceGetVecs(mnull, &has_const, &nvec, &vecs));
    pc_gamg->data_sz = (nvec+!!has_const)*mlocal;
    PetscCall(PetscMalloc1((nvec+!!has_const)*mlocal,&nullvec));
    if (has_const) for (i=0; i<mlocal; i++) nullvec[i] = 1.0;
    for (i=0; i<nvec; i++) {
      PetscCall(VecGetArrayRead(vecs[i],&v));
      for (j=0; j<mlocal; j++) nullvec[(i+!!has_const)*mlocal + j] = PetscRealPart(v[j]);
      PetscCall(VecRestoreArrayRead(vecs[i],&v));
    }
    /* NOTE(review): pc_gamg->data is overwritten here without freeing --
       presumably NULL on this path; verify no leak */
    pc_gamg->data           = nullvec;
    pc_gamg->data_cell_cols = (nvec+!!has_const);
    PetscCall(MatGetBlockSize(a_A, &bs));
    pc_gamg->data_cell_rows = bs;
  }
  PetscFunctionReturn(0);
}
618 
619 /* -------------------------------------------------------------------------- */
620 /*
621  formProl0
622 
623    Input Parameter:
624    . agg_llists - list of arrays with aggregates -- list from selected vertices of aggregate unselected vertices
625    . bs - row block size
626    . nSAvec - column bs of new P
627    . my0crs - global index of start of locals
628    . data_stride - bs*(nloc nodes + ghost nodes) [data_stride][nSAvec]
629    . data_in[data_stride*nSAvec] - local data on fine grid
630    . flid_fgid[data_stride/bs] - make local to global IDs, includes ghosts in 'locals_llist'
631   Output Parameter:
632    . a_data_out - in with fine grid data (w/ghosts), out with coarse grid data
633    . a_Prol - prolongation operator
634 */
635 static PetscErrorCode formProl0(PetscCoarsenData *agg_llists,PetscInt bs,PetscInt nSAvec,PetscInt my0crs,PetscInt data_stride,PetscReal data_in[],const PetscInt flid_fgid[],PetscReal **a_data_out,Mat a_Prol)
636 {
637   PetscInt        Istart,my0,Iend,nloc,clid,flid = 0,aggID,kk,jj,ii,mm,nSelected,minsz,nghosts,out_data_stride;
638   MPI_Comm        comm;
639   PetscReal       *out_data;
640   PetscCDIntNd    *pos;
641   PCGAMGHashTable fgid_flid;
642 
643   PetscFunctionBegin;
644   PetscCall(PetscObjectGetComm((PetscObject)a_Prol,&comm));
645   PetscCall(MatGetOwnershipRange(a_Prol, &Istart, &Iend));
646   nloc = (Iend-Istart)/bs; my0 = Istart/bs;
647   PetscCheckFalse((Iend-Istart) % bs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Iend %D - Istart %D must be divisible by bs %D",Iend,Istart,bs);
648   Iend   /= bs;
649   nghosts = data_stride/bs - nloc;
650 
651   PetscCall(PCGAMGHashTableCreate(2*nghosts+1, &fgid_flid));
652   for (kk=0; kk<nghosts; kk++) {
653     PetscCall(PCGAMGHashTableAdd(&fgid_flid, flid_fgid[nloc+kk], nloc+kk));
654   }
655 
656   /* count selected -- same as number of cols of P */
657   for (nSelected=mm=0; mm<nloc; mm++) {
658     PetscBool ise;
659     PetscCall(PetscCDEmptyAt(agg_llists, mm, &ise));
660     if (!ise) nSelected++;
661   }
662   PetscCall(MatGetOwnershipRangeColumn(a_Prol, &ii, &jj));
663   PetscCheckFalse((ii/nSAvec) != my0crs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"ii %D /nSAvec %D  != my0crs %D",ii,nSAvec,my0crs);
664   PetscCheckFalse(nSelected != (jj-ii)/nSAvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nSelected %D != (jj %D - ii %D)/nSAvec %D",nSelected,jj,ii,nSAvec);
665 
666   /* aloc space for coarse point data (output) */
667   out_data_stride = nSelected*nSAvec;
668 
669   PetscCall(PetscMalloc1(out_data_stride*nSAvec, &out_data));
670   for (ii=0;ii<out_data_stride*nSAvec;ii++) out_data[ii]=PETSC_MAX_REAL;
671   *a_data_out = out_data; /* output - stride nSelected*nSAvec */
672 
673   /* find points and set prolongation */
674   minsz = 100;
675   for (mm = clid = 0; mm < nloc; mm++) {
676     PetscCall(PetscCDSizeAt(agg_llists, mm, &jj));
677     if (jj > 0) {
678       const PetscInt lid = mm, cgid = my0crs + clid;
679       PetscInt       cids[100]; /* max bs */
680       PetscBLASInt   asz  =jj,M=asz*bs,N=nSAvec,INFO;
681       PetscBLASInt   Mdata=M+((N-M>0) ? N-M : 0),LDA=Mdata,LWORK=N*bs;
682       PetscScalar    *qqc,*qqr,*TAU,*WORK;
683       PetscInt       *fids;
684       PetscReal      *data;
685 
686       /* count agg */
687       if (asz<minsz) minsz = asz;
688 
689       /* get block */
690       PetscCall(PetscMalloc5(Mdata*N, &qqc,M*N, &qqr,N, &TAU,LWORK, &WORK,M, &fids));
691 
692       aggID = 0;
693       PetscCall(PetscCDGetHeadPos(agg_llists,lid,&pos));
694       while (pos) {
695         PetscInt gid1;
696         PetscCall(PetscCDIntNdGetID(pos, &gid1));
697         PetscCall(PetscCDGetNextPos(agg_llists,lid,&pos));
698 
699         if (gid1 >= my0 && gid1 < Iend) flid = gid1 - my0;
700         else {
701           PetscCall(PCGAMGHashTableFind(&fgid_flid, gid1, &flid));
702           PetscCheckFalse(flid < 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot find gid1 in table");
703         }
704         /* copy in B_i matrix - column oriented */
705         data = &data_in[flid*bs];
706         for (ii = 0; ii < bs; ii++) {
707           for (jj = 0; jj < N; jj++) {
708             PetscReal d = data[jj*data_stride + ii];
709             qqc[jj*Mdata + aggID*bs + ii] = d;
710           }
711         }
712         /* set fine IDs */
713         for (kk=0; kk<bs; kk++) fids[aggID*bs + kk] = flid_fgid[flid]*bs + kk;
714         aggID++;
715       }
716 
717       /* pad with zeros */
718       for (ii = asz*bs; ii < Mdata; ii++) {
719         for (jj = 0; jj < N; jj++, kk++) {
720           qqc[jj*Mdata + ii] = .0;
721         }
722       }
723 
724       /* QR */
725       PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
726       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Mdata, &N, qqc, &LDA, TAU, WORK, &LWORK, &INFO));
727       PetscCall(PetscFPTrapPop());
728       PetscCheckFalse(INFO != 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"xGEQRF error");
729       /* get R - column oriented - output B_{i+1} */
730       {
731         PetscReal *data = &out_data[clid*nSAvec];
732         for (jj = 0; jj < nSAvec; jj++) {
733           for (ii = 0; ii < nSAvec; ii++) {
734             PetscCheckFalse(data[jj*out_data_stride + ii] != PETSC_MAX_REAL,PETSC_COMM_SELF,PETSC_ERR_PLIB,"data[jj*out_data_stride + ii] != %e",(double)PETSC_MAX_REAL);
735            if (ii <= jj) data[jj*out_data_stride + ii] = PetscRealPart(qqc[jj*Mdata + ii]);
736            else data[jj*out_data_stride + ii] = 0.;
737           }
738         }
739       }
740 
741       /* get Q - row oriented */
742       PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Mdata, &N, &N, qqc, &LDA, TAU, WORK, &LWORK, &INFO));
743       PetscCheckFalse(INFO != 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"xORGQR error arg %d",-INFO);
744 
745       for (ii = 0; ii < M; ii++) {
746         for (jj = 0; jj < N; jj++) {
747           qqr[N*ii + jj] = qqc[jj*Mdata + ii];
748         }
749       }
750 
751       /* add diagonal block of P0 */
752       for (kk=0; kk<N; kk++) {
753         cids[kk] = N*cgid + kk; /* global col IDs in P0 */
754       }
755       PetscCall(MatSetValues(a_Prol,M,fids,N,cids,qqr,INSERT_VALUES));
756       PetscCall(PetscFree5(qqc,qqr,TAU,WORK,fids));
757       clid++;
758     } /* coarse agg */
759   } /* for all fine nodes */
760   PetscCall(MatAssemblyBegin(a_Prol,MAT_FINAL_ASSEMBLY));
761   PetscCall(MatAssemblyEnd(a_Prol,MAT_FINAL_ASSEMBLY));
762   PetscCall(PCGAMGHashTableDestroy(&fgid_flid));
763   PetscFunctionReturn(0);
764 }
765 
766 static PetscErrorCode PCView_GAMG_AGG(PC pc,PetscViewer viewer)
767 {
768   PC_MG          *mg      = (PC_MG*)pc->data;
769   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
770   PC_GAMG_AGG    *pc_gamg_agg = (PC_GAMG_AGG*)pc_gamg->subctx;
771 
772   PetscFunctionBegin;
773   PetscCall(PetscViewerASCIIPrintf(viewer,"      AGG specific options\n"));
774   PetscCall(PetscViewerASCIIPrintf(viewer,"        Symmetric graph %s\n",pc_gamg_agg->sym_graph ? "true" : "false"));
775   PetscCall(PetscViewerASCIIPrintf(viewer,"        Number of levels to square graph %D\n",pc_gamg_agg->square_graph));
776   PetscCall(PetscViewerASCIIPrintf(viewer,"        Number smoothing steps %D\n",pc_gamg_agg->nsmooths));
777   PetscFunctionReturn(0);
778 }
779 
780 /* -------------------------------------------------------------------------- */
781 /*
782    PCGAMGGraph_AGG
783 
784   Input Parameter:
785    . pc - this
786    . Amat - matrix on this fine level
787   Output Parameter:
788    . a_Gmat -
789 */
790 static PetscErrorCode PCGAMGGraph_AGG(PC pc,Mat Amat,Mat *a_Gmat)
791 {
792   PC_MG                     *mg          = (PC_MG*)pc->data;
793   PC_GAMG                   *pc_gamg     = (PC_GAMG*)mg->innerctx;
794   const PetscReal           vfilter      = pc_gamg->threshold[pc_gamg->current_level];
795   PC_GAMG_AGG               *pc_gamg_agg = (PC_GAMG_AGG*)pc_gamg->subctx;
796   Mat                       Gmat;
797   MPI_Comm                  comm;
798   PetscBool /* set,flg , */ symm;
799 
800   PetscFunctionBegin;
801   PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
802   PetscCall(PetscLogEventBegin(PC_GAMGGraph_AGG,0,0,0,0));
803 
804   /* PetscCall(MatIsSymmetricKnown(Amat, &set, &flg)); || !(set && flg) -- this causes lot of symm calls */
805   symm = (PetscBool)(pc_gamg_agg->sym_graph); /* && !pc_gamg_agg->square_graph; */
806 
807   PetscCall(PCGAMGCreateGraph(Amat, &Gmat));
808   PetscCall(PCGAMGFilterGraph(&Gmat, vfilter, symm));
809   *a_Gmat = Gmat;
810   PetscCall(PetscLogEventEnd(PC_GAMGGraph_AGG,0,0,0,0));
811   PetscFunctionReturn(0);
812 }
813 
814 /* -------------------------------------------------------------------------- */
815 /*
816    PCGAMGCoarsen_AGG
817 
818   Input Parameter:
819    . a_pc - this
820   Input/Output Parameter:
821    . a_Gmat1 - graph on this fine level - coarsening can change this (squares it)
822   Output Parameter:
823    . agg_lists - list of aggregates
824 */
825 static PetscErrorCode PCGAMGCoarsen_AGG(PC a_pc,Mat *a_Gmat1,PetscCoarsenData **agg_lists)
826 {
827   PC_MG          *mg          = (PC_MG*)a_pc->data;
828   PC_GAMG        *pc_gamg     = (PC_GAMG*)mg->innerctx;
829   PC_GAMG_AGG    *pc_gamg_agg = (PC_GAMG_AGG*)pc_gamg->subctx;
830   Mat            mat,Gmat2, Gmat1 = *a_Gmat1;  /* squared graph */
831   IS             perm;
832   PetscInt       Istart,Iend,Ii,nloc,bs,n,m;
833   PetscInt       *permute;
834   PetscBool      *bIndexSet;
835   MatCoarsen     crs;
836   MPI_Comm       comm;
837   PetscReal      hashfact;
838   PetscInt       iSwapIndex;
839   PetscRandom    random;
840 
841   PetscFunctionBegin;
842   PetscCall(PetscLogEventBegin(PC_GAMGCoarsen_AGG,0,0,0,0));
843   PetscCall(PetscObjectGetComm((PetscObject)Gmat1,&comm));
844   PetscCall(MatGetLocalSize(Gmat1, &n, &m));
845   PetscCall(MatGetBlockSize(Gmat1, &bs));
846   PetscCheckFalse(bs != 1,PETSC_COMM_SELF,PETSC_ERR_PLIB,"bs %D must be 1",bs);
847   nloc = n/bs;
848 
849   if (pc_gamg->current_level < pc_gamg_agg->square_graph) {
850     PetscCall(PCGAMGSquareGraph_GAMG(a_pc,Gmat1,&Gmat2));
851   } else Gmat2 = Gmat1;
852 
853   /* get MIS aggs - randomize */
854   PetscCall(PetscMalloc1(nloc, &permute));
855   PetscCall(PetscCalloc1(nloc, &bIndexSet));
856   for (Ii = 0; Ii < nloc; Ii++) permute[Ii] = Ii;
857   PetscCall(PetscRandomCreate(PETSC_COMM_SELF,&random));
858   PetscCall(MatGetOwnershipRange(Gmat1, &Istart, &Iend));
859   for (Ii = 0; Ii < nloc; Ii++) {
860     PetscCall(PetscRandomGetValueReal(random,&hashfact));
861     iSwapIndex = (PetscInt) (hashfact*nloc)%nloc;
862     if (!bIndexSet[iSwapIndex] && iSwapIndex != Ii) {
863       PetscInt iTemp = permute[iSwapIndex];
864       permute[iSwapIndex]   = permute[Ii];
865       permute[Ii]           = iTemp;
866       bIndexSet[iSwapIndex] = PETSC_TRUE;
867     }
868   }
869   PetscCall(PetscFree(bIndexSet));
870   PetscCall(PetscRandomDestroy(&random));
871   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nloc, permute, PETSC_USE_POINTER, &perm));
872   PetscCall(PetscLogEventBegin(petsc_gamg_setup_events[SET4],0,0,0,0));
873   PetscCall(MatCoarsenCreate(comm, &crs));
874   PetscCall(MatCoarsenSetFromOptions(crs));
875   PetscCall(MatCoarsenSetGreedyOrdering(crs, perm));
876   PetscCall(MatCoarsenSetAdjacency(crs, Gmat2));
877   PetscCall(MatCoarsenSetStrictAggs(crs, PETSC_TRUE));
878   PetscCall(MatCoarsenApply(crs));
879   PetscCall(MatCoarsenGetData(crs, agg_lists)); /* output */
880   PetscCall(MatCoarsenDestroy(&crs));
881 
882   PetscCall(ISDestroy(&perm));
883   PetscCall(PetscFree(permute));
884   PetscCall(PetscLogEventEnd(petsc_gamg_setup_events[SET4],0,0,0,0));
885 
886   /* smooth aggs */
887   if (Gmat2 != Gmat1) {
888     const PetscCoarsenData *llist = *agg_lists;
889     PetscCall(smoothAggs(a_pc,Gmat2, Gmat1, *agg_lists));
890     PetscCall(MatDestroy(&Gmat1));
891     *a_Gmat1 = Gmat2; /* output */
892     PetscCall(PetscCDGetMat(llist, &mat));
893     PetscCheck(!mat,comm,PETSC_ERR_ARG_WRONG, "Auxilary matrix with squared graph????");
894   } else {
895     const PetscCoarsenData *llist = *agg_lists;
896     /* see if we have a matrix that takes precedence (returned from MatCoarsenApply) */
897     PetscCall(PetscCDGetMat(llist, &mat));
898     if (mat) {
899       PetscCall(MatDestroy(&Gmat1));
900       *a_Gmat1 = mat; /* output */
901     }
902   }
903   PetscCall(PetscLogEventEnd(PC_GAMGCoarsen_AGG,0,0,0,0));
904   PetscFunctionReturn(0);
905 }
906 
907 /* -------------------------------------------------------------------------- */
908 /*
909  PCGAMGProlongator_AGG
910 
911  Input Parameter:
912  . pc - this
913  . Amat - matrix on this fine level
914  . Graph - used to get ghost data for nodes in
915  . agg_lists - list of aggregates
916  Output Parameter:
917  . a_P_out - prolongation operator to the next level
918  */
919 static PetscErrorCode PCGAMGProlongator_AGG(PC pc,Mat Amat,Mat Gmat,PetscCoarsenData *agg_lists,Mat *a_P_out)
920 {
921   PC_MG          *mg       = (PC_MG*)pc->data;
922   PC_GAMG        *pc_gamg  = (PC_GAMG*)mg->innerctx;
923   const PetscInt col_bs = pc_gamg->data_cell_cols;
924   PetscInt       Istart,Iend,nloc,ii,jj,kk,my0,nLocalSelected,bs;
925   Mat            Prol;
926   PetscMPIInt    size;
927   MPI_Comm       comm;
928   PetscReal      *data_w_ghost;
929   PetscInt       myCrs0, nbnodes=0, *flid_fgid;
930   MatType        mtype;
931 
932   PetscFunctionBegin;
933   PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
934   PetscCheckFalse(col_bs < 1,comm,PETSC_ERR_PLIB,"Column bs cannot be less than 1");
935   PetscCall(PetscLogEventBegin(PC_GAMGProlongator_AGG,0,0,0,0));
936   PetscCallMPI(MPI_Comm_size(comm, &size));
937   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
938   PetscCall(MatGetBlockSize(Amat, &bs));
939   nloc = (Iend-Istart)/bs; my0 = Istart/bs;
940   PetscCheckFalse((Iend-Istart) % bs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"(Iend %D - Istart %D) not divisible by bs %D",Iend,Istart,bs);
941 
942   /* get 'nLocalSelected' */
943   for (ii=0, nLocalSelected = 0; ii < nloc; ii++) {
944     PetscBool ise;
945     /* filter out singletons 0 or 1? */
946     PetscCall(PetscCDEmptyAt(agg_lists, ii, &ise));
947     if (!ise) nLocalSelected++;
948   }
949 
950   /* create prolongator, create P matrix */
951   PetscCall(MatGetType(Amat,&mtype));
952   PetscCall(MatCreate(comm, &Prol));
953   PetscCall(MatSetSizes(Prol,nloc*bs,nLocalSelected*col_bs,PETSC_DETERMINE,PETSC_DETERMINE));
954   PetscCall(MatSetBlockSizes(Prol, bs, col_bs));
955   PetscCall(MatSetType(Prol, mtype));
956   PetscCall(MatSeqAIJSetPreallocation(Prol,col_bs, NULL));
957   PetscCall(MatMPIAIJSetPreallocation(Prol,col_bs, NULL, col_bs, NULL));
958 
959   /* can get all points "removed" */
960   PetscCall(MatGetSize(Prol, &kk, &ii));
961   if (!ii) {
962     PetscCall(PetscInfo(pc,"%s: No selected points on coarse grid\n"));
963     PetscCall(MatDestroy(&Prol));
964     *a_P_out = NULL;  /* out */
965     PetscCall(PetscLogEventEnd(PC_GAMGProlongator_AGG,0,0,0,0));
966     PetscFunctionReturn(0);
967   }
968   PetscCall(PetscInfo(pc,"%s: New grid %D nodes\n",((PetscObject)pc)->prefix,ii/col_bs));
969   PetscCall(MatGetOwnershipRangeColumn(Prol, &myCrs0, &kk));
970 
971   PetscCheckFalse((kk-myCrs0) % col_bs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"(kk %D -myCrs0 %D) not divisible by col_bs %D",kk,myCrs0,col_bs);
972   myCrs0 = myCrs0/col_bs;
973   PetscCheckFalse((kk/col_bs-myCrs0) != nLocalSelected,PETSC_COMM_SELF,PETSC_ERR_PLIB,"(kk %D/col_bs %D - myCrs0 %D) != nLocalSelected %D)",kk,col_bs,myCrs0,nLocalSelected);
974 
975   /* create global vector of data in 'data_w_ghost' */
976   PetscCall(PetscLogEventBegin(petsc_gamg_setup_events[SET7],0,0,0,0));
977   if (size > 1) { /*  */
978     PetscReal *tmp_gdata,*tmp_ldata,*tp2;
979     PetscCall(PetscMalloc1(nloc, &tmp_ldata));
980     for (jj = 0; jj < col_bs; jj++) {
981       for (kk = 0; kk < bs; kk++) {
982         PetscInt        ii,stride;
983         const PetscReal *tp = pc_gamg->data + jj*bs*nloc + kk;
984         for (ii = 0; ii < nloc; ii++, tp += bs) tmp_ldata[ii] = *tp;
985 
986         PetscCall(PCGAMGGetDataWithGhosts(Gmat, 1, tmp_ldata, &stride, &tmp_gdata));
987 
988         if (!jj && !kk) { /* now I know how many todal nodes - allocate */
989           PetscCall(PetscMalloc1(stride*bs*col_bs, &data_w_ghost));
990           nbnodes = bs*stride;
991         }
992         tp2 = data_w_ghost + jj*bs*stride + kk;
993         for (ii = 0; ii < stride; ii++, tp2 += bs) *tp2 = tmp_gdata[ii];
994         PetscCall(PetscFree(tmp_gdata));
995       }
996     }
997     PetscCall(PetscFree(tmp_ldata));
998   } else {
999     nbnodes      = bs*nloc;
1000     data_w_ghost = (PetscReal*)pc_gamg->data;
1001   }
1002 
1003   /* get P0 */
1004   if (size > 1) {
1005     PetscReal *fid_glid_loc,*fiddata;
1006     PetscInt  stride;
1007 
1008     PetscCall(PetscMalloc1(nloc, &fid_glid_loc));
1009     for (kk=0; kk<nloc; kk++) fid_glid_loc[kk] = (PetscReal)(my0+kk);
1010     PetscCall(PCGAMGGetDataWithGhosts(Gmat, 1, fid_glid_loc, &stride, &fiddata));
1011     PetscCall(PetscMalloc1(stride, &flid_fgid));
1012     for (kk=0; kk<stride; kk++) flid_fgid[kk] = (PetscInt)fiddata[kk];
1013     PetscCall(PetscFree(fiddata));
1014 
1015     PetscCheckFalse(stride != nbnodes/bs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"stride %D != nbnodes %D/bs %D",stride,nbnodes,bs);
1016     PetscCall(PetscFree(fid_glid_loc));
1017   } else {
1018     PetscCall(PetscMalloc1(nloc, &flid_fgid));
1019     for (kk=0; kk<nloc; kk++) flid_fgid[kk] = my0 + kk;
1020   }
1021   PetscCall(PetscLogEventEnd(petsc_gamg_setup_events[SET7],0,0,0,0));
1022   PetscCall(PetscLogEventBegin(petsc_gamg_setup_events[SET8],0,0,0,0));
1023   {
1024     PetscReal *data_out = NULL;
1025     PetscCall(formProl0(agg_lists, bs, col_bs, myCrs0, nbnodes,data_w_ghost, flid_fgid, &data_out, Prol));
1026     PetscCall(PetscFree(pc_gamg->data));
1027 
1028     pc_gamg->data           = data_out;
1029     pc_gamg->data_cell_rows = col_bs;
1030     pc_gamg->data_sz        = col_bs*col_bs*nLocalSelected;
1031   }
1032   PetscCall(PetscLogEventEnd(petsc_gamg_setup_events[SET8],0,0,0,0));
1033   if (size > 1) PetscCall(PetscFree(data_w_ghost));
1034   PetscCall(PetscFree(flid_fgid));
1035 
1036   *a_P_out = Prol;  /* out */
1037 
1038   PetscCall(PetscLogEventEnd(PC_GAMGProlongator_AGG,0,0,0,0));
1039   PetscFunctionReturn(0);
1040 }
1041 
1042 /* -------------------------------------------------------------------------- */
1043 /*
1044    PCGAMGOptProlongator_AGG
1045 
1046   Input Parameter:
1047    . pc - this
1048    . Amat - matrix on this fine level
1049  In/Output Parameter:
1050    . a_P - prolongation operator to the next level
1051 */
1052 static PetscErrorCode PCGAMGOptProlongator_AGG(PC pc,Mat Amat,Mat *a_P)
1053 {
1054   PC_MG          *mg          = (PC_MG*)pc->data;
1055   PC_GAMG        *pc_gamg     = (PC_GAMG*)mg->innerctx;
1056   PC_GAMG_AGG    *pc_gamg_agg = (PC_GAMG_AGG*)pc_gamg->subctx;
1057   PetscInt       jj;
1058   Mat            Prol  = *a_P;
1059   MPI_Comm       comm;
1060   KSP            eksp;
1061   Vec            bb, xx;
1062   PC             epc;
1063   PetscReal      alpha, emax, emin;
1064 
1065   PetscFunctionBegin;
1066   PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
1067   PetscCall(PetscLogEventBegin(PC_GAMGOptProlongator_AGG,0,0,0,0));
1068 
1069   /* compute maximum singular value of operator to be used in smoother */
1070   if (0 < pc_gamg_agg->nsmooths) {
1071     /* get eigen estimates */
1072     if (pc_gamg->emax > 0) {
1073       emin = pc_gamg->emin;
1074       emax = pc_gamg->emax;
1075     } else {
1076       const char *prefix;
1077 
1078       PetscCall(MatCreateVecs(Amat, &bb, NULL));
1079       PetscCall(MatCreateVecs(Amat, &xx, NULL));
1080       PetscCall(KSPSetNoisy_Private(bb));
1081 
1082       PetscCall(KSPCreate(comm,&eksp));
1083       PetscCall(PCGetOptionsPrefix(pc,&prefix));
1084       PetscCall(KSPSetOptionsPrefix(eksp,prefix));
1085       PetscCall(KSPAppendOptionsPrefix(eksp,"pc_gamg_esteig_"));
1086       {
1087         PetscBool sflg;
1088         PetscCall(MatGetOption(Amat, MAT_SPD, &sflg));
1089         if (sflg) {
1090           PetscCall(KSPSetType(eksp, KSPCG));
1091         }
1092       }
1093       PetscCall(KSPSetErrorIfNotConverged(eksp,pc->erroriffailure));
1094       PetscCall(KSPSetNormType(eksp, KSP_NORM_NONE));
1095 
1096       PetscCall(KSPSetInitialGuessNonzero(eksp, PETSC_FALSE));
1097       PetscCall(KSPSetOperators(eksp, Amat, Amat));
1098 
1099       PetscCall(KSPGetPC(eksp, &epc));
1100       PetscCall(PCSetType(epc, PCJACOBI));  /* smoother in smoothed agg. */
1101 
1102       PetscCall(KSPSetTolerances(eksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT, 10)); // 10 is safer, but 5 is often fine, can override with -pc_gamg_esteig_ksp_max_it -mg_levels_ksp_chebyshev_esteig 0,0.25,0,1.2
1103 
1104       PetscCall(KSPSetFromOptions(eksp));
1105       PetscCall(KSPSetComputeSingularValues(eksp,PETSC_TRUE));
1106       PetscCall(KSPSolve(eksp, bb, xx));
1107       PetscCall(KSPCheckSolve(eksp,pc,xx));
1108 
1109       PetscCall(KSPComputeExtremeSingularValues(eksp, &emax, &emin));
1110       PetscCall(PetscInfo(pc,"%s: Smooth P0: max eigen=%e min=%e PC=%s\n",((PetscObject)pc)->prefix,(double)emax,(double)emin,PCJACOBI));
1111       PetscCall(VecDestroy(&xx));
1112       PetscCall(VecDestroy(&bb));
1113       PetscCall(KSPDestroy(&eksp));
1114     }
1115     if (pc_gamg->use_sa_esteig) {
1116       mg->min_eigen_DinvA[pc_gamg->current_level] = emin;
1117       mg->max_eigen_DinvA[pc_gamg->current_level] = emax;
1118       PetscCall(PetscInfo(pc,"%s: Smooth P0: level %D, cache spectra %g %g\n",((PetscObject)pc)->prefix,pc_gamg->current_level,(double)emin,(double)emax));
1119     } else {
1120       mg->min_eigen_DinvA[pc_gamg->current_level] = 0;
1121       mg->max_eigen_DinvA[pc_gamg->current_level] = 0;
1122     }
1123   } else {
1124     mg->min_eigen_DinvA[pc_gamg->current_level] = 0;
1125     mg->max_eigen_DinvA[pc_gamg->current_level] = 0;
1126   }
1127 
1128   /* smooth P0 */
1129   for (jj = 0; jj < pc_gamg_agg->nsmooths; jj++) {
1130     Mat tMat;
1131     Vec diag;
1132 
1133     PetscCall(PetscLogEventBegin(petsc_gamg_setup_events[SET9],0,0,0,0));
1134 
1135     /* smooth P1 := (I - omega/lam D^{-1}A)P0 */
1136     PetscCall(PetscLogEventBegin(petsc_gamg_setup_matmat_events[pc_gamg->current_level][2],0,0,0,0));
1137     PetscCall(MatMatMult(Amat, Prol, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &tMat));
1138     PetscCall(PetscLogEventEnd(petsc_gamg_setup_matmat_events[pc_gamg->current_level][2],0,0,0,0));
1139     PetscCall(MatProductClear(tMat));
1140     PetscCall(MatCreateVecs(Amat, &diag, NULL));
1141     PetscCall(MatGetDiagonal(Amat, diag)); /* effectively PCJACOBI */
1142     PetscCall(VecReciprocal(diag));
1143     PetscCall(MatDiagonalScale(tMat, diag, NULL));
1144     PetscCall(VecDestroy(&diag));
1145 
1146     /* TODO: Set a PCFailedReason and exit the building of the AMG preconditioner */
1147     PetscCheckFalse(emax == 0.0,PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Computed maximum singular value as zero");
1148     /* TODO: Document the 1.4 and don't hardwire it in this routine */
1149     alpha = -1.4/emax;
1150 
1151     PetscCall(MatAYPX(tMat, alpha, Prol, SUBSET_NONZERO_PATTERN));
1152     PetscCall(MatDestroy(&Prol));
1153     Prol = tMat;
1154     PetscCall(PetscLogEventEnd(petsc_gamg_setup_events[SET9],0,0,0,0));
1155   }
1156   PetscCall(PetscLogEventEnd(PC_GAMGOptProlongator_AGG,0,0,0,0));
1157   *a_P = Prol;
1158   PetscFunctionReturn(0);
1159 }
1160 
1161 /* -------------------------------------------------------------------------- */
1162 /*
1163    PCCreateGAMG_AGG
1164 
1165   Input Parameter:
1166    . pc -
1167 */
1168 PetscErrorCode  PCCreateGAMG_AGG(PC pc)
1169 {
1170   PC_MG          *mg      = (PC_MG*)pc->data;
1171   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1172   PC_GAMG_AGG    *pc_gamg_agg;
1173 
1174   PetscFunctionBegin;
1175   /* create sub context for SA */
1176   PetscCall(PetscNewLog(pc,&pc_gamg_agg));
1177   pc_gamg->subctx = pc_gamg_agg;
1178 
1179   pc_gamg->ops->setfromoptions = PCSetFromOptions_GAMG_AGG;
1180   pc_gamg->ops->destroy        = PCDestroy_GAMG_AGG;
1181   /* reset does not do anything; setup not virtual */
1182 
1183   /* set internal function pointers */
1184   pc_gamg->ops->graph             = PCGAMGGraph_AGG;
1185   pc_gamg->ops->coarsen           = PCGAMGCoarsen_AGG;
1186   pc_gamg->ops->prolongator       = PCGAMGProlongator_AGG;
1187   pc_gamg->ops->optprolongator    = PCGAMGOptProlongator_AGG;
1188   pc_gamg->ops->createdefaultdata = PCSetData_AGG;
1189   pc_gamg->ops->view              = PCView_GAMG_AGG;
1190 
1191   pc_gamg_agg->square_graph = 1;
1192   pc_gamg_agg->sym_graph    = PETSC_FALSE;
1193   pc_gamg_agg->nsmooths     = 1;
1194 
1195   PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetNSmooths_C",PCGAMGSetNSmooths_AGG));
1196   PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetSymGraph_C",PCGAMGSetSymGraph_AGG));
1197   PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetSquareGraph_C",PCGAMGSetSquareGraph_AGG));
1198   PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCSetCoordinates_C",PCSetCoordinates_AGG));
1199   PetscFunctionReturn(0);
1200 }
1201