xref: /petsc/src/ksp/pc/impls/gamg/agg.c (revision ecceeb7d86a3b9d2c0da2aced471d46acf67b452)
/*
 GAMG geometric-algebraic multigrid PC - Mark Adams 2011
 */
4 
5 #include <../src/ksp/pc/impls/gamg/gamg.h> /*I "petscpc.h" I*/
6 #include <petscblaslapack.h>
7 #include <petscdm.h>
8 #include <petsc/private/kspimpl.h>
9 
/* Private context for the aggregation (smoothed-aggregation) flavor of PCGAMG */
typedef struct {
  PetscInt   nsmooths;                     // number of prolongation smoothing steps (see PCGAMGSetNSmooths())
  PetscInt   aggressive_coarsening_levels; // number of aggressive coarsening levels (square or MISk)
  PetscInt   aggressive_mis_k;             // the k in MIS-k
  PetscBool  use_aggressive_square_graph;  // use the square graph A'A (old method) instead of MIS-k on aggressive levels
  PetscBool  use_minimum_degree_ordering;  // visit vertices in minimum-degree order in the greedy MIS
  PetscBool  use_low_mem_filter;           // filter inside MatCreateGraph() instead of building a filtered copy here
  MatCoarsen crs;                          // coarsener object; created in PCGAMGCreateGraph_AGG()
} PC_GAMG_AGG;
19 
/*@
  PCGAMGSetNSmooths - Set number of smoothing steps (1 is typical) used for multigrid on all the levels

  Logically Collective

  Input Parameters:
+ pc - the preconditioner context
- n  - the number of smooths

  Options Database Key:
. -pc_gamg_agg_nsmooths <nsmooth, default=1> - number of smoothing steps to use with smooth aggregation

  Level: intermediate

.seealso: `PCMG`, `PCGAMG`
@*/
PetscErrorCode PCGAMGSetNSmooths(PC pc, PetscInt n)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscValidLogicalCollectiveInt(pc, n, 2);
  /* forward to the type-specific implementation; silently a no-op if the PC is not GAMG-AGG */
  PetscTryMethod(pc, "PCGAMGSetNSmooths_C", (PC, PetscInt), (pc, n));
  PetscFunctionReturn(PETSC_SUCCESS);
}
44 
/* Type-specific implementation of PCGAMGSetNSmooths(): records n in the AGG subcontext */
static PetscErrorCode PCGAMGSetNSmooths_AGG(PC pc, PetscInt n)
{
  PC_MG       *mg          = (PC_MG *)pc->data;
  PC_GAMG     *pc_gamg     = (PC_GAMG *)mg->innerctx;
  PC_GAMG_AGG *pc_gamg_agg = (PC_GAMG_AGG *)pc_gamg->subctx;

  PetscFunctionBegin;
  pc_gamg_agg->nsmooths = n;
  PetscFunctionReturn(PETSC_SUCCESS);
}
55 
/*@
  PCGAMGSetAggressiveLevels -  Use aggressive coarsening on first n levels

  Logically Collective

  Input Parameters:
+ pc - the preconditioner context
- n  - 0, 1 or more

  Options Database Key:
. -pc_gamg_aggressive_coarsening <n,default = 1> - Number of levels to square the graph on before aggregating it

  Level: intermediate

.seealso: `PCGAMG`, `PCGAMGSetThreshold()`, `PCGAMGMISkSetAggressive()`, `PCGAMGSetAggressiveSquareGraph()`, `PCGAMGMISkSetMinDegreeOrdering()`, `PCGAMGSetLowMemoryFilter()`
@*/
PetscErrorCode PCGAMGSetAggressiveLevels(PC pc, PetscInt n)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscValidLogicalCollectiveInt(pc, n, 2);
  /* forward to the type-specific implementation; silently a no-op if the PC is not GAMG-AGG */
  PetscTryMethod(pc, "PCGAMGSetAggressiveLevels_C", (PC, PetscInt), (pc, n));
  PetscFunctionReturn(PETSC_SUCCESS);
}
80 
/*@
  PCGAMGMISkSetAggressive - Number (k) distance in MIS coarsening (>2 is 'aggressive')

  Logically Collective

  Input Parameters:
+ pc - the preconditioner context
- n  - 1 or more (default = 2)

  Options Database Key:
. -pc_gamg_aggressive_mis_k <n,default=2> - Number (k) distance in MIS coarsening (>2 is 'aggressive')

  Level: intermediate

.seealso: `PCGAMG`, `PCGAMGSetThreshold()`, `PCGAMGSetAggressiveLevels()`, `PCGAMGSetAggressiveSquareGraph()`, `PCGAMGMISkSetMinDegreeOrdering()`, `PCGAMGSetLowMemoryFilter()`
@*/
PetscErrorCode PCGAMGMISkSetAggressive(PC pc, PetscInt n)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscValidLogicalCollectiveInt(pc, n, 2);
  /* forward to the type-specific implementation; silently a no-op if the PC is not GAMG-AGG */
  PetscTryMethod(pc, "PCGAMGMISkSetAggressive_C", (PC, PetscInt), (pc, n));
  PetscFunctionReturn(PETSC_SUCCESS);
}
105 
/*@
  PCGAMGSetAggressiveSquareGraph - Use graph square A'A for aggressive coarsening, old method

  Logically Collective

  Input Parameters:
+ pc - the preconditioner context
- b  - default false - MIS-k is faster

  Options Database Key:
. -pc_gamg_aggressive_square_graph <bool,default=false> - Use square graph (A'A) or MIS-k (k=2) for aggressive coarsening

  Level: intermediate

.seealso: `PCGAMG`, `PCGAMGSetThreshold()`, `PCGAMGSetAggressiveLevels()`, `PCGAMGMISkSetAggressive()`, `PCGAMGMISkSetMinDegreeOrdering()`, `PCGAMGSetLowMemoryFilter()`
@*/
PetscErrorCode PCGAMGSetAggressiveSquareGraph(PC pc, PetscBool b)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscValidLogicalCollectiveBool(pc, b, 2);
  /* forward to the type-specific implementation; silently a no-op if the PC is not GAMG-AGG */
  PetscTryMethod(pc, "PCGAMGSetAggressiveSquareGraph_C", (PC, PetscBool), (pc, b));
  PetscFunctionReturn(PETSC_SUCCESS);
}
130 
/*@
  PCGAMGMISkSetMinDegreeOrdering - Use minimum degree ordering in greedy MIS algorithm

  Logically Collective

  Input Parameters:
+ pc - the preconditioner context
- b  - default true

  Options Database Key:
. -pc_gamg_mis_k_minimum_degree_ordering <bool,default=true> - Use minimum degree ordering in greedy MIS algorithm

  Level: intermediate

.seealso: `PCGAMG`, `PCGAMGSetThreshold()`, `PCGAMGSetAggressiveLevels()`, `PCGAMGMISkSetAggressive()`, `PCGAMGSetAggressiveSquareGraph()`, `PCGAMGSetLowMemoryFilter()`
@*/
PetscErrorCode PCGAMGMISkSetMinDegreeOrdering(PC pc, PetscBool b)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscValidLogicalCollectiveBool(pc, b, 2);
  /* forward to the type-specific implementation; silently a no-op if the PC is not GAMG-AGG */
  PetscTryMethod(pc, "PCGAMGMISkSetMinDegreeOrdering_C", (PC, PetscBool), (pc, b));
  PetscFunctionReturn(PETSC_SUCCESS);
}
155 
/*@
  PCGAMGSetLowMemoryFilter - Use low memory graph/matrix filter

  Logically Collective

  Input Parameters:
+ pc - the preconditioner context
- b  - default false

  Options Database Key:
. -pc_gamg_low_memory_threshold_filter <bool,default=false> - Use low memory graph/matrix filter

  Level: intermediate

.seealso: `PCGAMG`, `PCGAMGSetThreshold()`, `PCGAMGSetAggressiveLevels()`, `PCGAMGMISkSetAggressive()`, `PCGAMGSetAggressiveSquareGraph()`, `PCGAMGMISkSetMinDegreeOrdering()`
@*/
PetscErrorCode PCGAMGSetLowMemoryFilter(PC pc, PetscBool b)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscValidLogicalCollectiveBool(pc, b, 2);
  /* forward to the type-specific implementation; silently a no-op if the PC is not GAMG-AGG */
  PetscTryMethod(pc, "PCGAMGSetLowMemoryFilter_C", (PC, PetscBool), (pc, b));
  PetscFunctionReturn(PETSC_SUCCESS);
}
180 
/* Type-specific implementation of PCGAMGSetAggressiveLevels(): records n in the AGG subcontext */
static PetscErrorCode PCGAMGSetAggressiveLevels_AGG(PC pc, PetscInt n)
{
  PC_MG       *mg          = (PC_MG *)pc->data;
  PC_GAMG     *pc_gamg     = (PC_GAMG *)mg->innerctx;
  PC_GAMG_AGG *pc_gamg_agg = (PC_GAMG_AGG *)pc_gamg->subctx;

  PetscFunctionBegin;
  pc_gamg_agg->aggressive_coarsening_levels = n;
  PetscFunctionReturn(PETSC_SUCCESS);
}
191 
/* Type-specific implementation of PCGAMGMISkSetAggressive(): records the MIS-k distance k */
static PetscErrorCode PCGAMGMISkSetAggressive_AGG(PC pc, PetscInt n)
{
  PC_MG       *mg          = (PC_MG *)pc->data;
  PC_GAMG     *pc_gamg     = (PC_GAMG *)mg->innerctx;
  PC_GAMG_AGG *pc_gamg_agg = (PC_GAMG_AGG *)pc_gamg->subctx;

  PetscFunctionBegin;
  pc_gamg_agg->aggressive_mis_k = n;
  PetscFunctionReturn(PETSC_SUCCESS);
}
202 
/* Type-specific implementation of PCGAMGSetAggressiveSquareGraph(): records the square-graph flag */
static PetscErrorCode PCGAMGSetAggressiveSquareGraph_AGG(PC pc, PetscBool b)
{
  PC_MG       *mg          = (PC_MG *)pc->data;
  PC_GAMG     *pc_gamg     = (PC_GAMG *)mg->innerctx;
  PC_GAMG_AGG *pc_gamg_agg = (PC_GAMG_AGG *)pc_gamg->subctx;

  PetscFunctionBegin;
  pc_gamg_agg->use_aggressive_square_graph = b;
  PetscFunctionReturn(PETSC_SUCCESS);
}
213 
/* Type-specific implementation of PCGAMGSetLowMemoryFilter(): records the low-memory-filter flag */
static PetscErrorCode PCGAMGSetLowMemoryFilter_AGG(PC pc, PetscBool b)
{
  PC_MG       *mg          = (PC_MG *)pc->data;
  PC_GAMG     *pc_gamg     = (PC_GAMG *)mg->innerctx;
  PC_GAMG_AGG *pc_gamg_agg = (PC_GAMG_AGG *)pc_gamg->subctx;

  PetscFunctionBegin;
  pc_gamg_agg->use_low_mem_filter = b;
  PetscFunctionReturn(PETSC_SUCCESS);
}
224 
/* Type-specific implementation of PCGAMGMISkSetMinDegreeOrdering(): records the ordering flag */
static PetscErrorCode PCGAMGMISkSetMinDegreeOrdering_AGG(PC pc, PetscBool b)
{
  PC_MG       *mg          = (PC_MG *)pc->data;
  PC_GAMG     *pc_gamg     = (PC_GAMG *)mg->innerctx;
  PC_GAMG_AGG *pc_gamg_agg = (PC_GAMG_AGG *)pc_gamg->subctx;

  PetscFunctionBegin;
  pc_gamg_agg->use_minimum_degree_ordering = b;
  PetscFunctionReturn(PETSC_SUCCESS);
}
235 
236 static PetscErrorCode PCSetFromOptions_GAMG_AGG(PC pc, PetscOptionItems *PetscOptionsObject)
237 {
238   PC_MG       *mg          = (PC_MG *)pc->data;
239   PC_GAMG     *pc_gamg     = (PC_GAMG *)mg->innerctx;
240   PC_GAMG_AGG *pc_gamg_agg = (PC_GAMG_AGG *)pc_gamg->subctx;
241   PetscBool    n_aggressive_flg, old_sq_provided = PETSC_FALSE, new_sq_provided = PETSC_FALSE, new_sqr_graph = pc_gamg_agg->use_aggressive_square_graph;
242   PetscInt     nsq_graph_old = 0;
243 
244   PetscFunctionBegin;
245   PetscOptionsHeadBegin(PetscOptionsObject, "GAMG-AGG options");
246   PetscCall(PetscOptionsInt("-pc_gamg_agg_nsmooths", "smoothing steps for smoothed aggregation, usually 1", "PCGAMGSetNSmooths", pc_gamg_agg->nsmooths, &pc_gamg_agg->nsmooths, NULL));
247   // aggressive coarsening logic with deprecated -pc_gamg_square_graph
248   PetscCall(PetscOptionsInt("-pc_gamg_aggressive_coarsening", "Number of aggressive coarsening (MIS-2) levels from finest", "PCGAMGSetAggressiveLevels", pc_gamg_agg->aggressive_coarsening_levels, &pc_gamg_agg->aggressive_coarsening_levels, &n_aggressive_flg));
249   if (!n_aggressive_flg)
250     PetscCall(PetscOptionsInt("-pc_gamg_square_graph", "Number of aggressive coarsening (MIS-2) levels from finest (deprecated alias for -pc_gamg_aggressive_coarsening)", "PCGAMGSetAggressiveLevels", nsq_graph_old, &nsq_graph_old, &old_sq_provided));
251   PetscCall(PetscOptionsBool("-pc_gamg_aggressive_square_graph", "Use square graph (A'A) or MIS-k (k=2) for aggressive coarsening", "PCGAMGSetAggressiveSquareGraph", new_sqr_graph, &pc_gamg_agg->use_aggressive_square_graph, &new_sq_provided));
252   if (!new_sq_provided && old_sq_provided) {
253     pc_gamg_agg->aggressive_coarsening_levels = nsq_graph_old; // could be zero
254     pc_gamg_agg->use_aggressive_square_graph  = PETSC_TRUE;
255   }
256   if (new_sq_provided && old_sq_provided)
257     PetscCall(PetscInfo(pc, "Warning: both -pc_gamg_square_graph and -pc_gamg_aggressive_coarsening are used. -pc_gamg_square_graph is deprecated, Number of aggressive levels is %d\n", (int)pc_gamg_agg->aggressive_coarsening_levels));
258   PetscCall(PetscOptionsBool("-pc_gamg_mis_k_minimum_degree_ordering", "Use minimum degree ordering for greedy MIS", "PCGAMGMISkSetMinDegreeOrdering", pc_gamg_agg->use_minimum_degree_ordering, &pc_gamg_agg->use_minimum_degree_ordering, NULL));
259   PetscCall(PetscOptionsBool("-pc_gamg_low_memory_threshold_filter", "Use the (built-in) low memory graph/matrix filter", "PCGAMGSetLowMemoryFilter", pc_gamg_agg->use_low_mem_filter, &pc_gamg_agg->use_low_mem_filter, NULL));
260   PetscCall(PetscOptionsInt("-pc_gamg_aggressive_mis_k", "Number of levels of multigrid to use.", "PCGAMGMISkSetAggressive", pc_gamg_agg->aggressive_mis_k, &pc_gamg_agg->aggressive_mis_k, NULL));
261   PetscOptionsHeadEnd();
262   PetscFunctionReturn(PETSC_SUCCESS);
263 }
264 
/* Free the AGG subcontext and clear all composed method pointers registered for this type */
static PetscErrorCode PCDestroy_GAMG_AGG(PC pc)
{
  PC_MG   *mg      = (PC_MG *)pc->data;
  PC_GAMG *pc_gamg = (PC_GAMG *)mg->innerctx;

  PetscFunctionBegin;
  PetscCall(PetscFree(pc_gamg->subctx));
  /* composing NULL removes the function from the object so a later PCSetType() starts clean */
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCGAMGSetNSmooths_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCGAMGSetAggressiveLevels_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCGAMGMISkSetAggressive_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCGAMGMISkSetMinDegreeOrdering_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCGAMGSetLowMemoryFilter_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCGAMGSetAggressiveSquareGraph_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSetCoordinates_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
281 
/*
   PCSetCoordinates_AGG - build the near-null-space data (rigid body modes) from nodal coordinates

   Collective

   Input Parameters:
   . pc - the preconditioner context
   . ndm - dimension of data (used for dof/vertex for Stokes)
   . a_nloc - number of vertices local
   . coords - [a_nloc][ndm] - interleaved coordinate data: {x_0, y_0, z_0, x_1, y_1, ...}

   Fills pc_gamg->data column-oriented with ndf rows per node and data_cell_cols columns:
   translational modes (identity block) plus, when coordinates are given, rotational modes
   (3 columns in 2D, 6 in 3D).
*/

static PetscErrorCode PCSetCoordinates_AGG(PC pc, PetscInt ndm, PetscInt a_nloc, PetscReal *coords)
{
  PC_MG   *mg      = (PC_MG *)pc->data;
  PC_GAMG *pc_gamg = (PC_GAMG *)mg->innerctx;
  PetscInt arrsz, kk, ii, jj, nloc, ndatarows, ndf;
  Mat      mat = pc->pmat;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  nloc = a_nloc;

  /* SA: null space vectors */
  PetscCall(MatGetBlockSize(mat, &ndf));               /* this does not work for Stokes */
  if (coords && ndf == 1) pc_gamg->data_cell_cols = 1; /* scalar w/ coords and SA (not needed) */
  else if (coords) {
    PetscCheck(ndm <= ndf, PETSC_COMM_SELF, PETSC_ERR_PLIB, "degrees of motion %" PetscInt_FMT " > block size %" PetscInt_FMT, ndm, ndf);
    pc_gamg->data_cell_cols = (ndm == 2 ? 3 : 6); /* displacement elasticity */
    if (ndm != ndf) PetscCheck(pc_gamg->data_cell_cols == ndf, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Don't know how to create null space for ndm=%" PetscInt_FMT ", ndf=%" PetscInt_FMT ".  Use MatSetNearNullSpace().", ndm, ndf);
  } else pc_gamg->data_cell_cols = ndf; /* no data, force SA with constant null space vectors */
  pc_gamg->data_cell_rows = ndatarows = ndf;
  PetscCheck(pc_gamg->data_cell_cols > 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "pc_gamg->data_cell_cols %" PetscInt_FMT " <= 0", pc_gamg->data_cell_cols);
  arrsz = nloc * pc_gamg->data_cell_rows * pc_gamg->data_cell_cols;

  /* (re)allocate only when the size changed; +1 avoids a zero-length allocation */
  if (!pc_gamg->data || (pc_gamg->data_sz != arrsz)) {
    PetscCall(PetscFree(pc_gamg->data));
    PetscCall(PetscMalloc1(arrsz + 1, &pc_gamg->data));
  }
  /* copy data in - column oriented */
  for (kk = 0; kk < nloc; kk++) {
    const PetscInt M    = nloc * pc_gamg->data_cell_rows; /* stride into data */
    PetscReal     *data = &pc_gamg->data[kk * ndatarows]; /* start of cell */
    if (pc_gamg->data_cell_cols == 1) *data = 1.0;
    else {
      /* translational modes */
      for (ii = 0; ii < ndatarows; ii++) {
        for (jj = 0; jj < ndatarows; jj++) {
          if (ii == jj) data[ii * M + jj] = 1.0;
          else data[ii * M + jj] = 0.0;
        }
      }

      /* rotational modes */
      if (coords) {
        if (ndm == 2) {
          /* single in-plane rotation: (-y, x) about the origin */
          data += 2 * M;
          data[0] = -coords[2 * kk + 1];
          data[1] = coords[2 * kk];
        } else {
          /* three rotations about the x, y, z axes, written column-wise */
          data += 3 * M;
          data[0]         = 0.0;
          data[M + 0]     = coords[3 * kk + 2];
          data[2 * M + 0] = -coords[3 * kk + 1];
          data[1]         = -coords[3 * kk + 2];
          data[M + 1]     = 0.0;
          data[2 * M + 1] = coords[3 * kk];
          data[2]         = coords[3 * kk + 1];
          data[M + 2]     = -coords[3 * kk];
          data[2 * M + 2] = 0.0;
        }
      }
    }
  }
  pc_gamg->data_sz = arrsz;
  PetscFunctionReturn(PETSC_SUCCESS);
}
360 
/*
   PCSetData_AGG - called if data is not set with PCSetCoordinates.
      Looks in Mat for near null space, then in an attached DM's first field,
      and falls back to constant (block-size) null vectors.
      Does not work for Stokes

  Input Parameters:
   . pc - the preconditioner context
   . a_A - matrix to get (near) null space out of.
*/
static PetscErrorCode PCSetData_AGG(PC pc, Mat a_A)
{
  PC_MG       *mg      = (PC_MG *)pc->data;
  PC_GAMG     *pc_gamg = (PC_GAMG *)mg->innerctx;
  MatNullSpace mnull;

  PetscFunctionBegin;
  PetscCall(MatGetNearNullSpace(a_A, &mnull));
  if (!mnull) {
    /* no null space on the matrix: try the DM (from PC first, then from the matrix) */
    DM dm;
    PetscCall(PCGetDM(pc, &dm));
    if (!dm) PetscCall(MatGetDM(a_A, &dm));
    if (dm) {
      PetscObject deformation;
      PetscInt    Nf;

      PetscCall(DMGetNumFields(dm, &Nf));
      if (Nf) {
        /* look for a (near) null space attached to the first field object */
        PetscCall(DMGetField(dm, 0, NULL, &deformation));
        PetscCall(PetscObjectQuery((PetscObject)deformation, "nearnullspace", (PetscObject *)&mnull));
        if (!mnull) PetscCall(PetscObjectQuery((PetscObject)deformation, "nullspace", (PetscObject *)&mnull));
      }
    }
  }

  if (!mnull) {
    /* nothing found: fall back to constant null vectors via PCSetCoordinates_AGG(coords=NULL) */
    PetscInt bs, NN, MM;
    PetscCall(MatGetBlockSize(a_A, &bs));
    PetscCall(MatGetLocalSize(a_A, &MM, &NN));
    PetscCheck(MM % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MM %" PetscInt_FMT " must be divisible by bs %" PetscInt_FMT, MM, bs);
    PetscCall(PCSetCoordinates_AGG(pc, bs, MM / bs, NULL));
  } else {
    /* copy the null space vectors (real parts), prepending a constant vector if present */
    PetscReal         *nullvec;
    PetscBool          has_const;
    PetscInt           i, j, mlocal, nvec, bs;
    const Vec         *vecs;
    const PetscScalar *v;

    PetscCall(MatGetLocalSize(a_A, &mlocal, NULL));
    PetscCall(MatNullSpaceGetVecs(mnull, &has_const, &nvec, &vecs));
    for (i = 0; i < nvec; i++) {
      PetscCall(VecGetLocalSize(vecs[i], &j));
      PetscCheck(j == mlocal, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Attached null space vector size %" PetscInt_FMT " != matrix size %" PetscInt_FMT, j, mlocal);
    }
    pc_gamg->data_sz = (nvec + !!has_const) * mlocal;
    PetscCall(PetscMalloc1((nvec + !!has_const) * mlocal, &nullvec));
    if (has_const)
      for (i = 0; i < mlocal; i++) nullvec[i] = 1.0;
    for (i = 0; i < nvec; i++) {
      PetscCall(VecGetArrayRead(vecs[i], &v));
      for (j = 0; j < mlocal; j++) nullvec[(i + !!has_const) * mlocal + j] = PetscRealPart(v[j]);
      PetscCall(VecRestoreArrayRead(vecs[i], &v));
    }
    /* ownership of nullvec transfers to pc_gamg->data (freed by GAMG) */
    pc_gamg->data           = nullvec;
    pc_gamg->data_cell_cols = (nvec + !!has_const);
    PetscCall(MatGetBlockSize(a_A, &bs));
    pc_gamg->data_cell_rows = bs;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
430 
/*
  formProl0 - collect null space data for each aggregate, do QR, put R in coarse grid data and Q in P_0

  Input Parameters:
   . agg_llists - list of arrays with aggregates -- list from selected vertices of aggregate unselected vertices
   . bs - row block size
   . nSAvec - column bs of new P
   . my0crs - global index of start of locals
   . data_stride - bs*(nloc nodes + ghost nodes) [data_stride][nSAvec]
   . data_in[data_stride*nSAvec] - local data on fine grid
   . flid_fgid[data_stride/bs] - make local to global IDs, includes ghosts in 'locals_llist'

  Output Parameters:
   . a_data_out - in with fine grid data (w/ghosts), out with coarse grid data
   . a_Prol - prolongation operator
*/
static PetscErrorCode formProl0(PetscCoarsenData *agg_llists, PetscInt bs, PetscInt nSAvec, PetscInt my0crs, PetscInt data_stride, PetscReal data_in[], const PetscInt flid_fgid[], PetscReal **a_data_out, Mat a_Prol)
{
  PetscInt        Istart, my0, Iend, nloc, clid, flid = 0, aggID, kk, jj, ii, mm, nSelected, minsz, nghosts, out_data_stride;
  MPI_Comm        comm;
  PetscReal      *out_data;
  PetscCDIntNd   *pos;
  PCGAMGHashTable fgid_flid; /* maps ghost global IDs -> local indices in data_in */

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)a_Prol, &comm));
  PetscCall(MatGetOwnershipRange(a_Prol, &Istart, &Iend));
  nloc = (Iend - Istart) / bs; /* local number of fine nodes (block rows) */
  my0  = Istart / bs;
  PetscCheck((Iend - Istart) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Iend %" PetscInt_FMT " - Istart %" PetscInt_FMT " must be divisible by bs %" PetscInt_FMT, Iend, Istart, bs);
  Iend /= bs;
  nghosts = data_stride / bs - nloc;

  /* hash table resolves aggregate members that live on other ranks */
  PetscCall(PCGAMGHashTableCreate(2 * nghosts + 1, &fgid_flid));
  for (kk = 0; kk < nghosts; kk++) PetscCall(PCGAMGHashTableAdd(&fgid_flid, flid_fgid[nloc + kk], nloc + kk));

  /* count selected -- same as number of cols of P */
  for (nSelected = mm = 0; mm < nloc; mm++) {
    PetscBool ise;
    PetscCall(PetscCDIsEmptyAt(agg_llists, mm, &ise));
    if (!ise) nSelected++;
  }
  /* sanity-check coarse ownership against the aggregate count */
  PetscCall(MatGetOwnershipRangeColumn(a_Prol, &ii, &jj));
  PetscCheck((ii / nSAvec) == my0crs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "ii %" PetscInt_FMT " /nSAvec %" PetscInt_FMT "  != my0crs %" PetscInt_FMT, ii, nSAvec, my0crs);
  PetscCheck(nSelected == (jj - ii) / nSAvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nSelected %" PetscInt_FMT " != (jj %" PetscInt_FMT " - ii %" PetscInt_FMT ")/nSAvec %" PetscInt_FMT, nSelected, jj, ii, nSAvec);

  /* aloc space for coarse point data (output) */
  out_data_stride = nSelected * nSAvec;

  PetscCall(PetscMalloc1(out_data_stride * nSAvec, &out_data));
  /* PETSC_MAX_REAL marks "not yet written"; checked below before storing R */
  for (ii = 0; ii < out_data_stride * nSAvec; ii++) out_data[ii] = PETSC_MAX_REAL;
  *a_data_out = out_data; /* output - stride nSelected*nSAvec */

  /* find points and set prolongation */
  minsz = 100;
  for (mm = clid = 0; mm < nloc; mm++) {
    PetscCall(PetscCDCountAt(agg_llists, mm, &jj));
    if (jj > 0) {
      const PetscInt lid = mm, cgid = my0crs + clid;
      PetscInt       cids[100]; /* max bs */
      PetscBLASInt   asz = jj, M = asz * bs, N = nSAvec, INFO;
      /* pad to at least N rows so LAPACK QR is well-posed for small aggregates */
      PetscBLASInt   Mdata = M + ((N - M > 0) ? N - M : 0), LDA = Mdata, LWORK = N * bs;
      PetscScalar   *qqc, *qqr, *TAU, *WORK;
      PetscInt      *fids;
      PetscReal     *data;

      /* count agg */
      if (asz < minsz) minsz = asz;

      /* get block */
      PetscCall(PetscMalloc5(Mdata * N, &qqc, M * N, &qqr, N, &TAU, LWORK, &WORK, M, &fids));

      aggID = 0;
      PetscCall(PetscCDGetHeadPos(agg_llists, lid, &pos));
      while (pos) {
        PetscInt gid1;
        PetscCall(PetscCDIntNdGetID(pos, &gid1));
        PetscCall(PetscCDGetNextPos(agg_llists, lid, &pos));

        /* map global ID to a local index into data_in (ghosts go through the hash table) */
        if (gid1 >= my0 && gid1 < Iend) flid = gid1 - my0;
        else {
          PetscCall(PCGAMGHashTableFind(&fgid_flid, gid1, &flid));
          PetscCheck(flid >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot find gid1 in table");
        }
        /* copy in B_i matrix - column oriented */
        data = &data_in[flid * bs];
        for (ii = 0; ii < bs; ii++) {
          for (jj = 0; jj < N; jj++) {
            PetscReal d                       = data[jj * data_stride + ii];
            qqc[jj * Mdata + aggID * bs + ii] = d;
          }
        }
        /* set fine IDs */
        for (kk = 0; kk < bs; kk++) fids[aggID * bs + kk] = flid_fgid[flid] * bs + kk;
        aggID++;
      }

      /* pad with zeros */
      /* NOTE(review): the trailing kk++ here looks vestigial (kk is not read in this loop) — confirm */
      for (ii = asz * bs; ii < Mdata; ii++) {
        for (jj = 0; jj < N; jj++, kk++) qqc[jj * Mdata + ii] = .0;
      }

      /* QR */
      PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
      PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Mdata, &N, qqc, &LDA, TAU, WORK, &LWORK, &INFO));
      PetscCall(PetscFPTrapPop());
      PetscCheck(INFO == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "xGEQRF error");
      /* get R - column oriented - output B_{i+1} */
      {
        PetscReal *data = &out_data[clid * nSAvec];
        for (jj = 0; jj < nSAvec; jj++) {
          for (ii = 0; ii < nSAvec; ii++) {
            /* each slot must still hold the PETSC_MAX_REAL sentinel (written exactly once) */
            PetscCheck(data[jj * out_data_stride + ii] == PETSC_MAX_REAL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "data[jj*out_data_stride + ii] != %e", (double)PETSC_MAX_REAL);
            if (ii <= jj) data[jj * out_data_stride + ii] = PetscRealPart(qqc[jj * Mdata + ii]);
            else data[jj * out_data_stride + ii] = 0.;
          }
        }
      }

      /* get Q - row oriented */
      PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Mdata, &N, &N, qqc, &LDA, TAU, WORK, &LWORK, &INFO));
      PetscCheck(INFO == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "xORGQR error arg %" PetscBLASInt_FMT, -INFO);

      /* transpose Q from column-major (LAPACK) to row-major for MatSetValues */
      for (ii = 0; ii < M; ii++) {
        for (jj = 0; jj < N; jj++) qqr[N * ii + jj] = qqc[jj * Mdata + ii];
      }

      /* add diagonal block of P0 */
      for (kk = 0; kk < N; kk++) { cids[kk] = N * cgid + kk; /* global col IDs in P0 */ }
      PetscCall(MatSetValues(a_Prol, M, fids, N, cids, qqr, INSERT_VALUES));
      PetscCall(PetscFree5(qqc, qqr, TAU, WORK, fids));
      clid++;
    } /* coarse agg */
  }   /* for all fine nodes */
  PetscCall(MatAssemblyBegin(a_Prol, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(a_Prol, MAT_FINAL_ASSEMBLY));
  PetscCall(PCGAMGHashTableDestroy(&fgid_flid));
  PetscFunctionReturn(PETSC_SUCCESS);
}
570 
571 static PetscErrorCode PCView_GAMG_AGG(PC pc, PetscViewer viewer)
572 {
573   PC_MG       *mg          = (PC_MG *)pc->data;
574   PC_GAMG     *pc_gamg     = (PC_GAMG *)mg->innerctx;
575   PC_GAMG_AGG *pc_gamg_agg = (PC_GAMG_AGG *)pc_gamg->subctx;
576 
577   PetscFunctionBegin;
578   PetscCall(PetscViewerASCIIPrintf(viewer, "      AGG specific options\n"));
579   PetscCall(PetscViewerASCIIPrintf(viewer, "        Number of levels of aggressive coarsening %d\n", (int)pc_gamg_agg->aggressive_coarsening_levels));
580   if (pc_gamg_agg->aggressive_coarsening_levels > 0) {
581     PetscCall(PetscViewerASCIIPrintf(viewer, "        %s aggressive coarsening\n", !pc_gamg_agg->use_aggressive_square_graph ? "MIS-k" : "Square graph"));
582     if (!pc_gamg_agg->use_aggressive_square_graph) PetscCall(PetscViewerASCIIPrintf(viewer, "        MIS-%d coarsening on aggressive levels\n", (int)pc_gamg_agg->aggressive_mis_k));
583   }
584   PetscCall(PetscViewerASCIIPrintf(viewer, "        Number smoothing steps %d\n", (int)pc_gamg_agg->nsmooths));
585   PetscFunctionReturn(PETSC_SUCCESS);
586 }
587 
/*
   PCGAMGCreateGraph_AGG - build the (possibly filtered) scalar graph used for coarsening.

   Creates the MatCoarsen object, reconciles coarsener type with aggressive-coarsening
   settings (HEM disables aggressive; MIS forces the square-graph method), then builds
   the graph with MatCreateGraph().  When the low-memory filter is not used and a
   threshold is set, a filtered copy of the graph is assembled by hand below.
*/
static PetscErrorCode PCGAMGCreateGraph_AGG(PC pc, Mat Amat, Mat *a_Gmat)
{
  PC_MG          *mg          = (PC_MG *)pc->data;
  PC_GAMG        *pc_gamg     = (PC_GAMG *)mg->innerctx;
  PC_GAMG_AGG    *pc_gamg_agg = (PC_GAMG_AGG *)pc_gamg->subctx;
  const PetscReal vfilter     = pc_gamg->threshold[pc_gamg->current_level];
  PetscBool       ishem, ismis;
  const char     *prefix;
  MatInfo         info0, info1;
  PetscInt        bs;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(petsc_gamg_setup_events[GAMG_COARSEN], 0, 0, 0, 0));
  /* Note: depending on the algorithm that will be used for computing the coarse grid points this should pass PETSC_TRUE or PETSC_FALSE as the first argument */
  /* MATCOARSENHEM requires numerical weights for edges so ensure they are computed */
  PetscCall(MatCoarsenCreate(PetscObjectComm((PetscObject)pc), &pc_gamg_agg->crs));
  PetscCall(PetscObjectGetOptionsPrefix((PetscObject)pc, &prefix));
  PetscCall(PetscObjectSetOptionsPrefix((PetscObject)pc_gamg_agg->crs, prefix));
  PetscCall(MatCoarsenSetFromOptions(pc_gamg_agg->crs));
  PetscCall(PetscObjectTypeCompare((PetscObject)pc_gamg_agg->crs, MATCOARSENHEM, &ishem));
  if (ishem) {
    if (pc_gamg_agg->aggressive_coarsening_levels) PetscCall(PetscInfo(pc, "HEM and aggressive coarsening ignored: HEM using %d iterations\n", (int)pc_gamg_agg->crs->max_it));
    pc_gamg_agg->aggressive_coarsening_levels = 0;                                         // aggressive and HEM does not make sense
    PetscCall(MatCoarsenSetMaximumIterations(pc_gamg_agg->crs, pc_gamg_agg->crs->max_it)); // for code coverage
    PetscCall(MatCoarsenSetThreshold(pc_gamg_agg->crs, vfilter));                          // for code coverage
  } else {
    PetscCall(PetscObjectTypeCompare((PetscObject)pc_gamg_agg->crs, MATCOARSENMIS, &ismis));
    if (ismis && pc_gamg_agg->aggressive_coarsening_levels && !pc_gamg_agg->use_aggressive_square_graph) {
      PetscCall(PetscInfo(pc, "MIS and aggressive coarsening and no square graph: force square graph\n"));
      pc_gamg_agg->use_aggressive_square_graph = PETSC_TRUE;
    }
  }
  PetscCall(PetscLogEventEnd(petsc_gamg_setup_events[GAMG_COARSEN], 0, 0, 0, 0));
  PetscCall(PetscLogEventBegin(petsc_gamg_setup_events[GAMG_GRAPH], 0, 0, 0, 0));
  PetscCall(MatGetInfo(Amat, MAT_LOCAL, &info0)); /* global reduction */

  if (ishem || pc_gamg_agg->use_low_mem_filter) {
    /* filter inside MatCreateGraph (HEM always needs weights, hence filter when ishem) */
    PetscCall(MatCreateGraph(Amat, PETSC_TRUE, (vfilter >= 0 || ishem) ? PETSC_TRUE : PETSC_FALSE, vfilter, a_Gmat));
  } else {
    // make scalar graph, symmetrize if not known to be symmetric, scale, but do not filter (expensive)
    PetscCall(MatCreateGraph(Amat, PETSC_TRUE, PETSC_TRUE, -1, a_Gmat));
    if (vfilter >= 0) {
      /* build a filtered copy tGmat of the graph, dropping entries with |value| <= vfilter */
      PetscInt           Istart, Iend, ncols, nnz0, nnz1, NN, MM, nloc;
      Mat                tGmat, Gmat = *a_Gmat;
      MPI_Comm           comm;
      const PetscScalar *vals;
      const PetscInt    *idx;
      PetscInt          *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols = 0;
      MatScalar         *AA; // this is checked in graph
      PetscBool          isseqaij;
      Mat                a, b, c;
      MatType            jtype;

      PetscCall(PetscObjectGetComm((PetscObject)Gmat, &comm));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATSEQAIJ, &isseqaij));
      PetscCall(MatGetType(Gmat, &jtype));
      PetscCall(MatCreate(comm, &tGmat));
      PetscCall(MatSetType(tGmat, jtype));

      /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold?
        Also, if the matrix is symmetric, can we skip this
        operation? It can be very expensive on large matrices. */

      // global sizes
      PetscCall(MatGetSize(Gmat, &MM, &NN));
      PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend));
      nloc = Iend - Istart;
      PetscCall(PetscMalloc2(nloc, &d_nnz, nloc, &o_nnz));
      if (isseqaij) {
        a = Gmat;
        b = NULL;
      } else {
        /* MPIAIJ: a = diagonal block, b = off-diagonal block, garray maps b's cols to global */
        Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
        a             = d->A;
        b             = d->B;
        garray        = d->garray;
      }
      /* Determine upper bound on non-zeros needed in new filtered matrix */
      for (PetscInt row = 0; row < nloc; row++) {
        PetscCall(MatGetRow(a, row, &ncols, NULL, NULL));
        d_nnz[row] = ncols;
        if (ncols > maxcols) maxcols = ncols;
        PetscCall(MatRestoreRow(a, row, &ncols, NULL, NULL));
      }
      if (b) {
        for (PetscInt row = 0; row < nloc; row++) {
          PetscCall(MatGetRow(b, row, &ncols, NULL, NULL));
          o_nnz[row] = ncols;
          if (ncols > maxcols) maxcols = ncols;
          PetscCall(MatRestoreRow(b, row, &ncols, NULL, NULL));
        }
      }
      PetscCall(MatSetSizes(tGmat, nloc, nloc, MM, MM));
      PetscCall(MatSetBlockSizes(tGmat, 1, 1));
      PetscCall(MatSeqAIJSetPreallocation(tGmat, 0, d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(tGmat, 0, d_nnz, 0, o_nnz));
      PetscCall(MatSetOption(tGmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
      PetscCall(PetscFree2(d_nnz, o_nnz));
      PetscCall(PetscMalloc2(maxcols, &AA, maxcols, &AJ));
      nnz0 = nnz1 = 0;
      /* two passes: kk=0 over the diagonal block a, kk=1 over the off-diagonal block b (if any) */
      for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
        for (PetscInt row = 0, grow = Istart, ncol_row, jj; row < nloc; row++, grow++) {
          PetscCall(MatGetRow(c, row, &ncols, &idx, &vals));
          for (ncol_row = jj = 0; jj < ncols; jj++, nnz0++) {
            PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
            if (PetscRealPart(sv) > vfilter) {
              PetscInt cid = idx[jj] + Istart; //diag
              nnz1++;
              if (c != a) cid = garray[idx[jj]];
              AA[ncol_row] = vals[jj];
              AJ[ncol_row] = cid;
              ncol_row++;
            }
          }
          PetscCall(MatRestoreRow(c, row, &ncols, &idx, &vals));
          PetscCall(MatSetValues(tGmat, 1, &grow, ncol_row, AJ, AA, INSERT_VALUES));
        }
      }
      PetscCall(PetscFree2(AA, AJ));
      PetscCall(MatAssemblyBegin(tGmat, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(tGmat, MAT_FINAL_ASSEMBLY));
      PetscCall(MatPropagateSymmetryOptions(Gmat, tGmat)); /* Normal Mat options are not relevant ? */
      PetscCall(PetscInfo(pc, "\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %" PetscInt_FMT "\n", (!nnz0) ? 1. : 100. * (double)nnz1 / (double)nnz0, (double)vfilter, (!nloc) ? 1. : (double)nnz0 / (double)nloc, MM, maxcols));
      PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view"));
      /* replace the unfiltered graph with the filtered one */
      PetscCall(MatDestroy(&Gmat));
      *a_Gmat = tGmat;
    }
  }

  PetscCall(MatGetInfo(*a_Gmat, MAT_LOCAL, &info1)); /* global reduction */
  PetscCall(MatGetBlockSize(Amat, &bs));
  if (info0.nz_used > 0) PetscCall(PetscInfo(pc, "Filtering left %g %% edges in graph (%e %e)\n", 100.0 * info1.nz_used * (double)(bs * bs) / info0.nz_used, info0.nz_used, info1.nz_used));
  PetscCall(PetscLogEventEnd(petsc_gamg_setup_events[GAMG_GRAPH], 0, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
723 
724 typedef PetscInt    NState;
725 static const NState NOT_DONE = -2;
726 static const NState DELETED  = -1;
727 static const NState REMOVED  = -3;
728 #define IS_SELECTED(s) (s != DELETED && s != NOT_DONE && s != REMOVED)
729 
730 /*
731    fixAggregatesWithSquare - greedy grab of with G1 (unsquared graph) -- AIJ specific -- change to fixAggregatesWithSquare -- TODD
732      - AGG-MG specific: clears singletons out of 'selected_2'
733 
734    Input Parameter:
735    . Gmat_2 - global matrix of squared graph (data not defined)
736    . Gmat_1 - base graph to grab with base graph
737    Input/Output Parameter:
738    . aggs_2 - linked list of aggs with gids)
739 */
740 static PetscErrorCode fixAggregatesWithSquare(PC pc, Mat Gmat_2, Mat Gmat_1, PetscCoarsenData *aggs_2)
741 {
742   PetscBool      isMPI;
743   Mat_SeqAIJ    *matA_1, *matB_1 = NULL;
744   MPI_Comm       comm;
745   PetscInt       lid, *ii, *idx, ix, Iend, my0, kk, n, j;
746   Mat_MPIAIJ    *mpimat_2 = NULL, *mpimat_1 = NULL;
747   const PetscInt nloc = Gmat_2->rmap->n;
748   PetscScalar   *cpcol_1_state, *cpcol_2_state, *cpcol_2_par_orig, *lid_parent_gid;
749   PetscInt      *lid_cprowID_1 = NULL;
750   NState        *lid_state;
751   Vec            ghost_par_orig2;
752   PetscMPIInt    rank;
753 
754   PetscFunctionBegin;
755   PetscCall(PetscObjectGetComm((PetscObject)Gmat_2, &comm));
756   PetscCallMPI(MPI_Comm_rank(comm, &rank));
757   PetscCall(MatGetOwnershipRange(Gmat_1, &my0, &Iend));
758 
759   /* get submatrices */
760   PetscCall(PetscStrbeginswith(((PetscObject)Gmat_1)->type_name, MATMPIAIJ, &isMPI));
761   PetscCall(PetscInfo(pc, "isMPI = %s\n", isMPI ? "yes" : "no"));
762   PetscCall(PetscMalloc3(nloc, &lid_state, nloc, &lid_parent_gid, nloc, &lid_cprowID_1));
763   for (lid = 0; lid < nloc; lid++) lid_cprowID_1[lid] = -1;
764   if (isMPI) {
765     /* grab matrix objects */
766     mpimat_2 = (Mat_MPIAIJ *)Gmat_2->data;
767     mpimat_1 = (Mat_MPIAIJ *)Gmat_1->data;
768     matA_1   = (Mat_SeqAIJ *)mpimat_1->A->data;
769     matB_1   = (Mat_SeqAIJ *)mpimat_1->B->data;
770 
771     /* force compressed row storage for B matrix in AuxMat */
772     PetscCall(MatCheckCompressedRow(mpimat_1->B, matB_1->nonzerorowcnt, &matB_1->compressedrow, matB_1->i, Gmat_1->rmap->n, -1.0));
773     for (ix = 0; ix < matB_1->compressedrow.nrows; ix++) {
774       PetscInt lid = matB_1->compressedrow.rindex[ix];
775       PetscCheck(lid <= nloc && lid >= -1, PETSC_COMM_SELF, PETSC_ERR_USER, "lid %d out of range. nloc = %d", (int)lid, (int)nloc);
776       if (lid != -1) lid_cprowID_1[lid] = ix;
777     }
778   } else {
779     PetscBool isAIJ;
780     PetscCall(PetscStrbeginswith(((PetscObject)Gmat_1)->type_name, MATSEQAIJ, &isAIJ));
781     PetscCheck(isAIJ, PETSC_COMM_SELF, PETSC_ERR_USER, "Require AIJ matrix.");
782     matA_1 = (Mat_SeqAIJ *)Gmat_1->data;
783   }
784   if (nloc > 0) { PetscCheck(!matB_1 || matB_1->compressedrow.use, PETSC_COMM_SELF, PETSC_ERR_PLIB, "matB_1 && !matB_1->compressedrow.use: PETSc bug???"); }
785   /* get state of locals and selected gid for deleted */
786   for (lid = 0; lid < nloc; lid++) {
787     lid_parent_gid[lid] = -1.0;
788     lid_state[lid]      = DELETED;
789   }
790 
791   /* set lid_state */
792   for (lid = 0; lid < nloc; lid++) {
793     PetscCDIntNd *pos;
794     PetscCall(PetscCDGetHeadPos(aggs_2, lid, &pos));
795     if (pos) {
796       PetscInt gid1;
797 
798       PetscCall(PetscCDIntNdGetID(pos, &gid1));
799       PetscCheck(gid1 == lid + my0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "gid1 %d != lid %d + my0 %d", (int)gid1, (int)lid, (int)my0);
800       lid_state[lid] = gid1;
801     }
802   }
803 
804   /* map local to selected local, DELETED means a ghost owns it */
805   for (lid = kk = 0; lid < nloc; lid++) {
806     NState state = lid_state[lid];
807     if (IS_SELECTED(state)) {
808       PetscCDIntNd *pos;
809       PetscCall(PetscCDGetHeadPos(aggs_2, lid, &pos));
810       while (pos) {
811         PetscInt gid1;
812         PetscCall(PetscCDIntNdGetID(pos, &gid1));
813         PetscCall(PetscCDGetNextPos(aggs_2, lid, &pos));
814         if (gid1 >= my0 && gid1 < Iend) lid_parent_gid[gid1 - my0] = (PetscScalar)(lid + my0);
815       }
816     }
817   }
818   /* get 'cpcol_1/2_state' & cpcol_2_par_orig - uses mpimat_1/2->lvec for temp space */
819   if (isMPI) {
820     Vec tempVec;
821     /* get 'cpcol_1_state' */
822     PetscCall(MatCreateVecs(Gmat_1, &tempVec, NULL));
823     for (kk = 0, j = my0; kk < nloc; kk++, j++) {
824       PetscScalar v = (PetscScalar)lid_state[kk];
825       PetscCall(VecSetValues(tempVec, 1, &j, &v, INSERT_VALUES));
826     }
827     PetscCall(VecAssemblyBegin(tempVec));
828     PetscCall(VecAssemblyEnd(tempVec));
829     PetscCall(VecScatterBegin(mpimat_1->Mvctx, tempVec, mpimat_1->lvec, INSERT_VALUES, SCATTER_FORWARD));
830     PetscCall(VecScatterEnd(mpimat_1->Mvctx, tempVec, mpimat_1->lvec, INSERT_VALUES, SCATTER_FORWARD));
831     PetscCall(VecGetArray(mpimat_1->lvec, &cpcol_1_state));
832     /* get 'cpcol_2_state' */
833     PetscCall(VecScatterBegin(mpimat_2->Mvctx, tempVec, mpimat_2->lvec, INSERT_VALUES, SCATTER_FORWARD));
834     PetscCall(VecScatterEnd(mpimat_2->Mvctx, tempVec, mpimat_2->lvec, INSERT_VALUES, SCATTER_FORWARD));
835     PetscCall(VecGetArray(mpimat_2->lvec, &cpcol_2_state));
836     /* get 'cpcol_2_par_orig' */
837     for (kk = 0, j = my0; kk < nloc; kk++, j++) {
838       PetscScalar v = (PetscScalar)lid_parent_gid[kk];
839       PetscCall(VecSetValues(tempVec, 1, &j, &v, INSERT_VALUES));
840     }
841     PetscCall(VecAssemblyBegin(tempVec));
842     PetscCall(VecAssemblyEnd(tempVec));
843     PetscCall(VecDuplicate(mpimat_2->lvec, &ghost_par_orig2));
844     PetscCall(VecScatterBegin(mpimat_2->Mvctx, tempVec, ghost_par_orig2, INSERT_VALUES, SCATTER_FORWARD));
845     PetscCall(VecScatterEnd(mpimat_2->Mvctx, tempVec, ghost_par_orig2, INSERT_VALUES, SCATTER_FORWARD));
846     PetscCall(VecGetArray(ghost_par_orig2, &cpcol_2_par_orig));
847 
848     PetscCall(VecDestroy(&tempVec));
849   } /* ismpi */
850   for (lid = 0; lid < nloc; lid++) {
851     NState state = lid_state[lid];
852     if (IS_SELECTED(state)) {
853       /* steal locals */
854       ii  = matA_1->i;
855       n   = ii[lid + 1] - ii[lid];
856       idx = matA_1->j + ii[lid];
857       for (j = 0; j < n; j++) {
858         PetscInt lidj   = idx[j], sgid;
859         NState   statej = lid_state[lidj];
860         if (statej == DELETED && (sgid = (PetscInt)PetscRealPart(lid_parent_gid[lidj])) != lid + my0) { /* steal local */
861           lid_parent_gid[lidj] = (PetscScalar)(lid + my0);                                              /* send this if sgid is not local */
862           if (sgid >= my0 && sgid < Iend) {                                                             /* I'm stealing this local from a local sgid */
863             PetscInt      hav = 0, slid = sgid - my0, gidj = lidj + my0;
864             PetscCDIntNd *pos, *last = NULL;
865             /* looking for local from local so id_llist_2 works */
866             PetscCall(PetscCDGetHeadPos(aggs_2, slid, &pos));
867             while (pos) {
868               PetscInt gid;
869               PetscCall(PetscCDIntNdGetID(pos, &gid));
870               if (gid == gidj) {
871                 PetscCheck(last, PETSC_COMM_SELF, PETSC_ERR_PLIB, "last cannot be null");
872                 PetscCall(PetscCDRemoveNextNode(aggs_2, slid, last));
873                 PetscCall(PetscCDAppendNode(aggs_2, lid, pos));
874                 hav = 1;
875                 break;
876               } else last = pos;
877               PetscCall(PetscCDGetNextPos(aggs_2, slid, &pos));
878             }
879             if (hav != 1) {
880               PetscCheck(hav, PETSC_COMM_SELF, PETSC_ERR_PLIB, "failed to find adj in 'selected' lists - structurally unsymmetric matrix");
881               SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "found node %d times???", (int)hav);
882             }
883           } else { /* I'm stealing this local, owned by a ghost */
884             PetscCheck(sgid == -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Mat has an un-symmetric graph. Use '-%spc_gamg_sym_graph true' to symmetrize the graph or '-%spc_gamg_threshold -1' if the matrix is structurally symmetric.",
885                        ((PetscObject)pc)->prefix ? ((PetscObject)pc)->prefix : "", ((PetscObject)pc)->prefix ? ((PetscObject)pc)->prefix : "");
886             PetscCall(PetscCDAppendID(aggs_2, lid, lidj + my0));
887           }
888         }
889       } /* local neighbors */
890     } else if (state == DELETED /* && lid_cprowID_1 */) {
891       PetscInt sgidold = (PetscInt)PetscRealPart(lid_parent_gid[lid]);
892       /* see if I have a selected ghost neighbor that will steal me */
893       if ((ix = lid_cprowID_1[lid]) != -1) {
894         ii  = matB_1->compressedrow.i;
895         n   = ii[ix + 1] - ii[ix];
896         idx = matB_1->j + ii[ix];
897         for (j = 0; j < n; j++) {
898           PetscInt cpid   = idx[j];
899           NState   statej = (NState)PetscRealPart(cpcol_1_state[cpid]);
900           if (IS_SELECTED(statej) && sgidold != (PetscInt)statej) { /* ghost will steal this, remove from my list */
901             lid_parent_gid[lid] = (PetscScalar)statej;              /* send who selected */
902             if (sgidold >= my0 && sgidold < Iend) {                 /* this was mine */
903               PetscInt      hav = 0, oldslidj = sgidold - my0;
904               PetscCDIntNd *pos, *last        = NULL;
905               /* remove from 'oldslidj' list */
906               PetscCall(PetscCDGetHeadPos(aggs_2, oldslidj, &pos));
907               while (pos) {
908                 PetscInt gid;
909                 PetscCall(PetscCDIntNdGetID(pos, &gid));
910                 if (lid + my0 == gid) {
911                   /* id_llist_2[lastid] = id_llist_2[flid];   /\* remove lid from oldslidj list *\/ */
912                   PetscCheck(last, PETSC_COMM_SELF, PETSC_ERR_PLIB, "last cannot be null");
913                   PetscCall(PetscCDRemoveNextNode(aggs_2, oldslidj, last));
914                   /* ghost (PetscScalar)statej will add this later */
915                   hav = 1;
916                   break;
917                 } else last = pos;
918                 PetscCall(PetscCDGetNextPos(aggs_2, oldslidj, &pos));
919               }
920               if (hav != 1) {
921                 PetscCheck(hav, PETSC_COMM_SELF, PETSC_ERR_PLIB, "failed to find (hav=%d) adj in 'selected' lists - structurally unsymmetric matrix", (int)hav);
922                 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "found node %d times???", (int)hav);
923               }
924             } else {
925               /* TODO: ghosts remove this later */
926             }
927           }
928         }
929       }
930     } /* selected/deleted */
931   }   /* node loop */
932 
933   if (isMPI) {
934     PetscScalar    *cpcol_2_parent, *cpcol_2_gid;
935     Vec             tempVec, ghostgids2, ghostparents2;
936     PetscInt        cpid, nghost_2;
937     PCGAMGHashTable gid_cpid;
938 
939     PetscCall(VecGetSize(mpimat_2->lvec, &nghost_2));
940     PetscCall(MatCreateVecs(Gmat_2, &tempVec, NULL));
941 
942     /* get 'cpcol_2_parent' */
943     for (kk = 0, j = my0; kk < nloc; kk++, j++) { PetscCall(VecSetValues(tempVec, 1, &j, &lid_parent_gid[kk], INSERT_VALUES)); }
944     PetscCall(VecAssemblyBegin(tempVec));
945     PetscCall(VecAssemblyEnd(tempVec));
946     PetscCall(VecDuplicate(mpimat_2->lvec, &ghostparents2));
947     PetscCall(VecScatterBegin(mpimat_2->Mvctx, tempVec, ghostparents2, INSERT_VALUES, SCATTER_FORWARD));
948     PetscCall(VecScatterEnd(mpimat_2->Mvctx, tempVec, ghostparents2, INSERT_VALUES, SCATTER_FORWARD));
949     PetscCall(VecGetArray(ghostparents2, &cpcol_2_parent));
950 
951     /* get 'cpcol_2_gid' */
952     for (kk = 0, j = my0; kk < nloc; kk++, j++) {
953       PetscScalar v = (PetscScalar)j;
954       PetscCall(VecSetValues(tempVec, 1, &j, &v, INSERT_VALUES));
955     }
956     PetscCall(VecAssemblyBegin(tempVec));
957     PetscCall(VecAssemblyEnd(tempVec));
958     PetscCall(VecDuplicate(mpimat_2->lvec, &ghostgids2));
959     PetscCall(VecScatterBegin(mpimat_2->Mvctx, tempVec, ghostgids2, INSERT_VALUES, SCATTER_FORWARD));
960     PetscCall(VecScatterEnd(mpimat_2->Mvctx, tempVec, ghostgids2, INSERT_VALUES, SCATTER_FORWARD));
961     PetscCall(VecGetArray(ghostgids2, &cpcol_2_gid));
962     PetscCall(VecDestroy(&tempVec));
963 
964     /* look for deleted ghosts and add to table */
965     PetscCall(PCGAMGHashTableCreate(2 * nghost_2 + 1, &gid_cpid));
966     for (cpid = 0; cpid < nghost_2; cpid++) {
967       NState state = (NState)PetscRealPart(cpcol_2_state[cpid]);
968       if (state == DELETED) {
969         PetscInt sgid_new = (PetscInt)PetscRealPart(cpcol_2_parent[cpid]);
970         PetscInt sgid_old = (PetscInt)PetscRealPart(cpcol_2_par_orig[cpid]);
971         if (sgid_old == -1 && sgid_new != -1) {
972           PetscInt gid = (PetscInt)PetscRealPart(cpcol_2_gid[cpid]);
973           PetscCall(PCGAMGHashTableAdd(&gid_cpid, gid, cpid));
974         }
975       }
976     }
977 
978     /* look for deleted ghosts and see if they moved - remove it */
979     for (lid = 0; lid < nloc; lid++) {
980       NState state = lid_state[lid];
981       if (IS_SELECTED(state)) {
982         PetscCDIntNd *pos, *last = NULL;
983         /* look for deleted ghosts and see if they moved */
984         PetscCall(PetscCDGetHeadPos(aggs_2, lid, &pos));
985         while (pos) {
986           PetscInt gid;
987           PetscCall(PetscCDIntNdGetID(pos, &gid));
988 
989           if (gid < my0 || gid >= Iend) {
990             PetscCall(PCGAMGHashTableFind(&gid_cpid, gid, &cpid));
991             if (cpid != -1) {
992               /* a moved ghost - */
993               /* id_llist_2[lastid] = id_llist_2[flid];    /\* remove 'flid' from list *\/ */
994               PetscCall(PetscCDRemoveNextNode(aggs_2, lid, last));
995             } else last = pos;
996           } else last = pos;
997 
998           PetscCall(PetscCDGetNextPos(aggs_2, lid, &pos));
999         } /* loop over list of deleted */
1000       }   /* selected */
1001     }
1002     PetscCall(PCGAMGHashTableDestroy(&gid_cpid));
1003 
1004     /* look at ghosts, see if they changed - and it */
1005     for (cpid = 0; cpid < nghost_2; cpid++) {
1006       PetscInt sgid_new = (PetscInt)PetscRealPart(cpcol_2_parent[cpid]);
1007       if (sgid_new >= my0 && sgid_new < Iend) { /* this is mine */
1008         PetscInt      gid      = (PetscInt)PetscRealPart(cpcol_2_gid[cpid]);
1009         PetscInt      slid_new = sgid_new - my0, hav = 0;
1010         PetscCDIntNd *pos;
1011 
1012         /* search for this gid to see if I have it */
1013         PetscCall(PetscCDGetHeadPos(aggs_2, slid_new, &pos));
1014         while (pos) {
1015           PetscInt gidj;
1016           PetscCall(PetscCDIntNdGetID(pos, &gidj));
1017           PetscCall(PetscCDGetNextPos(aggs_2, slid_new, &pos));
1018 
1019           if (gidj == gid) {
1020             hav = 1;
1021             break;
1022           }
1023         }
1024         if (hav != 1) {
1025           /* insert 'flidj' into head of llist */
1026           PetscCall(PetscCDAppendID(aggs_2, slid_new, gid));
1027         }
1028       }
1029     }
1030     PetscCall(VecRestoreArray(mpimat_1->lvec, &cpcol_1_state));
1031     PetscCall(VecRestoreArray(mpimat_2->lvec, &cpcol_2_state));
1032     PetscCall(VecRestoreArray(ghostparents2, &cpcol_2_parent));
1033     PetscCall(VecRestoreArray(ghostgids2, &cpcol_2_gid));
1034     PetscCall(VecDestroy(&ghostgids2));
1035     PetscCall(VecDestroy(&ghostparents2));
1036     PetscCall(VecDestroy(&ghost_par_orig2));
1037   }
1038   PetscCall(PetscFree3(lid_state, lid_parent_gid, lid_cprowID_1));
1039   PetscFunctionReturn(PETSC_SUCCESS);
1040 }
1041 
1042 /*
1043    PCGAMGCoarsen_AGG - supports squaring the graph (deprecated) and new graph for
1044      communication of QR data used with HEM and MISk coarsening
1045 
1046   Input Parameter:
1047    . a_pc - this
1048 
1049   Input/Output Parameter:
1050    . a_Gmat1 - graph to coarsen (in), graph off processor edges for QR gather scatter (out)
1051 
1052   Output Parameter:
1053    . agg_lists - list of aggregates
1054 
1055 */
1056 static PetscErrorCode PCGAMGCoarsen_AGG(PC a_pc, Mat *a_Gmat1, PetscCoarsenData **agg_lists)
1057 {
1058   PC_MG       *mg          = (PC_MG *)a_pc->data;
1059   PC_GAMG     *pc_gamg     = (PC_GAMG *)mg->innerctx;
1060   PC_GAMG_AGG *pc_gamg_agg = (PC_GAMG_AGG *)pc_gamg->subctx;
1061   Mat          mat, Gmat2, Gmat1 = *a_Gmat1; /* aggressive graph */
1062   IS           perm;
1063   PetscInt     Istart, Iend, Ii, nloc, bs, nn;
1064   PetscInt    *permute, *degree;
1065   PetscBool   *bIndexSet;
1066   PetscReal    hashfact;
1067   PetscInt     iSwapIndex;
1068   PetscRandom  random;
1069   MPI_Comm     comm;
1070 
1071   PetscFunctionBegin;
1072   PetscCall(PetscObjectGetComm((PetscObject)Gmat1, &comm));
1073   PetscCall(PetscLogEventBegin(petsc_gamg_setup_events[GAMG_COARSEN], 0, 0, 0, 0));
1074   PetscCall(MatGetLocalSize(Gmat1, &nn, NULL));
1075   PetscCall(MatGetBlockSize(Gmat1, &bs));
1076   PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "bs %" PetscInt_FMT " must be 1", bs);
1077   nloc = nn / bs;
1078   /* get MIS aggs - randomize */
1079   PetscCall(PetscMalloc2(nloc, &permute, nloc, &degree));
1080   PetscCall(PetscCalloc1(nloc, &bIndexSet));
1081   for (Ii = 0; Ii < nloc; Ii++) permute[Ii] = Ii;
1082   PetscCall(PetscRandomCreate(PETSC_COMM_SELF, &random));
1083   PetscCall(MatGetOwnershipRange(Gmat1, &Istart, &Iend));
1084   for (Ii = 0; Ii < nloc; Ii++) {
1085     PetscInt nc;
1086     PetscCall(MatGetRow(Gmat1, Istart + Ii, &nc, NULL, NULL));
1087     degree[Ii] = nc;
1088     PetscCall(MatRestoreRow(Gmat1, Istart + Ii, &nc, NULL, NULL));
1089   }
1090   for (Ii = 0; Ii < nloc; Ii++) {
1091     PetscCall(PetscRandomGetValueReal(random, &hashfact));
1092     iSwapIndex = (PetscInt)(hashfact * nloc) % nloc;
1093     if (!bIndexSet[iSwapIndex] && iSwapIndex != Ii) {
1094       PetscInt iTemp        = permute[iSwapIndex];
1095       permute[iSwapIndex]   = permute[Ii];
1096       permute[Ii]           = iTemp;
1097       iTemp                 = degree[iSwapIndex];
1098       degree[iSwapIndex]    = degree[Ii];
1099       degree[Ii]            = iTemp;
1100       bIndexSet[iSwapIndex] = PETSC_TRUE;
1101     }
1102   }
1103   // apply minimum degree ordering -- NEW
1104   if (pc_gamg_agg->use_minimum_degree_ordering) { PetscCall(PetscSortIntWithArray(nloc, degree, permute)); }
1105   PetscCall(PetscFree(bIndexSet));
1106   PetscCall(PetscRandomDestroy(&random));
1107   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nloc, permute, PETSC_USE_POINTER, &perm));
1108   PetscCall(PetscLogEventBegin(petsc_gamg_setup_events[GAMG_MIS], 0, 0, 0, 0));
1109   // square graph
1110   if (pc_gamg->current_level < pc_gamg_agg->aggressive_coarsening_levels && pc_gamg_agg->use_aggressive_square_graph) {
1111     PetscCall(PCGAMGSquareGraph_GAMG(a_pc, Gmat1, &Gmat2));
1112   } else Gmat2 = Gmat1;
1113   // switch to old MIS-1 for square graph
1114   if (pc_gamg->current_level < pc_gamg_agg->aggressive_coarsening_levels) {
1115     if (!pc_gamg_agg->use_aggressive_square_graph) PetscCall(MatCoarsenMISKSetDistance(pc_gamg_agg->crs, pc_gamg_agg->aggressive_mis_k)); // hardwire to MIS-2
1116     else PetscCall(MatCoarsenSetType(pc_gamg_agg->crs, MATCOARSENMIS));                                                                   // old MIS -- side effect
1117   } else if (pc_gamg_agg->use_aggressive_square_graph && pc_gamg_agg->aggressive_coarsening_levels > 0) {                                 // we reset the MIS
1118     const char *prefix;
1119     PetscCall(PetscObjectGetOptionsPrefix((PetscObject)a_pc, &prefix));
1120     PetscCall(PetscObjectSetOptionsPrefix((PetscObject)pc_gamg_agg->crs, prefix));
1121     PetscCall(MatCoarsenSetFromOptions(pc_gamg_agg->crs)); // get the default back on non-aggressive levels when square graph switched to old MIS
1122   }
1123   PetscCall(MatCoarsenSetAdjacency(pc_gamg_agg->crs, Gmat2));
1124   PetscCall(MatCoarsenSetStrictAggs(pc_gamg_agg->crs, PETSC_TRUE));
1125   PetscCall(MatCoarsenSetGreedyOrdering(pc_gamg_agg->crs, perm));
1126   PetscCall(MatCoarsenApply(pc_gamg_agg->crs));
1127   PetscCall(MatCoarsenViewFromOptions(pc_gamg_agg->crs, NULL, "-mat_coarsen_view"));
1128   PetscCall(MatCoarsenGetData(pc_gamg_agg->crs, agg_lists)); /* output */
1129   PetscCall(MatCoarsenDestroy(&pc_gamg_agg->crs));
1130 
1131   PetscCall(ISDestroy(&perm));
1132   PetscCall(PetscFree2(permute, degree));
1133   PetscCall(PetscLogEventEnd(petsc_gamg_setup_events[GAMG_MIS], 0, 0, 0, 0));
1134 
1135   if (Gmat2 != Gmat1) { // square graph, we need ghosts for selected
1136     PetscCoarsenData *llist = *agg_lists;
1137     PetscCall(fixAggregatesWithSquare(a_pc, Gmat2, Gmat1, *agg_lists));
1138     PetscCall(MatDestroy(&Gmat1));
1139     *a_Gmat1 = Gmat2; /* output */
1140     PetscCall(PetscCDGetMat(llist, &mat));
1141     PetscCheck(!mat, comm, PETSC_ERR_ARG_WRONG, "Unexpected auxiliary matrix with squared graph");
1142   } else {
1143     PetscCoarsenData *llist = *agg_lists;
1144     /* see if we have a matrix that takes precedence (returned from MatCoarsenApply) */
1145     PetscCall(PetscCDGetMat(llist, &mat));
1146     if (mat) {
1147       PetscCall(MatDestroy(a_Gmat1));
1148       *a_Gmat1 = mat; /* output */
1149     }
1150   }
1151   PetscCall(PetscLogEventEnd(petsc_gamg_setup_events[GAMG_COARSEN], 0, 0, 0, 0));
1152   PetscFunctionReturn(PETSC_SUCCESS);
1153 }
1154 
1155 /*
1156  PCGAMGProlongator_AGG
1157 
1158  Input Parameter:
1159  . pc - this
1160  . Amat - matrix on this fine level
1161  . Graph - used to get ghost data for nodes in
1162  . agg_lists - list of aggregates
1163  Output Parameter:
1164  . a_P_out - prolongation operator to the next level
1165  */
1166 static PetscErrorCode PCGAMGProlongator_AGG(PC pc, Mat Amat, Mat Gmat, PetscCoarsenData *agg_lists, Mat *a_P_out)
1167 {
1168   PC_MG         *mg      = (PC_MG *)pc->data;
1169   PC_GAMG       *pc_gamg = (PC_GAMG *)mg->innerctx;
1170   const PetscInt col_bs  = pc_gamg->data_cell_cols;
1171   PetscInt       Istart, Iend, nloc, ii, jj, kk, my0, nLocalSelected, bs;
1172   Mat            Prol;
1173   PetscMPIInt    size;
1174   MPI_Comm       comm;
1175   PetscReal     *data_w_ghost;
1176   PetscInt       myCrs0, nbnodes = 0, *flid_fgid;
1177   MatType        mtype;
1178 
1179   PetscFunctionBegin;
1180   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
1181   PetscCheck(col_bs >= 1, comm, PETSC_ERR_PLIB, "Column bs cannot be less than 1");
1182   PetscCall(PetscLogEventBegin(petsc_gamg_setup_events[GAMG_PROL], 0, 0, 0, 0));
1183   PetscCallMPI(MPI_Comm_size(comm, &size));
1184   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
1185   PetscCall(MatGetBlockSize(Amat, &bs));
1186   nloc = (Iend - Istart) / bs;
1187   my0  = Istart / bs;
1188   PetscCheck((Iend - Istart) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "(Iend %" PetscInt_FMT " - Istart %" PetscInt_FMT ") not divisible by bs %" PetscInt_FMT, Iend, Istart, bs);
1189 
1190   /* get 'nLocalSelected' */
1191   for (ii = 0, nLocalSelected = 0; ii < nloc; ii++) {
1192     PetscBool ise;
1193     /* filter out singletons 0 or 1? */
1194     PetscCall(PetscCDIsEmptyAt(agg_lists, ii, &ise));
1195     if (!ise) nLocalSelected++;
1196   }
1197 
1198   /* create prolongator, create P matrix */
1199   PetscCall(MatGetType(Amat, &mtype));
1200   PetscCall(MatCreate(comm, &Prol));
1201   PetscCall(MatSetSizes(Prol, nloc * bs, nLocalSelected * col_bs, PETSC_DETERMINE, PETSC_DETERMINE));
1202   PetscCall(MatSetBlockSizes(Prol, bs, col_bs));
1203   PetscCall(MatSetType(Prol, mtype));
1204 #if PetscDefined(HAVE_DEVICE)
1205   PetscBool flg;
1206   PetscCall(MatBoundToCPU(Amat, &flg));
1207   PetscCall(MatBindToCPU(Prol, flg));
1208   if (flg) PetscCall(MatSetBindingPropagates(Prol, PETSC_TRUE));
1209 #endif
1210   PetscCall(MatSeqAIJSetPreallocation(Prol, col_bs, NULL));
1211   PetscCall(MatMPIAIJSetPreallocation(Prol, col_bs, NULL, col_bs, NULL));
1212 
1213   /* can get all points "removed" */
1214   PetscCall(MatGetSize(Prol, &kk, &ii));
1215   if (!ii) {
1216     PetscCall(PetscInfo(pc, "%s: No selected points on coarse grid\n", ((PetscObject)pc)->prefix));
1217     PetscCall(MatDestroy(&Prol));
1218     *a_P_out = NULL; /* out */
1219     PetscCall(PetscLogEventEnd(petsc_gamg_setup_events[GAMG_PROL], 0, 0, 0, 0));
1220     PetscFunctionReturn(PETSC_SUCCESS);
1221   }
1222   PetscCall(PetscInfo(pc, "%s: New grid %" PetscInt_FMT " nodes\n", ((PetscObject)pc)->prefix, ii / col_bs));
1223   PetscCall(MatGetOwnershipRangeColumn(Prol, &myCrs0, &kk));
1224 
1225   PetscCheck((kk - myCrs0) % col_bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "(kk %" PetscInt_FMT " -myCrs0 %" PetscInt_FMT ") not divisible by col_bs %" PetscInt_FMT, kk, myCrs0, col_bs);
1226   myCrs0 = myCrs0 / col_bs;
1227   PetscCheck((kk / col_bs - myCrs0) == nLocalSelected, PETSC_COMM_SELF, PETSC_ERR_PLIB, "(kk %" PetscInt_FMT "/col_bs %" PetscInt_FMT " - myCrs0 %" PetscInt_FMT ") != nLocalSelected %" PetscInt_FMT ")", kk, col_bs, myCrs0, nLocalSelected);
1228 
1229   /* create global vector of data in 'data_w_ghost' */
1230   PetscCall(PetscLogEventBegin(petsc_gamg_setup_events[GAMG_PROLA], 0, 0, 0, 0));
1231   if (size > 1) { /* get ghost null space data */
1232     PetscReal *tmp_gdata, *tmp_ldata, *tp2;
1233     PetscCall(PetscMalloc1(nloc, &tmp_ldata));
1234     for (jj = 0; jj < col_bs; jj++) {
1235       for (kk = 0; kk < bs; kk++) {
1236         PetscInt         ii, stride;
1237         const PetscReal *tp = pc_gamg->data + jj * bs * nloc + kk;
1238         for (ii = 0; ii < nloc; ii++, tp += bs) tmp_ldata[ii] = *tp;
1239 
1240         PetscCall(PCGAMGGetDataWithGhosts(Gmat, 1, tmp_ldata, &stride, &tmp_gdata));
1241 
1242         if (!jj && !kk) { /* now I know how many total nodes - allocate TODO: move below and do in one 'col_bs' call */
1243           PetscCall(PetscMalloc1(stride * bs * col_bs, &data_w_ghost));
1244           nbnodes = bs * stride;
1245         }
1246         tp2 = data_w_ghost + jj * bs * stride + kk;
1247         for (ii = 0; ii < stride; ii++, tp2 += bs) *tp2 = tmp_gdata[ii];
1248         PetscCall(PetscFree(tmp_gdata));
1249       }
1250     }
1251     PetscCall(PetscFree(tmp_ldata));
1252   } else {
1253     nbnodes      = bs * nloc;
1254     data_w_ghost = (PetscReal *)pc_gamg->data;
1255   }
1256 
1257   /* get 'flid_fgid' TODO - move up to get 'stride' and do get null space data above in one step (jj loop) */
1258   if (size > 1) {
1259     PetscReal *fid_glid_loc, *fiddata;
1260     PetscInt   stride;
1261 
1262     PetscCall(PetscMalloc1(nloc, &fid_glid_loc));
1263     for (kk = 0; kk < nloc; kk++) fid_glid_loc[kk] = (PetscReal)(my0 + kk);
1264     PetscCall(PCGAMGGetDataWithGhosts(Gmat, 1, fid_glid_loc, &stride, &fiddata));
1265     PetscCall(PetscMalloc1(stride, &flid_fgid)); /* copy real data to in */
1266     for (kk = 0; kk < stride; kk++) flid_fgid[kk] = (PetscInt)fiddata[kk];
1267     PetscCall(PetscFree(fiddata));
1268 
1269     PetscCheck(stride == nbnodes / bs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "stride %" PetscInt_FMT " != nbnodes %" PetscInt_FMT "/bs %" PetscInt_FMT, stride, nbnodes, bs);
1270     PetscCall(PetscFree(fid_glid_loc));
1271   } else {
1272     PetscCall(PetscMalloc1(nloc, &flid_fgid));
1273     for (kk = 0; kk < nloc; kk++) flid_fgid[kk] = my0 + kk;
1274   }
1275   PetscCall(PetscLogEventEnd(petsc_gamg_setup_events[GAMG_PROLA], 0, 0, 0, 0));
1276   /* get P0 */
1277   PetscCall(PetscLogEventBegin(petsc_gamg_setup_events[GAMG_PROLB], 0, 0, 0, 0));
1278   {
1279     PetscReal *data_out = NULL;
1280     PetscCall(formProl0(agg_lists, bs, col_bs, myCrs0, nbnodes, data_w_ghost, flid_fgid, &data_out, Prol));
1281     PetscCall(PetscFree(pc_gamg->data));
1282 
1283     pc_gamg->data           = data_out;
1284     pc_gamg->data_cell_rows = col_bs;
1285     pc_gamg->data_sz        = col_bs * col_bs * nLocalSelected;
1286   }
1287   PetscCall(PetscLogEventEnd(petsc_gamg_setup_events[GAMG_PROLB], 0, 0, 0, 0));
1288   if (size > 1) PetscCall(PetscFree(data_w_ghost));
1289   PetscCall(PetscFree(flid_fgid));
1290 
1291   *a_P_out = Prol; /* out */
1292   PetscCall(MatViewFromOptions(Prol, NULL, "-view_P"));
1293 
1294   PetscCall(PetscLogEventEnd(petsc_gamg_setup_events[GAMG_PROL], 0, 0, 0, 0));
1295   PetscFunctionReturn(PETSC_SUCCESS);
1296 }
1297 
1298 /*
1299    PCGAMGOptProlongator_AGG
1300 
1301   Input Parameter:
1302    . pc - this
1303    . Amat - matrix on this fine level
1304  In/Output Parameter:
1305    . a_P - prolongation operator to the next level
1306 */
1307 static PetscErrorCode PCGAMGOptProlongator_AGG(PC pc, Mat Amat, Mat *a_P)
1308 {
1309   PC_MG       *mg          = (PC_MG *)pc->data;
1310   PC_GAMG     *pc_gamg     = (PC_GAMG *)mg->innerctx;
1311   PC_GAMG_AGG *pc_gamg_agg = (PC_GAMG_AGG *)pc_gamg->subctx;
1312   PetscInt     jj;
1313   Mat          Prol = *a_P;
1314   MPI_Comm     comm;
1315   KSP          eksp;
1316   Vec          bb, xx;
1317   PC           epc;
1318   PetscReal    alpha, emax, emin;
1319 
1320   PetscFunctionBegin;
1321   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
1322   PetscCall(PetscLogEventBegin(petsc_gamg_setup_events[GAMG_OPT], 0, 0, 0, 0));
1323 
1324   /* compute maximum singular value of operator to be used in smoother */
1325   if (0 < pc_gamg_agg->nsmooths) {
1326     /* get eigen estimates */
1327     if (pc_gamg->emax > 0) {
1328       emin = pc_gamg->emin;
1329       emax = pc_gamg->emax;
1330     } else {
1331       const char *prefix;
1332 
1333       PetscCall(MatCreateVecs(Amat, &bb, NULL));
1334       PetscCall(MatCreateVecs(Amat, &xx, NULL));
1335       PetscCall(KSPSetNoisy_Private(bb));
1336 
1337       PetscCall(KSPCreate(comm, &eksp));
1338       PetscCall(KSPSetNestLevel(eksp, pc->kspnestlevel));
1339       PetscCall(PCGetOptionsPrefix(pc, &prefix));
1340       PetscCall(KSPSetOptionsPrefix(eksp, prefix));
1341       PetscCall(KSPAppendOptionsPrefix(eksp, "pc_gamg_esteig_"));
1342       {
1343         PetscBool isset, sflg;
1344         PetscCall(MatIsSPDKnown(Amat, &isset, &sflg));
1345         if (isset && sflg) PetscCall(KSPSetType(eksp, KSPCG));
1346       }
1347       PetscCall(KSPSetErrorIfNotConverged(eksp, pc->erroriffailure));
1348       PetscCall(KSPSetNormType(eksp, KSP_NORM_NONE));
1349 
1350       PetscCall(KSPSetInitialGuessNonzero(eksp, PETSC_FALSE));
1351       PetscCall(KSPSetOperators(eksp, Amat, Amat));
1352 
1353       PetscCall(KSPGetPC(eksp, &epc));
1354       PetscCall(PCSetType(epc, PCJACOBI)); /* smoother in smoothed agg. */
1355 
1356       PetscCall(KSPSetTolerances(eksp, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT, 10)); // 10 is safer, but 5 is often fine, can override with -pc_gamg_esteig_ksp_max_it -mg_levels_ksp_chebyshev_esteig 0,0.25,0,1.2
1357 
1358       PetscCall(KSPSetFromOptions(eksp));
1359       PetscCall(KSPSetComputeSingularValues(eksp, PETSC_TRUE));
1360       PetscCall(KSPSolve(eksp, bb, xx));
1361       PetscCall(KSPCheckSolve(eksp, pc, xx));
1362 
1363       PetscCall(KSPComputeExtremeSingularValues(eksp, &emax, &emin));
1364       PetscCall(PetscInfo(pc, "%s: Smooth P0: max eigen=%e min=%e PC=%s\n", ((PetscObject)pc)->prefix, (double)emax, (double)emin, PCJACOBI));
1365       PetscCall(VecDestroy(&xx));
1366       PetscCall(VecDestroy(&bb));
1367       PetscCall(KSPDestroy(&eksp));
1368     }
1369     if (pc_gamg->use_sa_esteig) {
1370       mg->min_eigen_DinvA[pc_gamg->current_level] = emin;
1371       mg->max_eigen_DinvA[pc_gamg->current_level] = emax;
1372       PetscCall(PetscInfo(pc, "%s: Smooth P0: level %" PetscInt_FMT ", cache spectra %g %g\n", ((PetscObject)pc)->prefix, pc_gamg->current_level, (double)emin, (double)emax));
1373     } else {
1374       mg->min_eigen_DinvA[pc_gamg->current_level] = 0;
1375       mg->max_eigen_DinvA[pc_gamg->current_level] = 0;
1376     }
1377   } else {
1378     mg->min_eigen_DinvA[pc_gamg->current_level] = 0;
1379     mg->max_eigen_DinvA[pc_gamg->current_level] = 0;
1380   }
1381 
1382   /* smooth P0 */
1383   for (jj = 0; jj < pc_gamg_agg->nsmooths; jj++) {
1384     Mat tMat;
1385     Vec diag;
1386 
1387     PetscCall(PetscLogEventBegin(petsc_gamg_setup_events[GAMG_OPTSM], 0, 0, 0, 0));
1388 
1389     /* smooth P1 := (I - omega/lam D^{-1}A)P0 */
1390     PetscCall(PetscLogEventBegin(petsc_gamg_setup_matmat_events[pc_gamg->current_level][2], 0, 0, 0, 0));
1391     PetscCall(MatMatMult(Amat, Prol, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &tMat));
1392     PetscCall(PetscLogEventEnd(petsc_gamg_setup_matmat_events[pc_gamg->current_level][2], 0, 0, 0, 0));
1393     PetscCall(MatProductClear(tMat));
1394     PetscCall(MatCreateVecs(Amat, &diag, NULL));
1395     PetscCall(MatGetDiagonal(Amat, diag)); /* effectively PCJACOBI */
1396     PetscCall(VecReciprocal(diag));
1397     PetscCall(MatDiagonalScale(tMat, diag, NULL));
1398     PetscCall(VecDestroy(&diag));
1399 
1400     /* TODO: Set a PCFailedReason and exit the building of the AMG preconditioner */
1401     PetscCheck(emax != 0.0, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Computed maximum singular value as zero");
1402     /* TODO: Document the 1.4 and don't hardwire it in this routine */
1403     alpha = -1.4 / emax;
1404 
1405     PetscCall(MatAYPX(tMat, alpha, Prol, SUBSET_NONZERO_PATTERN));
1406     PetscCall(MatDestroy(&Prol));
1407     Prol = tMat;
1408     PetscCall(PetscLogEventEnd(petsc_gamg_setup_events[GAMG_OPTSM], 0, 0, 0, 0));
1409   }
1410   PetscCall(PetscLogEventEnd(petsc_gamg_setup_events[GAMG_OPT], 0, 0, 0, 0));
1411   *a_P = Prol;
1412   PetscFunctionReturn(PETSC_SUCCESS);
1413 }
1414 
1415 /*
1416    PCCreateGAMG_AGG
1417 
1418   Input Parameter:
1419    . pc -
1420 */
1421 PetscErrorCode PCCreateGAMG_AGG(PC pc)
1422 {
1423   PC_MG       *mg      = (PC_MG *)pc->data;
1424   PC_GAMG     *pc_gamg = (PC_GAMG *)mg->innerctx;
1425   PC_GAMG_AGG *pc_gamg_agg;
1426 
1427   PetscFunctionBegin;
1428   /* create sub context for SA */
1429   PetscCall(PetscNew(&pc_gamg_agg));
1430   pc_gamg->subctx = pc_gamg_agg;
1431 
1432   pc_gamg->ops->setfromoptions = PCSetFromOptions_GAMG_AGG;
1433   pc_gamg->ops->destroy        = PCDestroy_GAMG_AGG;
1434   /* reset does not do anything; setup not virtual */
1435 
1436   /* set internal function pointers */
1437   pc_gamg->ops->creategraph       = PCGAMGCreateGraph_AGG;
1438   pc_gamg->ops->coarsen           = PCGAMGCoarsen_AGG;
1439   pc_gamg->ops->prolongator       = PCGAMGProlongator_AGG;
1440   pc_gamg->ops->optprolongator    = PCGAMGOptProlongator_AGG;
1441   pc_gamg->ops->createdefaultdata = PCSetData_AGG;
1442   pc_gamg->ops->view              = PCView_GAMG_AGG;
1443 
1444   pc_gamg_agg->nsmooths                     = 1;
1445   pc_gamg_agg->aggressive_coarsening_levels = 1;
1446   pc_gamg_agg->use_aggressive_square_graph  = PETSC_TRUE;
1447   pc_gamg_agg->use_minimum_degree_ordering  = PETSC_FALSE;
1448   pc_gamg_agg->use_low_mem_filter           = PETSC_FALSE;
1449   pc_gamg_agg->aggressive_mis_k             = 2;
1450 
1451   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCGAMGSetNSmooths_C", PCGAMGSetNSmooths_AGG));
1452   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCGAMGSetAggressiveLevels_C", PCGAMGSetAggressiveLevels_AGG));
1453   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCGAMGSetAggressiveSquareGraph_C", PCGAMGSetAggressiveSquareGraph_AGG));
1454   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCGAMGMISkSetMinDegreeOrdering_C", PCGAMGMISkSetMinDegreeOrdering_AGG));
1455   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCGAMGSetLowMemoryFilter_C", PCGAMGSetLowMemoryFilter_AGG));
1456   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCGAMGMISkSetAggressive_C", PCGAMGMISkSetAggressive_AGG));
1457   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSetCoordinates_C", PCSetCoordinates_AGG));
1458   PetscFunctionReturn(PETSC_SUCCESS);
1459 }
1460