xref: /petsc/src/mat/graphops/coarsen/impls/hem/hem.c (revision d8e47b638cf8f604a99e9678e1df24f82d959cd7)
1 #include <petsc/private/matimpl.h> /*I "petscmat.h" I*/
2 #include <../src/mat/impls/aij/seq/aij.h>
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>
4 #include <petscdm.h>
5 
6 /* linked list methods
7  *
8  *  PetscCDCreate
9  */
10 PetscErrorCode PetscCDCreate(PetscInt a_size, PetscCoarsenData **a_out)
11 {
12   PetscCoarsenData *ail;
13 
14   PetscFunctionBegin;
15   /* allocate pool, partially */
16   PetscCall(PetscNew(&ail));
17   *a_out               = ail;
18   ail->pool_list.next  = NULL;
19   ail->pool_list.array = NULL;
20   ail->chk_sz          = 0;
21   /* allocate array */
22   ail->size = a_size;
23   PetscCall(PetscCalloc1(a_size, &ail->array));
24   ail->extra_nodes = NULL;
25   ail->mat         = NULL;
26   PetscFunctionReturn(PETSC_SUCCESS);
27 }
28 
29 /* PetscCDDestroy
30  */
31 PetscErrorCode PetscCDDestroy(PetscCoarsenData *ail)
32 {
33   PetscCDArrNd *n = &ail->pool_list;
34 
35   PetscFunctionBegin;
36   n = n->next;
37   while (n) {
38     PetscCDArrNd *lstn = n;
39 
40     n = n->next;
41     PetscCall(PetscFree(lstn));
42   }
43   if (ail->pool_list.array) PetscCall(PetscFree(ail->pool_list.array));
44   PetscCall(PetscFree(ail->array));
45   if (ail->mat) PetscCall(MatDestroy(&ail->mat));
46   /* delete this (+agg+pool array) */
47   PetscCall(PetscFree(ail));
48   PetscFunctionReturn(PETSC_SUCCESS);
49 }
50 
51 /* PetscCDSetChunkSize
52  */
53 PetscErrorCode PetscCDSetChunkSize(PetscCoarsenData *ail, PetscInt a_sz)
54 {
55   PetscFunctionBegin;
56   ail->chk_sz = a_sz;
57   PetscFunctionReturn(PETSC_SUCCESS);
58 }
59 
/*  PetscCDGetNewNode - hand out one list node with gid a_id, recycling from the
    free-node cache (extra_nodes) when possible, otherwise carving it from the
    chunked pool; chunks are only released in PetscCDDestroy
 */
static PetscErrorCode PetscCDGetNewNode(PetscCoarsenData *ail, PetscCDIntNd **a_out, PetscInt a_id)
{
  PetscFunctionBegin;
  *a_out = NULL; /* squelch -Wmaybe-uninitialized */
  if (ail->extra_nodes) {
    /* reuse a node previously returned to the cache (see PetscCDRemoveAllAt) */
    PetscCDIntNd *node = ail->extra_nodes;

    ail->extra_nodes = node->next;
    node->gid        = a_id;
    node->next       = NULL;
    *a_out           = node;
  } else {
    if (!ail->pool_list.array) {
      /* first allocation: create the initial chunk, held directly in pool_list */
      if (!ail->chk_sz) ail->chk_sz = 10; /* use a chuck size of ail->size? */
      PetscCall(PetscMalloc1(ail->chk_sz, &ail->pool_list.array));
      ail->new_node       = ail->pool_list.array;
      ail->new_left       = ail->chk_sz;
      ail->new_node->next = NULL;
    } else if (!ail->new_left) {
      /* current chunk exhausted: allocate header + node array in ONE block and
         push it on the pool chain so PetscCDDestroy can free it with one call */
      PetscCDArrNd *node;

      PetscCall(PetscMalloc(ail->chk_sz * sizeof(PetscCDIntNd) + sizeof(PetscCDArrNd), &node));
      node->array         = (PetscCDIntNd *)(node + 1); /* nodes live right after the header */
      node->next          = ail->pool_list.next;
      ail->pool_list.next = node;
      ail->new_left       = ail->chk_sz;
      ail->new_node       = node->array;
    }
    /* take the next unused slot from the active chunk */
    ail->new_node->gid  = a_id;
    ail->new_node->next = NULL;
    *a_out              = ail->new_node++;
    ail->new_left--;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
97 
98 /* PetscCDIntNdSetID
99  */
100 PetscErrorCode PetscCDIntNdSetID(PetscCDIntNd *a_this, PetscInt a_id)
101 {
102   PetscFunctionBegin;
103   a_this->gid = a_id;
104   PetscFunctionReturn(PETSC_SUCCESS);
105 }
106 
107 /* PetscCDIntNdGetID
108  */
109 PetscErrorCode PetscCDIntNdGetID(const PetscCDIntNd *a_this, PetscInt *a_gid)
110 {
111   PetscFunctionBegin;
112   *a_gid = a_this->gid;
113   PetscFunctionReturn(PETSC_SUCCESS);
114 }
115 
116 /* PetscCDGetHeadPos
117  */
118 PetscErrorCode PetscCDGetHeadPos(const PetscCoarsenData *ail, PetscInt a_idx, PetscCDIntNd **pos)
119 {
120   PetscFunctionBegin;
121   PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "a_idx >= ail->size: a_idx=%" PetscInt_FMT ".", a_idx);
122   *pos = ail->array[a_idx];
123   PetscFunctionReturn(PETSC_SUCCESS);
124 }
125 
126 /* PetscCDGetNextPos
127  */
128 PetscErrorCode PetscCDGetNextPos(const PetscCoarsenData *ail, PetscInt l_idx, PetscCDIntNd **pos)
129 {
130   PetscFunctionBegin;
131   PetscCheck(*pos, PETSC_COMM_SELF, PETSC_ERR_PLIB, "NULL input position.");
132   *pos = (*pos)->next;
133   PetscFunctionReturn(PETSC_SUCCESS);
134 }
135 
136 /* PetscCDAppendID
137  */
138 PetscErrorCode PetscCDAppendID(PetscCoarsenData *ail, PetscInt a_idx, PetscInt a_id)
139 {
140   PetscCDIntNd *n, *n2;
141 
142   PetscFunctionBegin;
143   PetscCall(PetscCDGetNewNode(ail, &n, a_id));
144   PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx);
145   if (!(n2 = ail->array[a_idx])) ail->array[a_idx] = n;
146   else {
147     do {
148       if (!n2->next) {
149         n2->next = n;
150         PetscCheck(!n->next, PETSC_COMM_SELF, PETSC_ERR_PLIB, "n should not have a next");
151         break;
152       }
153       n2 = n2->next;
154     } while (n2);
155     PetscCheck(n2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "n2 should be non-null");
156   }
157   PetscFunctionReturn(PETSC_SUCCESS);
158 }
159 
160 /* PetscCDAppendNode
161  */
162 PetscErrorCode PetscCDAppendNode(PetscCoarsenData *ail, PetscInt a_idx, PetscCDIntNd *a_n)
163 {
164   PetscCDIntNd *n2;
165 
166   PetscFunctionBegin;
167   PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx);
168   if (!(n2 = ail->array[a_idx])) ail->array[a_idx] = a_n;
169   else {
170     do {
171       if (!n2->next) {
172         n2->next  = a_n;
173         a_n->next = NULL;
174         break;
175       }
176       n2 = n2->next;
177     } while (n2);
178     PetscCheck(n2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "n2 should be non-null");
179   }
180   PetscFunctionReturn(PETSC_SUCCESS);
181 }
182 
183 /* PetscCDRemoveNextNode: a_last->next, this exposes single linked list structure to API (not used)
184  */
185 PetscErrorCode PetscCDRemoveNextNode(PetscCoarsenData *ail, PetscInt a_idx, PetscCDIntNd *a_last)
186 {
187   PetscCDIntNd *del;
188 
189   PetscFunctionBegin;
190   PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx);
191   PetscCheck(a_last->next, PETSC_COMM_SELF, PETSC_ERR_PLIB, "a_last should have a next");
192   del          = a_last->next;
193   a_last->next = del->next;
194   /* del->next = NULL; -- this still used in a iterator so keep it intact -- need to fix this with a double linked list */
195   /* could reuse n2 but PetscCDAppendNode sometimes uses it */
196   PetscFunctionReturn(PETSC_SUCCESS);
197 }
198 
199 /* PetscCDPrint
200  */
201 PetscErrorCode PetscCDPrint(const PetscCoarsenData *ail, PetscInt Istart, MPI_Comm comm)
202 {
203   PetscCDIntNd *n, *n2;
204   PetscInt      ii;
205 
206   PetscFunctionBegin;
207   for (ii = 0; ii < ail->size; ii++) {
208     n2 = n = ail->array[ii];
209     if (n) PetscCall(PetscSynchronizedPrintf(comm, "list %" PetscInt_FMT ":", ii + Istart));
210     while (n) {
211       PetscCall(PetscSynchronizedPrintf(comm, " %" PetscInt_FMT, n->gid));
212       n = n->next;
213     }
214     if (n2) PetscCall(PetscSynchronizedPrintf(comm, "\n"));
215   }
216   PetscCall(PetscSynchronizedFlush(comm, PETSC_STDOUT));
217   PetscFunctionReturn(PETSC_SUCCESS);
218 }
219 
220 /* PetscCDMoveAppend - take list in a_srcidx and appends to destidx
221  */
222 PetscErrorCode PetscCDMoveAppend(PetscCoarsenData *ail, PetscInt a_destidx, PetscInt a_srcidx)
223 {
224   PetscCDIntNd *n;
225 
226   PetscFunctionBegin;
227   PetscCheck(a_srcidx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_srcidx);
228   PetscCheck(a_destidx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_destidx);
229   PetscCheck(a_destidx != a_srcidx, PETSC_COMM_SELF, PETSC_ERR_PLIB, "a_destidx==a_srcidx %" PetscInt_FMT ".", a_destidx);
230   n = ail->array[a_destidx];
231   if (!n) ail->array[a_destidx] = ail->array[a_srcidx];
232   else {
233     do {
234       if (!n->next) {
235         n->next = ail->array[a_srcidx]; // append
236         break;
237       }
238       n = n->next;
239     } while (1);
240   }
241   ail->array[a_srcidx] = NULL; // empty
242   PetscFunctionReturn(PETSC_SUCCESS);
243 }
244 
245 /* PetscCDRemoveAllAt - empty one list and move data to cache
246  */
247 PetscErrorCode PetscCDRemoveAllAt(PetscCoarsenData *ail, PetscInt a_idx)
248 {
249   PetscCDIntNd *rem, *n1;
250 
251   PetscFunctionBegin;
252   PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx);
253   rem               = ail->array[a_idx];
254   ail->array[a_idx] = NULL;
255   if (!(n1 = ail->extra_nodes)) ail->extra_nodes = rem;
256   else {
257     while (n1->next) n1 = n1->next;
258     n1->next = rem;
259   }
260   PetscFunctionReturn(PETSC_SUCCESS);
261 }
262 
263 /* PetscCDCountAt
264  */
265 PetscErrorCode PetscCDCountAt(const PetscCoarsenData *ail, PetscInt a_idx, PetscInt *a_sz)
266 {
267   PetscCDIntNd *n1;
268   PetscInt      sz = 0;
269 
270   PetscFunctionBegin;
271   PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx);
272   n1 = ail->array[a_idx];
273   while (n1) {
274     n1 = n1->next;
275     sz++;
276   }
277   *a_sz = sz;
278   PetscFunctionReturn(PETSC_SUCCESS);
279 }
280 
281 /* PetscCDSize
282  */
283 PetscErrorCode PetscCDCount(const PetscCoarsenData *ail, PetscInt *a_sz)
284 {
285   PetscInt sz = 0;
286 
287   PetscFunctionBegin;
288   for (PetscInt ii = 0; ii < ail->size; ii++) {
289     PetscCDIntNd *n1 = ail->array[ii];
290 
291     while (n1) {
292       n1 = n1->next;
293       sz++;
294     }
295   }
296   *a_sz = sz;
297   PetscFunctionReturn(PETSC_SUCCESS);
298 }
299 
300 /* PetscCDIsEmptyAt - Is the list empty? (not used)
301  */
302 PetscErrorCode PetscCDIsEmptyAt(const PetscCoarsenData *ail, PetscInt a_idx, PetscBool *a_e)
303 {
304   PetscFunctionBegin;
305   PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx);
306   *a_e = (PetscBool)(ail->array[a_idx] == NULL);
307   PetscFunctionReturn(PETSC_SUCCESS);
308 }
309 
310 /* PetscCDGetNonemptyIS - used for C-F methods
311  */
312 PetscErrorCode PetscCDGetNonemptyIS(PetscCoarsenData *ail, IS *a_mis)
313 {
314   PetscCDIntNd *n;
315   PetscInt      ii, kk;
316   PetscInt     *permute;
317 
318   PetscFunctionBegin;
319   for (ii = kk = 0; ii < ail->size; ii++) {
320     n = ail->array[ii];
321     if (n) kk++;
322   }
323   PetscCall(PetscMalloc1(kk, &permute));
324   for (ii = kk = 0; ii < ail->size; ii++) {
325     n = ail->array[ii];
326     if (n) permute[kk++] = ii;
327   }
328   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, kk, permute, PETSC_OWN_POINTER, a_mis));
329   PetscFunctionReturn(PETSC_SUCCESS);
330 }
331 
332 /* PetscCDGetMat
333  */
334 PetscErrorCode PetscCDGetMat(PetscCoarsenData *ail, Mat *a_mat)
335 {
336   PetscFunctionBegin;
337   *a_mat = ail->mat;
338   PetscFunctionReturn(PETSC_SUCCESS);
339 }
340 
341 /* PetscCDSetMat
342  */
343 PetscErrorCode PetscCDSetMat(PetscCoarsenData *ail, Mat a_mat)
344 {
345   PetscFunctionBegin;
346   if (ail->mat) {
347     PetscCall(MatDestroy(&ail->mat)); //should not happen
348   }
349   ail->mat = a_mat;
350   PetscFunctionReturn(PETSC_SUCCESS);
351 }
352 
353 /* PetscCDClearMat
354  */
355 PetscErrorCode PetscCDClearMat(PetscCoarsenData *ail)
356 {
357   PetscFunctionBegin;
358   ail->mat = NULL;
359   PetscFunctionReturn(PETSC_SUCCESS);
360 }
361 
/* PetscCDGetASMBlocks - get IS of aggregates for ASM smoothers; each nonempty
   list becomes one IS whose entries are the a_bs dof indices of every gid in
   the aggregate. Caller receives the array of ISs in *a_local_is and its
   length in *a_sz (ownership transfers to the caller).
 */
PetscErrorCode PetscCDGetASMBlocks(const PetscCoarsenData *ail, const PetscInt a_bs, PetscInt *a_sz, IS **a_local_is)
{
  PetscCDIntNd *n;
  PetscInt      lsz, ii, kk, *idxs, jj, gid;
  IS           *is_loc = NULL;

  PetscFunctionBegin;
  /* count nonempty aggregates: one IS will be created for each */
  for (ii = kk = 0; ii < ail->size; ii++) {
    if (ail->array[ii]) kk++;
  }
  *a_sz = kk;
  PetscCall(PetscMalloc1(kk, &is_loc));
  for (ii = kk = 0; ii < ail->size; ii++) {
    /* lsz = number of nodes in aggregate ii (0 if empty) */
    for (lsz = 0, n = ail->array[ii]; n; lsz++, n = n->next) /* void */
      ;
    if (lsz) {
      /* expand each gid into its a_bs block dof indices; lsz is reused as the
         running write position and ends at a_bs * (node count) */
      PetscCall(PetscMalloc1(a_bs * lsz, &idxs));
      for (lsz = 0, n = ail->array[ii]; n; n = n->next) {
        PetscCall(PetscCDIntNdGetID(n, &gid));
        for (jj = 0; jj < a_bs; lsz++, jj++) idxs[lsz] = a_bs * gid + jj;
      }
      /* the IS takes ownership of idxs */
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, lsz, idxs, PETSC_OWN_POINTER, &is_loc[kk++]));
    }
  }
  PetscCheck(*a_sz == kk, PETSC_COMM_SELF, PETSC_ERR_PLIB, "*a_sz %" PetscInt_FMT " != kk %" PetscInt_FMT, *a_sz, kk);
  *a_local_is = is_loc; /* out */
  PetscFunctionReturn(PETSC_SUCCESS);
}
392 
/* edge for priority queue (sorted by weight in MatCoarsenApply_HEM_private) */
typedef struct edge_tag {
  PetscReal weight;                 /* matrix-entry weight used to rank the edge */
  PetscInt  lid0, gid1, ghost1_idx; /* local id of vertex 0; global id of vertex 1;
                                       ghost index of vertex 1, or -1 when vertex 1 is local */
} Edge;
398 
399 #define MY_MEPS (PETSC_MACHINE_EPSILON * 100)
400 static int gamg_hem_compare(const void *a, const void *b)
401 {
402   PetscReal va = ((Edge *)a)->weight, vb = ((Edge *)b)->weight;
403   return (va <= vb - MY_MEPS) ? 1 : (va > vb + MY_MEPS) ? -1 : 0; /* 0 for equal */
404 }
405 
406 /*
407   MatCoarsenApply_HEM_private - parallel heavy edge matching
408 
409   Input Parameter:
410    . a_Gmat - global matrix of the graph
411    . n_iter - number of matching iterations
412    . threshold - threshold for filtering graphs
413 
414   Output Parameter:
415    . a_locals_llist - array of list of local nodes rooted at local node
416 */
417 static PetscErrorCode MatCoarsenApply_HEM_private(Mat a_Gmat, const PetscInt n_iter, const PetscReal threshold, PetscCoarsenData **a_locals_llist)
418 {
419 #define REQ_BF_SIZE 100
420   PetscBool         isMPI;
421   MPI_Comm          comm;
422   PetscInt          ix, *ii, *aj, Istart, bc_agg = -1, *rbuff = NULL, rbuff_sz = 0;
423   PetscMPIInt       rank, size, comm_procs[REQ_BF_SIZE], ncomm_procs, *lid_max_pe;
424   const PetscInt    nloc = a_Gmat->rmap->n, request_size = PetscCeilInt((int)sizeof(MPI_Request), (int)sizeof(PetscInt));
425   PetscInt         *lid_cprowID;
426   PetscBool        *lid_matched;
427   Mat_SeqAIJ       *matA, *matB = NULL;
428   Mat_MPIAIJ       *mpimat     = NULL;
429   PetscScalar       one        = 1.;
430   PetscCoarsenData *agg_llists = NULL, *ghost_deleted_list = NULL, *bc_list = NULL;
431   Mat               cMat, tMat, P;
432   MatScalar        *ap;
433   IS                info_is;
434 
435   PetscFunctionBegin;
436   PetscCall(PetscObjectGetComm((PetscObject)a_Gmat, &comm));
437   PetscCallMPI(MPI_Comm_rank(comm, &rank));
438   PetscCallMPI(MPI_Comm_size(comm, &size));
439   PetscCall(MatGetOwnershipRange(a_Gmat, &Istart, NULL));
440   PetscCall(ISCreate(comm, &info_is));
441   PetscCall(PetscInfo(info_is, "Start %" PetscInt_FMT " iterations of HEM.\n", n_iter));
442 
443   PetscCall(PetscMalloc3(nloc, &lid_matched, nloc, &lid_cprowID, nloc, &lid_max_pe));
444   PetscCall(PetscCDCreate(nloc, &agg_llists));
445   PetscCall(PetscCDSetChunkSize(agg_llists, nloc + 1));
446   *a_locals_llist = agg_llists;
447   /* add self to all lists */
448   for (PetscInt kk = 0; kk < nloc; kk++) PetscCall(PetscCDAppendID(agg_llists, kk, Istart + kk));
449   /* make a copy of the graph, this gets destroyed in iterates */
450   PetscCall(MatDuplicate(a_Gmat, MAT_COPY_VALUES, &cMat));
451   PetscCall(MatConvert(cMat, MATAIJ, MAT_INPLACE_MATRIX, &cMat));
452   isMPI = (PetscBool)(size > 1);
453   if (isMPI) {
454     /* list of deleted ghosts, should compress this */
455     PetscCall(PetscCDCreate(size, &ghost_deleted_list));
456     PetscCall(PetscCDSetChunkSize(ghost_deleted_list, 100));
457   }
458   for (PetscInt iter = 0; iter < n_iter; iter++) {
459     const PetscScalar *lghost_max_ew, *lid_max_ew;
460     PetscBool         *lghost_matched;
461     PetscMPIInt       *lghost_pe, *lghost_max_pe;
462     Vec                locMaxEdge, ghostMaxEdge, ghostMaxPE, locMaxPE;
463     PetscInt          *lghost_gid, nEdges, nEdges0, num_ghosts = 0;
464     Edge              *Edges;
465     const PetscInt     n_sub_its = 1000; // in case of a bug, stop at some point
466 
467     /* get submatrices of cMat */
468     for (PetscInt kk = 0; kk < nloc; kk++) lid_cprowID[kk] = -1;
469     if (isMPI) {
470       mpimat = (Mat_MPIAIJ *)cMat->data;
471       matA   = (Mat_SeqAIJ *)mpimat->A->data;
472       matB   = (Mat_SeqAIJ *)mpimat->B->data;
473       if (!matB->compressedrow.use) {
474         /* force construction of compressed row data structure since code below requires it */
475         PetscCall(MatCheckCompressedRow(mpimat->B, matB->nonzerorowcnt, &matB->compressedrow, matB->i, mpimat->B->rmap->n, -1.0));
476       }
477       /* set index into compressed row 'lid_cprowID' */
478       for (ix = 0; ix < matB->compressedrow.nrows; ix++) {
479         PetscInt *ridx = matB->compressedrow.rindex, lid = ridx[ix];
480         if (ridx[ix] >= 0) lid_cprowID[lid] = ix;
481       }
482     } else {
483       matA = (Mat_SeqAIJ *)cMat->data;
484     }
485     /* set matched flags: true for empty list */
486     for (PetscInt kk = 0; kk < nloc; kk++) {
487       PetscCall(PetscCDCountAt(agg_llists, kk, &ix));
488       if (ix > 0) lid_matched[kk] = PETSC_FALSE;
489       else lid_matched[kk] = PETSC_TRUE; // call deleted gids as matched
490     }
491     /* max edge and pe vecs */
492     PetscCall(MatCreateVecs(cMat, &locMaxEdge, NULL));
493     PetscCall(MatCreateVecs(cMat, &locMaxPE, NULL));
494     /* get 'lghost_pe' & 'lghost_gid' & init. 'lghost_matched' using 'mpimat->lvec' */
495     if (isMPI) {
496       Vec                vec;
497       PetscScalar        vval;
498       const PetscScalar *buf;
499 
500       PetscCall(MatCreateVecs(cMat, &vec, NULL));
501       PetscCall(VecGetLocalSize(mpimat->lvec, &num_ghosts));
502       /* lghost_matched */
503       for (PetscInt kk = 0, gid = Istart; kk < nloc; kk++, gid++) {
504         PetscScalar vval = lid_matched[kk] ? 1.0 : 0.0;
505 
506         PetscCall(VecSetValues(vec, 1, &gid, &vval, INSERT_VALUES));
507       }
508       PetscCall(VecAssemblyBegin(vec));
509       PetscCall(VecAssemblyEnd(vec));
510       PetscCall(VecScatterBegin(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD));
511       PetscCall(VecScatterEnd(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD));
512       PetscCall(VecGetArrayRead(mpimat->lvec, &buf)); /* get proc ID in 'buf' */
513       PetscCall(PetscMalloc4(num_ghosts, &lghost_matched, num_ghosts, &lghost_pe, num_ghosts, &lghost_gid, num_ghosts, &lghost_max_pe));
514 
515       for (PetscInt kk = 0; kk < num_ghosts; kk++) {
516         lghost_matched[kk] = (PetscBool)(PetscRealPart(buf[kk]) != 0); // the proc of the ghost for now
517       }
518       PetscCall(VecRestoreArrayRead(mpimat->lvec, &buf));
519       /* lghost_pe */
520       vval = (PetscScalar)(rank);
521       for (PetscInt kk = 0, gid = Istart; kk < nloc; kk++, gid++) PetscCall(VecSetValues(vec, 1, &gid, &vval, INSERT_VALUES)); /* set with GID */
522       PetscCall(VecAssemblyBegin(vec));
523       PetscCall(VecAssemblyEnd(vec));
524       PetscCall(VecScatterBegin(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD));
525       PetscCall(VecScatterEnd(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD));
526       PetscCall(VecGetArrayRead(mpimat->lvec, &buf));                                                   /* get proc ID in 'buf' */
527       for (PetscInt kk = 0; kk < num_ghosts; kk++) lghost_pe[kk] = (PetscMPIInt)PetscRealPart(buf[kk]); // the proc of the ghost for now
528       PetscCall(VecRestoreArrayRead(mpimat->lvec, &buf));
529       /* lghost_gid */
530       for (PetscInt kk = 0, gid = Istart; kk < nloc; kk++, gid++) {
531         vval = (PetscScalar)(gid);
532 
533         PetscCall(VecSetValues(vec, 1, &gid, &vval, INSERT_VALUES)); /* set with GID */
534       }
535       PetscCall(VecAssemblyBegin(vec));
536       PetscCall(VecAssemblyEnd(vec));
537       PetscCall(VecScatterBegin(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD));
538       PetscCall(VecScatterEnd(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD));
539       PetscCall(VecDestroy(&vec));
540       PetscCall(VecGetArrayRead(mpimat->lvec, &buf)); /* get proc ID in 'lghost_gid' */
541       for (PetscInt kk = 0; kk < num_ghosts; kk++) lghost_gid[kk] = (PetscInt)PetscRealPart(buf[kk]);
542       PetscCall(VecRestoreArrayRead(mpimat->lvec, &buf));
543     }
544     // get 'comm_procs' (could hoist)
545     for (PetscInt kk = 0; kk < REQ_BF_SIZE; kk++) comm_procs[kk] = -1;
546     for (ix = 0, ncomm_procs = 0; ix < num_ghosts; ix++) {
547       PetscMPIInt proc = lghost_pe[ix], idx = -1;
548 
549       for (PetscMPIInt k = 0; k < ncomm_procs && idx == -1; k++)
550         if (comm_procs[k] == proc) idx = k;
551       if (idx == -1) comm_procs[ncomm_procs++] = proc;
552       PetscCheck(ncomm_procs != REQ_BF_SIZE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Receive request array too small: %d", ncomm_procs);
553     }
554     /* count edges, compute initial 'locMaxEdge', 'locMaxPE' */
555     nEdges0 = 0;
556     for (PetscInt kk = 0, gid = Istart; kk < nloc; kk++, gid++) {
557       PetscReal   max_e = 0., tt;
558       PetscScalar vval;
559       PetscInt    lid = kk, max_pe = rank, pe, n;
560 
561       ii = matA->i;
562       n  = ii[lid + 1] - ii[lid];
563       aj = PetscSafePointerPlusOffset(matA->j, ii[lid]);
564       ap = PetscSafePointerPlusOffset(matA->a, ii[lid]);
565       for (PetscInt jj = 0; jj < n; jj++) {
566         PetscInt lidj = aj[jj];
567 
568         if ((tt = PetscRealPart(ap[jj])) > threshold && lidj != lid) {
569           if (tt > max_e) max_e = tt;
570           if (lidj > lid) nEdges0++;
571         }
572       }
573       if ((ix = lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
574         ii = matB->compressedrow.i;
575         n  = ii[ix + 1] - ii[ix];
576         ap = matB->a + ii[ix];
577         aj = matB->j + ii[ix];
578         for (PetscInt jj = 0; jj < n; jj++) {
579           if ((tt = PetscRealPart(ap[jj])) > threshold) {
580             if (tt > max_e) max_e = tt;
581             nEdges0++;
582             if ((pe = lghost_pe[aj[jj]]) > max_pe) max_pe = pe;
583           }
584         }
585       }
586       vval = max_e;
587       PetscCall(VecSetValues(locMaxEdge, 1, &gid, &vval, INSERT_VALUES));
588       vval = (PetscScalar)max_pe;
589       PetscCall(VecSetValues(locMaxPE, 1, &gid, &vval, INSERT_VALUES));
590       if (iter == 0 && max_e <= MY_MEPS) { // add BCs to fake aggregate
591         lid_matched[lid] = PETSC_TRUE;
592         if (bc_agg == -1) {
593           bc_agg = lid;
594           PetscCall(PetscCDCreate(1, &bc_list));
595         }
596         PetscCall(PetscCDRemoveAllAt(agg_llists, lid));
597         PetscCall(PetscCDAppendID(bc_list, 0, Istart + lid));
598       }
599     }
600     PetscCall(VecAssemblyBegin(locMaxEdge));
601     PetscCall(VecAssemblyEnd(locMaxEdge));
602     PetscCall(VecAssemblyBegin(locMaxPE));
603     PetscCall(VecAssemblyEnd(locMaxPE));
604     /* make 'ghostMaxEdge_max_ew', 'lghost_max_pe' */
605     if (isMPI) {
606       const PetscScalar *buf;
607 
608       PetscCall(VecDuplicate(mpimat->lvec, &ghostMaxEdge));
609       PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD));
610       PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD));
611 
612       PetscCall(VecDuplicate(mpimat->lvec, &ghostMaxPE));
613       PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxPE, ghostMaxPE, INSERT_VALUES, SCATTER_FORWARD));
614       PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxPE, ghostMaxPE, INSERT_VALUES, SCATTER_FORWARD));
615       PetscCall(VecGetArrayRead(ghostMaxPE, &buf));
616       for (PetscInt kk = 0; kk < num_ghosts; kk++) lghost_max_pe[kk] = (PetscMPIInt)PetscRealPart(buf[kk]); // the MAX proc of the ghost now
617       PetscCall(VecRestoreArrayRead(ghostMaxPE, &buf));
618     }
619     { // make lid_max_pe
620       const PetscScalar *buf;
621 
622       PetscCall(VecGetArrayRead(locMaxPE, &buf));
623       for (PetscInt kk = 0; kk < nloc; kk++) lid_max_pe[kk] = (PetscMPIInt)PetscRealPart(buf[kk]); // the MAX proc of the ghost now
624       PetscCall(VecRestoreArrayRead(locMaxPE, &buf));
625     }
626     /* setup sorted list of edges, and make 'Edges' */
627     PetscCall(PetscMalloc1(nEdges0, &Edges));
628     nEdges = 0;
629     for (PetscInt kk = 0, n; kk < nloc; kk++) {
630       const PetscInt lid = kk;
631       PetscReal      tt;
632 
633       ii = matA->i;
634       n  = ii[lid + 1] - ii[lid];
635       aj = PetscSafePointerPlusOffset(matA->j, ii[lid]);
636       ap = PetscSafePointerPlusOffset(matA->a, ii[lid]);
637       for (PetscInt jj = 0; jj < n; jj++) {
638         PetscInt lidj = aj[jj];
639 
640         if ((tt = PetscRealPart(ap[jj])) > threshold && lidj != lid) {
641           if (lidj > lid) {
642             Edges[nEdges].lid0       = lid;
643             Edges[nEdges].gid1       = lidj + Istart;
644             Edges[nEdges].ghost1_idx = -1;
645             Edges[nEdges].weight     = tt;
646             nEdges++;
647           }
648         }
649       }
650       if ((ix = lid_cprowID[lid]) != -1) { /* if I have any ghost neighbor */
651         ii = matB->compressedrow.i;
652         n  = ii[ix + 1] - ii[ix];
653         ap = matB->a + ii[ix];
654         aj = matB->j + ii[ix];
655         for (PetscInt jj = 0; jj < n; jj++) {
656           if ((tt = PetscRealPart(ap[jj])) > threshold) {
657             Edges[nEdges].lid0       = lid;
658             Edges[nEdges].gid1       = lghost_gid[aj[jj]];
659             Edges[nEdges].ghost1_idx = aj[jj];
660             Edges[nEdges].weight     = tt;
661             nEdges++;
662           }
663         }
664       }
665     }
666     PetscCheck(nEdges == nEdges0, PETSC_COMM_SELF, PETSC_ERR_SUP, "nEdges != nEdges0: %" PetscInt_FMT " %" PetscInt_FMT, nEdges0, nEdges);
667     if (Edges) qsort(Edges, nEdges, sizeof(Edge), gamg_hem_compare);
668 
669     PetscCall(PetscInfo(info_is, "[%d] HEM iteration %" PetscInt_FMT " with %" PetscInt_FMT " edges\n", rank, iter, nEdges));
670 
671     /* projection matrix */
672     PetscCall(MatCreate(comm, &P));
673     PetscCall(MatSetType(P, MATAIJ));
674     PetscCall(MatSetSizes(P, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
675     PetscCall(MatMPIAIJSetPreallocation(P, 1, NULL, 1, NULL));
676     PetscCall(MatSeqAIJSetPreallocation(P, 1, NULL));
677     PetscCall(MatSetUp(P));
678     /* process - communicate - process */
679     for (PetscInt sub_it = 0, old_num_edge = 0; /* sub_it < n_sub_its */; /* sub_it++ */) {
680       PetscInt    nactive_edges = 0, n_act_n[3], gn_act_n[3];
681       PetscMPIInt tag1, tag2;
682 
683       PetscCall(VecGetArrayRead(locMaxEdge, &lid_max_ew));
684       if (isMPI) {
685         PetscCall(VecGetArrayRead(ghostMaxEdge, &lghost_max_ew));
686         PetscCall(PetscCommGetNewTag(comm, &tag1));
687         PetscCall(PetscCommGetNewTag(comm, &tag2));
688       }
689       for (PetscInt kk = 0; kk < nEdges; kk++) {
690         const Edge    *e    = &Edges[kk];
691         const PetscInt lid0 = e->lid0, gid1 = e->gid1, ghost1_idx = e->ghost1_idx, gid0 = lid0 + Istart, lid1 = gid1 - Istart;
692         PetscBool      isOK = PETSC_TRUE, print = PETSC_FALSE;
693 
694         if (print)
695           PetscCall(PetscSynchronizedPrintf(comm, "\t[%d] edge (%" PetscInt_FMT " %" PetscInt_FMT "), %s %s %s\n", rank, gid0, gid1, lid_matched[lid0] ? "true" : "false", (ghost1_idx != -1 && lghost_matched[ghost1_idx]) ? "true" : "false", (ghost1_idx == -1 && lid_matched[lid1]) ? "true" : "false"));
696         /* skip if either vertex is matched already */
697         if (lid_matched[lid0] || (ghost1_idx != -1 && lghost_matched[ghost1_idx]) || (ghost1_idx == -1 && lid_matched[lid1])) continue;
698 
699         nactive_edges++;
700         PetscCheck(PetscRealPart(lid_max_ew[lid0]) >= e->weight - MY_MEPS, PETSC_COMM_SELF, PETSC_ERR_SUP, "edge weight %e > max %e", (double)e->weight, (double)PetscRealPart(lid_max_ew[lid0]));
701         if (print) PetscCall(PetscSynchronizedPrintf(comm, "\t[%d] active edge (%" PetscInt_FMT " %" PetscInt_FMT "), diff0 = %10.4e\n", rank, gid0, gid1, (double)(PetscRealPart(lid_max_ew[lid0]) - (double)e->weight)));
702         // smaller edge, lid_max_ew get updated - e0
703         if (PetscRealPart(lid_max_ew[lid0]) > e->weight + MY_MEPS) {
704           if (print)
705             PetscCall(PetscSynchronizedPrintf(comm, "\t\t[%d] 1) e0 SKIPPING small edge %20.14e edge (%" PetscInt_FMT " %" PetscInt_FMT "), diff = %10.4e to proc %d. max = %20.14e, w = %20.14e\n", rank, (double)e->weight, gid0, gid1, (double)(PetscRealPart(lid_max_ew[lid0]) - e->weight), ghost1_idx != -1 ? lghost_pe[ghost1_idx] : rank, (double)PetscRealPart(lid_max_ew[lid0]),
706                                               (double)e->weight));
707           continue; // we are basically filter edges here
708         }
709         // e1 - local
710         if (ghost1_idx == -1) {
711           if (PetscRealPart(lid_max_ew[lid1]) > e->weight + MY_MEPS) {
712             if (print)
713               PetscCall(PetscSynchronizedPrintf(comm, "\t\t%c[%d] 2) e1 SKIPPING small local edge %20.14e edge (%" PetscInt_FMT " %" PetscInt_FMT "), diff = %10.4e\n", ghost1_idx != -1 ? '\t' : ' ', rank, (double)e->weight, gid0, gid1, (double)(PetscRealPart(lid_max_ew[lid1]) - e->weight)));
714             continue; // we are basically filter edges here
715           }
716         } else { // e1 - ghost
717           /* see if edge might get matched on other proc */
718           PetscReal g_max_e1 = PetscRealPart(lghost_max_ew[ghost1_idx]);
719 
720           if (print)
721             PetscCall(PetscSynchronizedPrintf(comm, "\t\t\t[%d] CHECK GHOST e1, edge (%" PetscInt_FMT " %" PetscInt_FMT "), E0 MAX EDGE WEIGHT = %10.4e, EDGE WEIGHT = %10.4e, diff1 = %10.4e, ghost proc %d with max pe %d on e0 and %d on e1\n", rank, gid0, gid1, (double)PetscRealPart(lid_max_ew[lid0]),
722                                               (double)e->weight, (double)(PetscRealPart(lghost_max_ew[ghost1_idx]) - e->weight), lghost_pe[ghost1_idx], lid_max_pe[lid0], lghost_max_pe[ghost1_idx]));
723           if (g_max_e1 > e->weight + MY_MEPS) {
724             /* PetscCall(PetscSynchronizedPrintf(comm,"\t\t\t\t[%d] 3) ghost e1 SKIPPING small edge (%d %d), diff = %10.4e from proc %d with max pe %d. max = %20.14e, w = %20.14e\n", rank, gid0, gid1, g_max_e1 - e->weight, lghost_pe[ghost1_idx], lghost_max_pe[ghost1_idx], g_max_e1, e->weight )); */
725             continue;
726           } else if (g_max_e1 >= e->weight - MY_MEPS && lghost_pe[ghost1_idx] > rank) { // is 'lghost_max_pe[ghost1_idx] > rank' needed?
727             /* check for max_ea == to this edge and larger processor that will deal with this */
728             if (print)
729               PetscCall(PetscSynchronizedPrintf(comm, "\t\t\t[%d] ghost e1 SKIPPING EQUAL (%" PetscInt_FMT " %" PetscInt_FMT "), diff = %10.4e from larger proc %d with max pe %d. max = %20.14e, w = %20.14e\n", rank, gid0, gid1,
730                                                 (double)(PetscRealPart(lid_max_ew[lid0]) - (double)e->weight), lghost_pe[ghost1_idx], lghost_max_pe[ghost1_idx], (double)g_max_e1, (double)e->weight));
731             isOK = PETSC_FALSE; // this guy could delete me
732             continue;
733           } else {
734             /* PetscCall(PetscSynchronizedPrintf(comm,"\t[%d] Edge (%d %d) passes gid0 tests, diff = %10.4e from proc %d with max pe %d. max = %20.14e, w = %20.14e\n", rank, gid0, gid1, g_max_e1 - e->weight, lghost_pe[ghost1_idx], lghost_max_pe[ghost1_idx], g_max_e1, e->weight )); */
735           }
736         }
737         /* check ghost for v0 */
738         if (isOK) {
739           PetscReal max_e, ew;
740 
741           if ((ix = lid_cprowID[lid0]) != -1) { /* if I have any ghost neighbors */
742             PetscInt n;
743 
744             ii = matB->compressedrow.i;
745             n  = ii[ix + 1] - ii[ix];
746             ap = matB->a + ii[ix];
747             aj = matB->j + ii[ix];
748             for (PetscInt jj = 0; jj < n && isOK; jj++) {
749               PetscInt lidj = aj[jj];
750 
751               if (lghost_matched[lidj]) continue;
752               ew = PetscRealPart(ap[jj]);
753               if (ew <= threshold) continue;
754               max_e = PetscRealPart(lghost_max_ew[lidj]);
755 
756               /* check for max_e == to this edge and larger processor that will deal with this */
757               if (ew >= PetscRealPart(lid_max_ew[lid0]) - MY_MEPS && lghost_max_pe[lidj] > rank) isOK = PETSC_FALSE;
758               PetscCheck(ew <= max_e + MY_MEPS, PETSC_COMM_SELF, PETSC_ERR_SUP, "edge weight %e > max %e. ncols = %" PetscInt_FMT ", gid0 = %" PetscInt_FMT ", gid1 = %" PetscInt_FMT, (double)PetscRealPart(ew), (double)PetscRealPart(max_e), n, lid0 + Istart, lghost_gid[lidj]);
759               if (print)
760                 PetscCall(PetscSynchronizedPrintf(comm, "\t\t\t\t[%d] e0: looked at ghost adj (%" PetscInt_FMT " %" PetscInt_FMT "), diff = %10.4e, ghost on proc %d (max %d). isOK = %d, %d %d %d; ew = %e, lid0 max ew = %e, diff = %e, eps = %e\n", rank, gid0, lghost_gid[lidj], (double)(max_e - ew), lghost_pe[lidj], lghost_max_pe[lidj], isOK, (double)(ew) >= (double)(max_e - MY_MEPS), ew >= PetscRealPart(lid_max_ew[lid0]) - MY_MEPS, lghost_pe[lidj] > rank, (double)ew, (double)PetscRealPart(lid_max_ew[lid0]), (double)(ew - PetscRealPart(lid_max_ew[lid0])), (double)MY_MEPS));
761             }
762             if (!isOK && print) PetscCall(PetscSynchronizedPrintf(comm, "\t\t[%d] skip edge (%" PetscInt_FMT " %" PetscInt_FMT ") from ghost inspection\n", rank, gid0, gid1));
763           }
764           /* check local v1 */
765           if (ghost1_idx == -1) {
766             if ((ix = lid_cprowID[lid1]) != -1) { /* if I have any ghost neighbors */
767               PetscInt n;
768 
769               ii = matB->compressedrow.i;
770               n  = ii[ix + 1] - ii[ix];
771               ap = matB->a + ii[ix];
772               aj = matB->j + ii[ix];
773               for (PetscInt jj = 0; jj < n && isOK; jj++) {
774                 PetscInt lidj = aj[jj];
775 
776                 if (lghost_matched[lidj]) continue;
777                 ew = PetscRealPart(ap[jj]);
778                 if (ew <= threshold) continue;
779                 max_e = PetscRealPart(lghost_max_ew[lidj]);
780                 /* check for max_e == to this edge and larger processor that will deal with this */
781                 if (ew >= PetscRealPart(lid_max_ew[lid1]) - MY_MEPS && lghost_max_pe[lidj] > rank) isOK = PETSC_FALSE;
782                 PetscCheck(ew <= max_e + MY_MEPS, PETSC_COMM_SELF, PETSC_ERR_SUP, "edge weight %e > max %e", (double)PetscRealPart(ew), (double)PetscRealPart(max_e));
783                 if (print)
784                   PetscCall(PetscSynchronizedPrintf(comm, "\t\t\t\t\t[%d] e1: looked at ghost adj (%" PetscInt_FMT " %" PetscInt_FMT "), diff = %10.4e, ghost on proc %d (max %d)\n", rank, gid0, lghost_gid[lidj], (double)(max_e - ew), lghost_pe[lidj], lghost_max_pe[lidj]));
785               }
786             }
787             if (!isOK && print) PetscCall(PetscSynchronizedPrintf(comm, "\t\t[%d] skip edge (%" PetscInt_FMT " %" PetscInt_FMT ") from ghost inspection\n", rank, gid0, gid1));
788           }
789         }
790         PetscReal e1_max_w = (ghost1_idx == -1 ? PetscRealPart(lid_max_ew[lid0]) : PetscRealPart(lghost_max_ew[ghost1_idx]));
791         if (print)
792           PetscCall(PetscSynchronizedPrintf(comm, "\t[%d] MATCHING (%" PetscInt_FMT " %" PetscInt_FMT ") e1 max weight = %e, e1 weight diff %e, %s. isOK = %d\n", rank, gid0, gid1, (double)e1_max_w, (double)(e1_max_w - e->weight), ghost1_idx == -1 ? "local" : "ghost", isOK));
793         /* do it */
794         if (isOK) {
795           if (ghost1_idx == -1) {
796             PetscCheck(!lid_matched[lid1], PETSC_COMM_SELF, PETSC_ERR_SUP, "local %" PetscInt_FMT " is matched", gid1);
797             lid_matched[lid1] = PETSC_TRUE;                       /* keep track of what we've done this round */
798             PetscCall(PetscCDMoveAppend(agg_llists, lid0, lid1)); // takes lid1's list and appends to lid0's
799           } else {
800             /* add gid1 to list of ghost deleted by me -- I need their children */
801             PetscMPIInt proc = lghost_pe[ghost1_idx];
802             PetscCheck(!lghost_matched[ghost1_idx], PETSC_COMM_SELF, PETSC_ERR_SUP, "ghost %" PetscInt_FMT " is matched", lghost_gid[ghost1_idx]);
803             lghost_matched[ghost1_idx] = PETSC_TRUE;
804             PetscCall(PetscCDAppendID(ghost_deleted_list, proc, ghost1_idx)); /* cache to send messages */
805             PetscCall(PetscCDAppendID(ghost_deleted_list, proc, lid0));
806           }
807           lid_matched[lid0] = PETSC_TRUE; /* keep track of what we've done this round */
808           /* set projection */
809           PetscCall(MatSetValues(P, 1, &gid0, 1, &gid0, &one, INSERT_VALUES));
810           PetscCall(MatSetValues(P, 1, &gid1, 1, &gid0, &one, INSERT_VALUES));
811           //PetscCall(PetscPrintf(comm,"\t %" PetscInt_FMT ".%" PetscInt_FMT ") match active EDGE %" PetscInt_FMT " : (%" PetscInt_FMT " %" PetscInt_FMT ")\n",iter,sub_it, nactive_edges, gid0, gid1));
812         } /* matched */
813       } /* edge loop */
814       PetscCall(PetscSynchronizedFlush(comm, PETSC_STDOUT));
815       if (isMPI) PetscCall(VecRestoreArrayRead(ghostMaxEdge, &lghost_max_ew));
816       PetscCall(VecRestoreArrayRead(locMaxEdge, &lid_max_ew));
817       // count active for test, latter, update deleted ghosts
818       n_act_n[0] = nactive_edges;
819       if (ghost_deleted_list) PetscCall(PetscCDCount(ghost_deleted_list, &n_act_n[2]));
820       else n_act_n[2] = 0;
821       PetscCall(PetscCDCount(agg_llists, &n_act_n[1]));
822       PetscCallMPI(MPIU_Allreduce(n_act_n, gn_act_n, 3, MPIU_INT, MPI_SUM, comm));
823       PetscCall(PetscInfo(info_is, "[%d] %" PetscInt_FMT ".%" PetscInt_FMT ") nactive edges=%" PetscInt_FMT ", ncomm_procs=%d, nEdges=%" PetscInt_FMT ", %" PetscInt_FMT " deleted ghosts, N=%" PetscInt_FMT "\n", rank, iter, sub_it, gn_act_n[0], ncomm_procs, nEdges, gn_act_n[2], gn_act_n[1]));
824       /* deal with deleted ghost */
825       if (isMPI) {
826         PetscCDIntNd *pos;
827         PetscInt     *sbuffs1[REQ_BF_SIZE], ndel;
828         PetscInt     *sbuffs2[REQ_BF_SIZE];
829         MPI_Status    status;
830 
831         /* send deleted ghosts */
832         for (PetscInt proc_idx = 0; proc_idx < ncomm_procs; proc_idx++) {
833           const PetscMPIInt proc = comm_procs[proc_idx];
834           PetscInt         *sbuff, *pt, scount;
835           MPI_Request      *request;
836 
837           /* count ghosts */
838           PetscCall(PetscCDCountAt(ghost_deleted_list, proc, &ndel));
839           ndel /= 2; // two entries for each proc
840           scount = 2 + 2 * ndel;
841           PetscCall(PetscMalloc1(scount + request_size, &sbuff));
842           /* save requests */
843           sbuffs1[proc_idx] = sbuff;
844           request           = (MPI_Request *)sbuff;
845           sbuff = pt = (PetscInt *)(sbuff + request_size);
846           /* write [ndel, proc, n*[gid1,gid0] */
847           *pt++ = ndel; // number of deleted to send
848           *pt++ = rank; // proc (not used)
849           PetscCall(PetscCDGetHeadPos(ghost_deleted_list, proc, &pos));
850           while (pos) {
851             PetscInt lid0, ghost_idx, gid1;
852 
853             PetscCall(PetscCDIntNdGetID(pos, &ghost_idx));
854             gid1 = lghost_gid[ghost_idx];
855             PetscCall(PetscCDGetNextPos(ghost_deleted_list, proc, &pos));
856             PetscCall(PetscCDIntNdGetID(pos, &lid0));
857             PetscCall(PetscCDGetNextPos(ghost_deleted_list, proc, &pos));
858             *pt++ = gid1;
859             *pt++ = lid0 + Istart; // gid0
860           }
861           PetscCheck(pt - sbuff == (ptrdiff_t)scount, PETSC_COMM_SELF, PETSC_ERR_SUP, "sbuff-pt != scount: %zu", (pt - sbuff));
862           /* MPIU_Isend:  tag1 [ndel, proc, n*[gid1,gid0] ] */
863           PetscCallMPI(MPIU_Isend(sbuff, scount, MPIU_INT, proc, tag1, comm, request));
864           PetscCall(PetscCDRemoveAllAt(ghost_deleted_list, proc)); // done with this list
865         }
866         /* receive deleted, send back partial aggregates, clear lists */
867         for (PetscInt proc_idx = 0; proc_idx < ncomm_procs; proc_idx++) {
868           PetscCallMPI(MPI_Probe(comm_procs[proc_idx] /* MPI_ANY_SOURCE */, tag1, comm, &status));
869           {
870             PetscInt         *pt, *pt2, *pt3, *sbuff, tmp;
871             MPI_Request      *request;
872             PetscMPIInt       rcount, scount;
873             const PetscMPIInt proc = status.MPI_SOURCE;
874 
875             PetscCallMPI(MPI_Get_count(&status, MPIU_INT, &rcount));
876             if (rcount > rbuff_sz) {
877               if (rbuff) PetscCall(PetscFree(rbuff));
878               PetscCall(PetscMalloc1(rcount, &rbuff));
879               rbuff_sz = rcount;
880             }
881             /* MPI_Recv: tag1 [ndel, proc, ndel*[gid1,gid0] ] */
882             PetscCallMPI(MPI_Recv(rbuff, rcount, MPIU_INT, proc, tag1, comm, &status));
883             /* read and count sends *[lid0, n, n*[gid] ] */
884             pt     = rbuff;
885             scount = 0;
886             ndel   = *pt++; // number of deleted to recv
887             tmp    = *pt++; // proc (not used)
888             while (ndel--) {
889               PetscInt gid1 = *pt++, lid1 = gid1 - Istart;
890               PetscInt gh_gid0 = *pt++; // gid on other proc (not used here to count)
891 
892               PetscCheck(lid1 >= 0 && lid1 < nloc, PETSC_COMM_SELF, PETSC_ERR_SUP, "received ghost deleted %" PetscInt_FMT, gid1);
893               PetscCheck(!lid_matched[lid1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "%" PetscInt_FMT ") received matched local gid %" PetscInt_FMT ",%" PetscInt_FMT ", with ghost (lid) %" PetscInt_FMT " from proc %d", sub_it, gid1, gh_gid0, tmp, proc);
894               lid_matched[lid1] = PETSC_TRUE;                    /* keep track of what we've done this round */
895               PetscCall(PetscCDCountAt(agg_llists, lid1, &tmp)); // n
896               scount += tmp + 2;                                 // lid0, n, n*[gid]
897             }
898             PetscCheck((pt - rbuff) == (ptrdiff_t)rcount, PETSC_COMM_SELF, PETSC_ERR_SUP, "receive buffer size != num read: %zu; rcount: %d", pt - rbuff, rcount);
899             /* send tag2: *[gid0, n, n*[gid] ] */
900             PetscCall(PetscMalloc1(scount + request_size, &sbuff));
901             sbuffs2[proc_idx] = sbuff; /* cache request */
902             request           = (MPI_Request *)sbuff;
903             pt2 = sbuff = sbuff + request_size;
904             // read again: n, proc, n*[gid1,gid0]
905             pt   = rbuff;
906             ndel = *pt++;
907             tmp  = *pt++; // proc (not used)
908             while (ndel--) {
909               PetscInt gid1 = *pt++, lid1 = gid1 - Istart, gh_gid0 = *pt++;
910 
911               /* write [gid0, aggSz, aggSz[gid] ] */
912               *pt2++ = gh_gid0;
913               pt3    = pt2++; /* save pointer for later */
914               PetscCall(PetscCDGetHeadPos(agg_llists, lid1, &pos));
915               while (pos) {
916                 PetscInt gid;
917 
918                 PetscCall(PetscCDIntNdGetID(pos, &gid));
919                 PetscCall(PetscCDGetNextPos(agg_llists, lid1, &pos));
920                 *pt2++ = gid;
921               }
922               *pt3 = (PetscInt)(pt2 - pt3 - 1);
923               /* clear list */
924               PetscCall(PetscCDRemoveAllAt(agg_llists, lid1));
925             }
926             PetscCheck((pt2 - sbuff) == (ptrdiff_t)scount, PETSC_COMM_SELF, PETSC_ERR_SUP, "buffer size != num write: %zu %d", pt2 - sbuff, scount);
927             /* MPIU_Isend: requested data tag2 *[lid0, n, n*[gid1] ] */
928             PetscCallMPI(MPIU_Isend(sbuff, scount, MPIU_INT, proc, tag2, comm, request));
929           }
930         } // proc_idx
931         /* receive tag2 *[gid0, n, n*[gid] ] */
932         for (PetscMPIInt proc_idx = 0; proc_idx < ncomm_procs; proc_idx++) {
933           PetscMPIInt proc;
934           PetscInt   *pt;
935           int         rcount;
936 
937           PetscCallMPI(MPI_Probe(comm_procs[proc_idx] /* MPI_ANY_SOURCE */, tag2, comm, &status));
938           PetscCallMPI(MPI_Get_count(&status, MPIU_INT, &rcount));
939           if (rcount > rbuff_sz) {
940             if (rbuff) PetscCall(PetscFree(rbuff));
941             PetscCall(PetscMalloc1(rcount, &rbuff));
942             rbuff_sz = rcount;
943           }
944           proc = status.MPI_SOURCE;
945           /* MPI_Recv:  tag1 [n, proc, n*[gid1,lid0] ] */
946           PetscCallMPI(MPI_Recv(rbuff, rcount, MPIU_INT, proc, tag2, comm, &status));
947           pt = rbuff;
948           while (pt - rbuff < rcount) {
949             PetscInt gid0 = *pt++, n = *pt++;
950 
951             while (n--) {
952               PetscInt gid1 = *pt++;
953 
954               PetscCall(PetscCDAppendID(agg_llists, gid0 - Istart, gid1));
955             }
956           }
957           PetscCheck((pt - rbuff) == (ptrdiff_t)rcount, PETSC_COMM_SELF, PETSC_ERR_SUP, "recv buffer size != num read: %zu %d", pt - rbuff, rcount);
958         }
959         /* wait for tag1 isends */
960         for (PetscMPIInt proc_idx = 0; proc_idx < ncomm_procs; proc_idx++) {
961           MPI_Request *request = (MPI_Request *)sbuffs1[proc_idx];
962 
963           PetscCallMPI(MPI_Wait(request, &status));
964           PetscCall(PetscFree(sbuffs1[proc_idx]));
965         }
966         /* wait for tag2 isends */
967         for (PetscMPIInt proc_idx = 0; proc_idx < ncomm_procs; proc_idx++) {
968           MPI_Request *request = (MPI_Request *)sbuffs2[proc_idx];
969 
970           PetscCallMPI(MPI_Wait(request, &status));
971           PetscCall(PetscFree(sbuffs2[proc_idx]));
972         }
973       } /* MPI */
974       /* set 'lghost_matched' - use locMaxEdge, ghostMaxEdge (recomputed next) */
975       if (isMPI) {
976         const PetscScalar *sbuff;
977 
978         for (PetscInt kk = 0, gid = Istart; kk < nloc; kk++, gid++) {
979           PetscScalar vval = lid_matched[kk] ? 1.0 : 0.0;
980 
981           PetscCall(VecSetValues(locMaxEdge, 1, &gid, &vval, INSERT_VALUES)); /* set with GID */
982         }
983         PetscCall(VecAssemblyBegin(locMaxEdge));
984         PetscCall(VecAssemblyEnd(locMaxEdge));
985         PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD));
986         PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD));
987         PetscCall(VecGetArrayRead(ghostMaxEdge, &sbuff));
988         for (PetscInt kk = 0; kk < num_ghosts; kk++) { lghost_matched[kk] = (PetscBool)(PetscRealPart(sbuff[kk]) != 0.0); }
989         PetscCall(VecRestoreArrayRead(ghostMaxEdge, &sbuff));
990       }
991       /* compute 'locMaxEdge' inside sub iteration b/c max weight can drop as neighbors are matched */
992       for (PetscInt kk = 0, gid = Istart; kk < nloc; kk++, gid++) {
993         PetscReal      max_e = 0., tt;
994         PetscScalar    vval;
995         const PetscInt lid    = kk;
996         PetscMPIInt    max_pe = rank, pe, n;
997 
998         ii = matA->i;
999         PetscCall(PetscMPIIntCast(ii[lid + 1] - ii[lid], &n));
1000         aj = PetscSafePointerPlusOffset(matA->j, ii[lid]);
1001         ap = PetscSafePointerPlusOffset(matA->a, ii[lid]);
1002         for (PetscMPIInt jj = 0; jj < n; jj++) {
1003           PetscInt lidj = aj[jj];
1004 
1005           if (lid_matched[lidj]) continue; /* this is new - can change local max */
1006           if (lidj != lid && PetscRealPart(ap[jj]) > max_e) max_e = PetscRealPart(ap[jj]);
1007         }
1008         if (lid_cprowID && (ix = lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
1009           ii = matB->compressedrow.i;
1010           PetscCall(PetscMPIIntCast(ii[ix + 1] - ii[ix], &n));
1011           ap = matB->a + ii[ix];
1012           aj = matB->j + ii[ix];
1013           for (PetscMPIInt jj = 0; jj < n; jj++) {
1014             PetscInt lidj = aj[jj];
1015 
1016             if (lghost_matched[lidj]) continue;
1017             if ((tt = PetscRealPart(ap[jj])) > max_e) max_e = tt;
1018           }
1019         }
1020         vval = (PetscScalar)max_e;
1021         PetscCall(VecSetValues(locMaxEdge, 1, &gid, &vval, INSERT_VALUES)); /* set with GID */
1022         // max PE with max edge
1023         if (lid_cprowID && (ix = lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
1024           ii = matB->compressedrow.i;
1025           PetscCall(PetscMPIIntCast(ii[ix + 1] - ii[ix], &n));
1026           ap = matB->a + ii[ix];
1027           aj = matB->j + ii[ix];
1028           for (PetscInt jj = 0; jj < n; jj++) {
1029             PetscInt lidj = aj[jj];
1030 
1031             if (lghost_matched[lidj]) continue;
1032             if ((pe = lghost_pe[aj[jj]]) > max_pe && PetscRealPart(ap[jj]) >= max_e - MY_MEPS) { max_pe = pe; }
1033           }
1034         }
1035         vval = (PetscScalar)max_pe;
1036         PetscCall(VecSetValues(locMaxPE, 1, &gid, &vval, INSERT_VALUES));
1037       }
1038       PetscCall(VecAssemblyBegin(locMaxEdge));
1039       PetscCall(VecAssemblyEnd(locMaxEdge));
1040       PetscCall(VecAssemblyBegin(locMaxPE));
1041       PetscCall(VecAssemblyEnd(locMaxPE));
1042       /* compute 'lghost_max_ew' and 'lghost_max_pe' to get ready for next iteration*/
1043       if (isMPI) {
1044         const PetscScalar *buf;
1045 
1046         PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD));
1047         PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD));
1048         PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxPE, ghostMaxPE, INSERT_VALUES, SCATTER_FORWARD));
1049         PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxPE, ghostMaxPE, INSERT_VALUES, SCATTER_FORWARD));
1050         PetscCall(VecGetArrayRead(ghostMaxPE, &buf));
1051         for (PetscInt kk = 0; kk < num_ghosts; kk++) {
1052           lghost_max_pe[kk] = (PetscMPIInt)PetscRealPart(buf[kk]); // the MAX proc of the ghost now
1053         }
1054         PetscCall(VecRestoreArrayRead(ghostMaxPE, &buf));
1055       }
1056       // if no active edges, stop
1057       if (gn_act_n[0] < 1) break;
1058       // inc and check (self stopping iteration
1059       PetscCheck(old_num_edge != gn_act_n[0], PETSC_COMM_SELF, PETSC_ERR_SUP, "HEM stalled step %" PetscInt_FMT "/%" PetscInt_FMT, sub_it + 1, n_sub_its);
1060       sub_it++;
1061       PetscCheck(sub_it < n_sub_its, PETSC_COMM_SELF, PETSC_ERR_SUP, "failed to finish HEM step %" PetscInt_FMT "/%" PetscInt_FMT, sub_it + 1, n_sub_its);
1062       old_num_edge = gn_act_n[0];
1063     } /* sub_it loop */
1064     /* clean up iteration */
1065     PetscCall(PetscFree(Edges));
1066     if (isMPI) { // can be hoisted
1067       PetscCall(VecRestoreArrayRead(ghostMaxEdge, &lghost_max_ew));
1068       PetscCall(VecDestroy(&ghostMaxEdge));
1069       PetscCall(VecDestroy(&ghostMaxPE));
1070       PetscCall(PetscFree4(lghost_matched, lghost_pe, lghost_gid, lghost_max_pe));
1071     }
1072     PetscCall(VecDestroy(&locMaxEdge));
1073     PetscCall(VecDestroy(&locMaxPE));
1074     /* create next graph */
1075     {
1076       Vec diag;
1077 
1078       /* add identity for unmatched vertices so they stay alive */
1079       for (PetscInt kk = 0, gid1, gid = Istart; kk < nloc; kk++, gid++) {
1080         if (!lid_matched[kk]) {
1081           const PetscInt lid = kk;
1082           PetscCDIntNd  *pos;
1083 
1084           PetscCall(PetscCDGetHeadPos(agg_llists, lid, &pos));
1085           PetscCheck(pos, PETSC_COMM_SELF, PETSC_ERR_PLIB, "empty list in singleton: %" PetscInt_FMT, gid);
1086           PetscCall(PetscCDIntNdGetID(pos, &gid1));
1087           PetscCheck(gid1 == gid, PETSC_COMM_SELF, PETSC_ERR_PLIB, "first in list (%" PetscInt_FMT ") in singleton not %" PetscInt_FMT, gid1, gid);
1088           PetscCall(MatSetValues(P, 1, &gid, 1, &gid, &one, INSERT_VALUES));
1089         }
1090       }
1091       PetscCall(MatAssemblyBegin(P, MAT_FINAL_ASSEMBLY));
1092       PetscCall(MatAssemblyEnd(P, MAT_FINAL_ASSEMBLY));
1093 
1094       /* project to make new graph with collapsed edges */
1095       PetscCall(MatPtAP(cMat, P, MAT_INITIAL_MATRIX, 1.0, &tMat));
1096       PetscCall(MatDestroy(&P));
1097       PetscCall(MatDestroy(&cMat));
1098       cMat = tMat;
1099       PetscCall(MatCreateVecs(cMat, &diag, NULL));
1100       PetscCall(MatGetDiagonal(cMat, diag));
1101       PetscCall(VecReciprocal(diag));
1102       PetscCall(VecSqrtAbs(diag));
1103       PetscCall(MatDiagonalScale(cMat, diag, diag));
1104       PetscCall(VecDestroy(&diag));
1105     }
1106   } /* coarsen iterator */
1107 
1108   /* make fake matrix with Mat->B only for smoothed agg QR. Need this if we make an aux graph (ie, PtAP) with k > 1 */
1109   if (size > 1) {
1110     Mat           mat;
1111     PetscCDIntNd *pos;
1112     PetscInt      NN, MM, jj = 0, mxsz = 0;
1113 
1114     for (PetscInt kk = 0; kk < nloc; kk++) {
1115       PetscCall(PetscCDCountAt(agg_llists, kk, &jj));
1116       if (jj > mxsz) mxsz = jj;
1117     }
1118     PetscCall(MatGetSize(a_Gmat, &MM, &NN));
1119     if (mxsz > MM - nloc) mxsz = MM - nloc;
1120     /* matrix of ghost adj for square graph */
1121     PetscCall(MatCreateAIJ(comm, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE, 0, NULL, mxsz, NULL, &mat));
1122     for (PetscInt lid = 0, gid = Istart; lid < nloc; lid++, gid++) {
1123       PetscCall(PetscCDGetHeadPos(agg_llists, lid, &pos));
1124       while (pos) {
1125         PetscInt gid1;
1126 
1127         PetscCall(PetscCDIntNdGetID(pos, &gid1));
1128         PetscCall(PetscCDGetNextPos(agg_llists, lid, &pos));
1129         if (gid1 < Istart || gid1 >= Istart + nloc) PetscCall(MatSetValues(mat, 1, &gid, 1, &gid1, &one, ADD_VALUES));
1130       }
1131     }
1132     PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
1133     PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
1134     PetscCall(PetscCDSetMat(agg_llists, mat));
1135     PetscCall(PetscCDDestroy(ghost_deleted_list));
1136     if (rbuff_sz) PetscCall(PetscFree(rbuff)); // always true
1137   }
1138   // move BCs into some node
1139   if (bc_list) {
1140     PetscCDIntNd *pos;
1141 
1142     PetscCall(PetscCDGetHeadPos(bc_list, 0, &pos));
1143     while (pos) {
1144       PetscInt gid1;
1145 
1146       PetscCall(PetscCDIntNdGetID(pos, &gid1));
1147       PetscCall(PetscCDGetNextPos(bc_list, 0, &pos));
1148       PetscCall(PetscCDAppendID(agg_llists, bc_agg, gid1));
1149     }
1150     PetscCall(PetscCDRemoveAllAt(bc_list, 0));
1151     PetscCall(PetscCDDestroy(bc_list));
1152   }
1153   {
1154     // check sizes -- all vertices must get in graph
1155     PetscInt sz, globalsz, MM;
1156 
1157     PetscCall(MatGetSize(a_Gmat, &MM, NULL));
1158     PetscCall(PetscCDCount(agg_llists, &sz));
1159     PetscCallMPI(MPIU_Allreduce(&sz, &globalsz, 1, MPIU_INT, MPI_SUM, comm));
1160     PetscCheck(MM == globalsz, comm, PETSC_ERR_SUP, "lost %" PetscInt_FMT " equations ?", (MM - globalsz));
1161   }
1162   // cleanup
1163   PetscCall(MatDestroy(&cMat));
1164   PetscCall(PetscFree3(lid_matched, lid_cprowID, lid_max_pe));
1165   PetscCall(ISDestroy(&info_is));
1166   PetscFunctionReturn(PETSC_SUCCESS);
1167 }
1168 
1169 /*
1170    HEM coarsen, simple greedy.
1171 */
1172 static PetscErrorCode MatCoarsenApply_HEM(MatCoarsen coarse)
1173 {
1174   Mat mat = coarse->graph;
1175 
1176   PetscFunctionBegin;
1177   PetscCall(MatCoarsenApply_HEM_private(mat, coarse->max_it, coarse->threshold, &coarse->agg_lists));
1178   PetscFunctionReturn(PETSC_SUCCESS);
1179 }
1180 
1181 static PetscErrorCode MatCoarsenView_HEM(MatCoarsen coarse, PetscViewer viewer)
1182 {
1183   PetscMPIInt rank;
1184   PetscBool   iascii;
1185 
1186   PetscFunctionBegin;
1187   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)coarse), &rank));
1188   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1189   if (iascii) {
1190     PetscCDIntNd     *pos, *pos2;
1191     PetscViewerFormat format;
1192 
1193     PetscCall(PetscViewerASCIIPrintf(viewer, "%" PetscInt_FMT " matching steps with threshold = %g\n", coarse->max_it, (double)coarse->threshold));
1194     PetscCall(PetscViewerGetFormat(viewer, &format));
1195     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1196       if (coarse->agg_lists) {
1197         PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1198         for (PetscInt kk = 0; kk < coarse->agg_lists->size; kk++) {
1199           PetscCall(PetscCDGetHeadPos(coarse->agg_lists, kk, &pos));
1200           if ((pos2 = pos)) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "selected local %" PetscInt_FMT ": ", kk));
1201           while (pos) {
1202             PetscInt gid1;
1203 
1204             PetscCall(PetscCDIntNdGetID(pos, &gid1));
1205             PetscCall(PetscCDGetNextPos(coarse->agg_lists, kk, &pos));
1206             PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " %" PetscInt_FMT " ", gid1));
1207           }
1208           if (pos2) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "\n"));
1209         }
1210         PetscCall(PetscViewerFlush(viewer));
1211         PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1212       } else {
1213         PetscCall(PetscViewerASCIIPrintf(viewer, "  HEM aggregator lists are not available\n"));
1214       }
1215     }
1216   }
1217   PetscFunctionReturn(PETSC_SUCCESS);
1218 }
1219 
1220 /*MC
   MATCOARSENHEM - A coarsener that uses HEM, a simple greedy heavy-edge-matching coarsener
1222 
1223    Level: beginner
1224 
1225 .seealso: `MatCoarsen`, `MatCoarsenMISKSetDistance()`, `MatCoarsenApply()`, `MatCoarsenSetType()`, `MatCoarsenType`, `MatCoarsenCreate()`, `MATCOARSENMISK`, `MATCOARSENMIS`
1226 M*/
1227 
1228 PETSC_EXTERN PetscErrorCode MatCoarsenCreate_HEM(MatCoarsen coarse)
1229 {
1230   PetscFunctionBegin;
1231   coarse->ops->apply = MatCoarsenApply_HEM;
1232   coarse->ops->view  = MatCoarsenView_HEM;
1233   coarse->max_it     = 4;
1234   PetscFunctionReturn(PETSC_SUCCESS);
1235 }
1236