xref: /petsc/src/dm/impls/plex/plexpartition.c (revision c8c5c547f526914c69472d8cade615559dc64129)
1 #include <petsc/private/dmpleximpl.h> /*I      "petscdmplex.h"   I*/
2 #include <petsc/private/partitionerimpl.h>
3 #include <petsc/private/hashseti.h>
4 
5 const char *const DMPlexCSRAlgorithms[] = {"mat", "graph", "overlap", "DMPlexCSRAlgorithm", "DM_PLEX_CSR_", NULL};
6 
7 static inline PetscInt DMPlex_GlobalID(PetscInt point)
8 {
9   return point >= 0 ? point : -(point + 1);
10 }
11 
12 static PetscErrorCode DMPlexCreatePartitionerGraph_Overlap(DM dm, PetscInt height, PetscInt *numVertices, PetscInt **offsets, PetscInt **adjacency, IS *globalNumbering)
13 {
14   DM              ovdm;
15   PetscSF         sfPoint;
16   IS              cellNumbering;
17   const PetscInt *cellNum;
18   PetscInt       *adj = NULL, *vOffsets = NULL, *vAdj = NULL;
19   PetscBool       useCone, useClosure;
20   PetscInt        dim, depth, overlap, cStart, cEnd, c, v;
21   PetscMPIInt     rank, size;
22 
23   PetscFunctionBegin;
24   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
25   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)dm), &size));
26   PetscCall(DMGetDimension(dm, &dim));
27   PetscCall(DMPlexGetDepth(dm, &depth));
28   if (dim != depth) {
29     /* We do not handle the uninterpolated case here */
30     PetscCall(DMPlexCreateNeighborCSR(dm, height, numVertices, offsets, adjacency));
31     /* DMPlexCreateNeighborCSR does not make a numbering */
32     if (globalNumbering) PetscCall(DMPlexCreateCellNumbering_Internal(dm, PETSC_TRUE, globalNumbering));
33     /* Different behavior for empty graphs */
34     if (!*numVertices) {
35       PetscCall(PetscMalloc1(1, offsets));
36       (*offsets)[0] = 0;
37     }
38     /* Broken in parallel */
39     if (rank) PetscCheck(!*numVertices, PETSC_COMM_SELF, PETSC_ERR_SUP, "Parallel partitioning of uninterpolated meshes not supported");
40     PetscFunctionReturn(PETSC_SUCCESS);
41   }
42   /* Always use FVM adjacency to create partitioner graph */
43   PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure));
44   PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE));
45   /* Need overlap >= 1 */
46   PetscCall(DMPlexGetOverlap(dm, &overlap));
47   if (size && overlap < 1) {
48     PetscCall(DMPlexDistributeOverlap(dm, 1, NULL, &ovdm));
49   } else {
50     PetscCall(PetscObjectReference((PetscObject)dm));
51     ovdm = dm;
52   }
53   PetscCall(DMGetPointSF(ovdm, &sfPoint));
54   PetscCall(DMPlexGetHeightStratum(ovdm, height, &cStart, &cEnd));
55   PetscCall(DMPlexCreateNumbering_Plex(ovdm, cStart, cEnd, 0, NULL, sfPoint, &cellNumbering));
56   if (globalNumbering) {
57     PetscCall(PetscObjectReference((PetscObject)cellNumbering));
58     *globalNumbering = cellNumbering;
59   }
60   PetscCall(ISGetIndices(cellNumbering, &cellNum));
61   /* Determine sizes */
62   for (*numVertices = 0, c = cStart; c < cEnd; ++c) {
63     /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
64     if (cellNum[c - cStart] < 0) continue;
65     (*numVertices)++;
66   }
67   PetscCall(PetscCalloc1(*numVertices + 1, &vOffsets));
68   for (c = cStart, v = 0; c < cEnd; ++c) {
69     PetscInt adjSize = PETSC_DETERMINE, a, vsize = 0;
70 
71     if (cellNum[c - cStart] < 0) continue;
72     PetscCall(DMPlexGetAdjacency(ovdm, c, &adjSize, &adj));
73     for (a = 0; a < adjSize; ++a) {
74       const PetscInt point = adj[a];
75       if (point != c && cStart <= point && point < cEnd) ++vsize;
76     }
77     vOffsets[v + 1] = vOffsets[v] + vsize;
78     ++v;
79   }
80   /* Determine adjacency */
81   PetscCall(PetscMalloc1(vOffsets[*numVertices], &vAdj));
82   for (c = cStart, v = 0; c < cEnd; ++c) {
83     PetscInt adjSize = PETSC_DETERMINE, a, off = vOffsets[v];
84 
85     /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
86     if (cellNum[c - cStart] < 0) continue;
87     PetscCall(DMPlexGetAdjacency(ovdm, c, &adjSize, &adj));
88     for (a = 0; a < adjSize; ++a) {
89       const PetscInt point = adj[a];
90       if (point != c && cStart <= point && point < cEnd) vAdj[off++] = DMPlex_GlobalID(cellNum[point - cStart]);
91     }
92     PetscCheck(off == vOffsets[v + 1], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Offsets %" PetscInt_FMT " should be %" PetscInt_FMT, off, vOffsets[v + 1]);
93     /* Sort adjacencies (not strictly necessary) */
94     PetscCall(PetscSortInt(off - vOffsets[v], &vAdj[vOffsets[v]]));
95     ++v;
96   }
97   PetscCall(PetscFree(adj));
98   PetscCall(ISRestoreIndices(cellNumbering, &cellNum));
99   PetscCall(ISDestroy(&cellNumbering));
100   PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure));
101   PetscCall(DMDestroy(&ovdm));
102   if (offsets) {
103     *offsets = vOffsets;
104   } else PetscCall(PetscFree(vOffsets));
105   if (adjacency) {
106     *adjacency = vAdj;
107   } else PetscCall(PetscFree(vAdj));
108   PetscFunctionReturn(PETSC_SUCCESS);
109 }
110 
111 static PetscErrorCode DMPlexCreatePartitionerGraph_Native(DM dm, PetscInt height, PetscInt *numVertices, PetscInt **offsets, PetscInt **adjacency, IS *globalNumbering)
112 {
113   PetscInt        dim, depth, p, pStart, pEnd, a, adjSize, idx, size;
114   PetscInt       *adj = NULL, *vOffsets = NULL, *graph = NULL;
115   IS              cellNumbering;
116   const PetscInt *cellNum;
117   PetscBool       useCone, useClosure;
118   PetscSection    section;
119   PetscSegBuffer  adjBuffer;
120   PetscSF         sfPoint;
121   PetscInt       *adjCells = NULL, *remoteCells = NULL;
122   const PetscInt *local;
123   PetscInt        nroots, nleaves, l;
124   PetscMPIInt     rank;
125 
126   PetscFunctionBegin;
127   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
128   PetscCall(DMGetDimension(dm, &dim));
129   PetscCall(DMPlexGetDepth(dm, &depth));
130   if (dim != depth) {
131     /* We do not handle the uninterpolated case here */
132     PetscCall(DMPlexCreateNeighborCSR(dm, height, numVertices, offsets, adjacency));
133     /* DMPlexCreateNeighborCSR does not make a numbering */
134     if (globalNumbering) PetscCall(DMPlexCreateCellNumbering_Internal(dm, PETSC_TRUE, globalNumbering));
135     /* Different behavior for empty graphs */
136     if (!*numVertices) {
137       PetscCall(PetscMalloc1(1, offsets));
138       (*offsets)[0] = 0;
139     }
140     /* Broken in parallel */
141     if (rank) PetscCheck(!*numVertices, PETSC_COMM_SELF, PETSC_ERR_SUP, "Parallel partitioning of uninterpolated meshes not supported");
142     PetscFunctionReturn(PETSC_SUCCESS);
143   }
144   PetscCall(DMGetPointSF(dm, &sfPoint));
145   PetscCall(DMPlexGetHeightStratum(dm, height, &pStart, &pEnd));
146   /* Build adjacency graph via a section/segbuffer */
147   PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject)dm), &section));
148   PetscCall(PetscSectionSetChart(section, pStart, pEnd));
149   PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 1000, &adjBuffer));
150   /* Always use FVM adjacency to create partitioner graph */
151   PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure));
152   PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE));
153   PetscCall(DMPlexCreateNumbering_Plex(dm, pStart, pEnd, 0, NULL, sfPoint, &cellNumbering));
154   if (globalNumbering) {
155     PetscCall(PetscObjectReference((PetscObject)cellNumbering));
156     *globalNumbering = cellNumbering;
157   }
158   PetscCall(ISGetIndices(cellNumbering, &cellNum));
159   /* For all boundary faces (including faces adjacent to a ghost cell), record the local cell in adjCells
160      Broadcast adjCells to remoteCells (to get cells from roots) and Reduce adjCells to remoteCells (to get cells from leaves)
161    */
162   PetscCall(PetscSFGetGraph(sfPoint, &nroots, &nleaves, &local, NULL));
163   if (nroots >= 0) {
164     PetscInt fStart, fEnd, f;
165 
166     PetscCall(PetscCalloc2(nroots, &adjCells, nroots, &remoteCells));
167     PetscCall(DMPlexGetHeightStratum(dm, height + 1, &fStart, &fEnd));
168     for (l = 0; l < nroots; ++l) adjCells[l] = -3;
169     for (f = fStart; f < fEnd; ++f) {
170       const PetscInt *support;
171       PetscInt        supportSize;
172 
173       PetscCall(DMPlexGetSupport(dm, f, &support));
174       PetscCall(DMPlexGetSupportSize(dm, f, &supportSize));
175       if (supportSize == 1) adjCells[f] = DMPlex_GlobalID(cellNum[support[0] - pStart]);
176       else if (supportSize == 2) {
177         PetscCall(PetscFindInt(support[0], nleaves, local, &p));
178         if (p >= 0) adjCells[f] = DMPlex_GlobalID(cellNum[support[1] - pStart]);
179         PetscCall(PetscFindInt(support[1], nleaves, local, &p));
180         if (p >= 0) adjCells[f] = DMPlex_GlobalID(cellNum[support[0] - pStart]);
181       }
182       /* Handle non-conforming meshes */
183       if (supportSize > 2) {
184         PetscInt        numChildren, i;
185         const PetscInt *children;
186 
187         PetscCall(DMPlexGetTreeChildren(dm, f, &numChildren, &children));
188         for (i = 0; i < numChildren; ++i) {
189           const PetscInt child = children[i];
190           if (fStart <= child && child < fEnd) {
191             PetscCall(DMPlexGetSupport(dm, child, &support));
192             PetscCall(DMPlexGetSupportSize(dm, child, &supportSize));
193             if (supportSize == 1) adjCells[child] = DMPlex_GlobalID(cellNum[support[0] - pStart]);
194             else if (supportSize == 2) {
195               PetscCall(PetscFindInt(support[0], nleaves, local, &p));
196               if (p >= 0) adjCells[child] = DMPlex_GlobalID(cellNum[support[1] - pStart]);
197               PetscCall(PetscFindInt(support[1], nleaves, local, &p));
198               if (p >= 0) adjCells[child] = DMPlex_GlobalID(cellNum[support[0] - pStart]);
199             }
200           }
201         }
202       }
203     }
204     for (l = 0; l < nroots; ++l) remoteCells[l] = -1;
205     PetscCall(PetscSFBcastBegin(dm->sf, MPIU_INT, adjCells, remoteCells, MPI_REPLACE));
206     PetscCall(PetscSFBcastEnd(dm->sf, MPIU_INT, adjCells, remoteCells, MPI_REPLACE));
207     PetscCall(PetscSFReduceBegin(dm->sf, MPIU_INT, adjCells, remoteCells, MPI_MAX));
208     PetscCall(PetscSFReduceEnd(dm->sf, MPIU_INT, adjCells, remoteCells, MPI_MAX));
209   }
210   /* Combine local and global adjacencies */
211   for (*numVertices = 0, p = pStart; p < pEnd; p++) {
212     /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
213     if (nroots > 0) {
214       if (cellNum[p - pStart] < 0) continue;
215     }
216     /* Add remote cells */
217     if (remoteCells) {
218       const PetscInt  gp = DMPlex_GlobalID(cellNum[p - pStart]);
219       PetscInt        coneSize, numChildren, c, i;
220       const PetscInt *cone, *children;
221 
222       PetscCall(DMPlexGetCone(dm, p, &cone));
223       PetscCall(DMPlexGetConeSize(dm, p, &coneSize));
224       for (c = 0; c < coneSize; ++c) {
225         const PetscInt point = cone[c];
226         if (remoteCells[point] >= 0 && remoteCells[point] != gp) {
227           PetscInt *PETSC_RESTRICT pBuf;
228           PetscCall(PetscSectionAddDof(section, p, 1));
229           PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf));
230           *pBuf = remoteCells[point];
231         }
232         /* Handle non-conforming meshes */
233         PetscCall(DMPlexGetTreeChildren(dm, point, &numChildren, &children));
234         for (i = 0; i < numChildren; ++i) {
235           const PetscInt child = children[i];
236           if (remoteCells[child] >= 0 && remoteCells[child] != gp) {
237             PetscInt *PETSC_RESTRICT pBuf;
238             PetscCall(PetscSectionAddDof(section, p, 1));
239             PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf));
240             *pBuf = remoteCells[child];
241           }
242         }
243       }
244     }
245     /* Add local cells */
246     adjSize = PETSC_DETERMINE;
247     PetscCall(DMPlexGetAdjacency(dm, p, &adjSize, &adj));
248     for (a = 0; a < adjSize; ++a) {
249       const PetscInt point = adj[a];
250       if (point != p && pStart <= point && point < pEnd) {
251         PetscInt *PETSC_RESTRICT pBuf;
252         PetscCall(PetscSectionAddDof(section, p, 1));
253         PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf));
254         *pBuf = DMPlex_GlobalID(cellNum[point - pStart]);
255       }
256     }
257     (*numVertices)++;
258   }
259   PetscCall(PetscFree(adj));
260   PetscCall(PetscFree2(adjCells, remoteCells));
261   PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure));
262 
263   /* Derive CSR graph from section/segbuffer */
264   PetscCall(PetscSectionSetUp(section));
265   PetscCall(PetscSectionGetStorageSize(section, &size));
266   PetscCall(PetscMalloc1(*numVertices + 1, &vOffsets));
267   for (idx = 0, p = pStart; p < pEnd; p++) {
268     if (nroots > 0) {
269       if (cellNum[p - pStart] < 0) continue;
270     }
271     PetscCall(PetscSectionGetOffset(section, p, &(vOffsets[idx++])));
272   }
273   vOffsets[*numVertices] = size;
274   PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &graph));
275 
276   if (nroots >= 0) {
277     /* Filter out duplicate edges using section/segbuffer */
278     PetscCall(PetscSectionReset(section));
279     PetscCall(PetscSectionSetChart(section, 0, *numVertices));
280     for (p = 0; p < *numVertices; p++) {
281       PetscInt start = vOffsets[p], end = vOffsets[p + 1];
282       PetscInt numEdges = end - start, *PETSC_RESTRICT edges;
283       PetscCall(PetscSortRemoveDupsInt(&numEdges, &graph[start]));
284       PetscCall(PetscSectionSetDof(section, p, numEdges));
285       PetscCall(PetscSegBufferGetInts(adjBuffer, numEdges, &edges));
286       PetscCall(PetscArraycpy(edges, &graph[start], numEdges));
287     }
288     PetscCall(PetscFree(vOffsets));
289     PetscCall(PetscFree(graph));
290     /* Derive CSR graph from section/segbuffer */
291     PetscCall(PetscSectionSetUp(section));
292     PetscCall(PetscSectionGetStorageSize(section, &size));
293     PetscCall(PetscMalloc1(*numVertices + 1, &vOffsets));
294     for (idx = 0, p = 0; p < *numVertices; p++) PetscCall(PetscSectionGetOffset(section, p, &(vOffsets[idx++])));
295     vOffsets[*numVertices] = size;
296     PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &graph));
297   } else {
298     /* Sort adjacencies (not strictly necessary) */
299     for (p = 0; p < *numVertices; p++) {
300       PetscInt start = vOffsets[p], end = vOffsets[p + 1];
301       PetscCall(PetscSortInt(end - start, &graph[start]));
302     }
303   }
304 
305   if (offsets) *offsets = vOffsets;
306   if (adjacency) *adjacency = graph;
307 
308   /* Cleanup */
309   PetscCall(PetscSegBufferDestroy(&adjBuffer));
310   PetscCall(PetscSectionDestroy(&section));
311   PetscCall(ISRestoreIndices(cellNumbering, &cellNum));
312   PetscCall(ISDestroy(&cellNumbering));
313   PetscFunctionReturn(PETSC_SUCCESS);
314 }
315 
316 static PetscErrorCode DMPlexCreatePartitionerGraph_ViaMat(DM dm, PetscInt height, PetscInt *numVertices, PetscInt **offsets, PetscInt **adjacency, IS *globalNumbering)
317 {
318   Mat             conn, CSR;
319   IS              fis, cis, cis_own;
320   PetscSF         sfPoint;
321   const PetscInt *rows, *cols, *ii, *jj;
322   PetscInt       *idxs, *idxs2;
323   PetscInt        dim, depth, floc, cloc, i, M, N, c, lm, m, cStart, cEnd, fStart, fEnd;
324   PetscMPIInt     rank;
325   PetscBool       flg;
326 
327   PetscFunctionBegin;
328   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
329   PetscCall(DMGetDimension(dm, &dim));
330   PetscCall(DMPlexGetDepth(dm, &depth));
331   if (dim != depth) {
332     /* We do not handle the uninterpolated case here */
333     PetscCall(DMPlexCreateNeighborCSR(dm, height, numVertices, offsets, adjacency));
334     /* DMPlexCreateNeighborCSR does not make a numbering */
335     if (globalNumbering) PetscCall(DMPlexCreateCellNumbering_Internal(dm, PETSC_TRUE, globalNumbering));
336     /* Different behavior for empty graphs */
337     if (!*numVertices) {
338       PetscCall(PetscMalloc1(1, offsets));
339       (*offsets)[0] = 0;
340     }
341     /* Broken in parallel */
342     if (rank) PetscCheck(!*numVertices, PETSC_COMM_SELF, PETSC_ERR_SUP, "Parallel partitioning of uninterpolated meshes not supported");
343     PetscFunctionReturn(PETSC_SUCCESS);
344   }
345   /* Interpolated and parallel case */
346 
347   /* numbering */
348   PetscCall(DMGetPointSF(dm, &sfPoint));
349   PetscCall(DMPlexGetHeightStratum(dm, height, &cStart, &cEnd));
350   PetscCall(DMPlexGetHeightStratum(dm, height + 1, &fStart, &fEnd));
351   PetscCall(DMPlexCreateNumbering_Plex(dm, cStart, cEnd, 0, &N, sfPoint, &cis));
352   PetscCall(DMPlexCreateNumbering_Plex(dm, fStart, fEnd, 0, &M, sfPoint, &fis));
353   if (globalNumbering) PetscCall(ISDuplicate(cis, globalNumbering));
354 
355   /* get positive global ids and local sizes for facets and cells */
356   PetscCall(ISGetLocalSize(fis, &m));
357   PetscCall(ISGetIndices(fis, &rows));
358   PetscCall(PetscMalloc1(m, &idxs));
359   for (i = 0, floc = 0; i < m; i++) {
360     const PetscInt p = rows[i];
361 
362     if (p < 0) {
363       idxs[i] = -(p + 1);
364     } else {
365       idxs[i] = p;
366       floc += 1;
367     }
368   }
369   PetscCall(ISRestoreIndices(fis, &rows));
370   PetscCall(ISDestroy(&fis));
371   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idxs, PETSC_OWN_POINTER, &fis));
372 
373   PetscCall(ISGetLocalSize(cis, &m));
374   PetscCall(ISGetIndices(cis, &cols));
375   PetscCall(PetscMalloc1(m, &idxs));
376   PetscCall(PetscMalloc1(m, &idxs2));
377   for (i = 0, cloc = 0; i < m; i++) {
378     const PetscInt p = cols[i];
379 
380     if (p < 0) {
381       idxs[i] = -(p + 1);
382     } else {
383       idxs[i]       = p;
384       idxs2[cloc++] = p;
385     }
386   }
387   PetscCall(ISRestoreIndices(cis, &cols));
388   PetscCall(ISDestroy(&cis));
389   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)dm), m, idxs, PETSC_OWN_POINTER, &cis));
390   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)dm), cloc, idxs2, PETSC_OWN_POINTER, &cis_own));
391 
392   /* Create matrix to hold F-C connectivity (MatMatTranspose Mult not supported for MPIAIJ) */
393   PetscCall(MatCreate(PetscObjectComm((PetscObject)dm), &conn));
394   PetscCall(MatSetSizes(conn, floc, cloc, M, N));
395   PetscCall(MatSetType(conn, MATMPIAIJ));
396   PetscCall(DMPlexGetMaxSizes(dm, NULL, &lm));
397   PetscCallMPI(MPI_Allreduce(&lm, &m, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)dm)));
398   PetscCall(MatMPIAIJSetPreallocation(conn, m, NULL, m, NULL));
399 
400   /* Assemble matrix */
401   PetscCall(ISGetIndices(fis, &rows));
402   PetscCall(ISGetIndices(cis, &cols));
403   for (c = cStart; c < cEnd; c++) {
404     const PetscInt *cone;
405     PetscInt        coneSize, row, col, f;
406 
407     col = cols[c - cStart];
408     PetscCall(DMPlexGetCone(dm, c, &cone));
409     PetscCall(DMPlexGetConeSize(dm, c, &coneSize));
410     for (f = 0; f < coneSize; f++) {
411       const PetscScalar v = 1.0;
412       const PetscInt   *children;
413       PetscInt          numChildren, ch;
414 
415       row = rows[cone[f] - fStart];
416       PetscCall(MatSetValues(conn, 1, &row, 1, &col, &v, INSERT_VALUES));
417 
418       /* non-conforming meshes */
419       PetscCall(DMPlexGetTreeChildren(dm, cone[f], &numChildren, &children));
420       for (ch = 0; ch < numChildren; ch++) {
421         const PetscInt child = children[ch];
422 
423         if (child < fStart || child >= fEnd) continue;
424         row = rows[child - fStart];
425         PetscCall(MatSetValues(conn, 1, &row, 1, &col, &v, INSERT_VALUES));
426       }
427     }
428   }
429   PetscCall(ISRestoreIndices(fis, &rows));
430   PetscCall(ISRestoreIndices(cis, &cols));
431   PetscCall(ISDestroy(&fis));
432   PetscCall(ISDestroy(&cis));
433   PetscCall(MatAssemblyBegin(conn, MAT_FINAL_ASSEMBLY));
434   PetscCall(MatAssemblyEnd(conn, MAT_FINAL_ASSEMBLY));
435 
436   /* Get parallel CSR by doing conn^T * conn */
437   PetscCall(MatTransposeMatMult(conn, conn, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &CSR));
438   PetscCall(MatDestroy(&conn));
439 
440   /* extract local part of the CSR */
441   PetscCall(MatMPIAIJGetLocalMat(CSR, MAT_INITIAL_MATRIX, &conn));
442   PetscCall(MatDestroy(&CSR));
443   PetscCall(MatGetRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &m, &ii, &jj, &flg));
444   PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "No IJ format");
445 
446   /* get back requested output */
447   if (numVertices) *numVertices = m;
448   if (offsets) {
449     PetscCall(PetscCalloc1(m + 1, &idxs));
450     for (i = 1; i < m + 1; i++) idxs[i] = ii[i] - i; /* ParMetis does not like self-connectivity */
451     *offsets = idxs;
452   }
453   if (adjacency) {
454     PetscCall(PetscMalloc1(ii[m] - m, &idxs));
455     PetscCall(ISGetIndices(cis_own, &rows));
456     for (i = 0, c = 0; i < m; i++) {
457       PetscInt j, g = rows[i];
458 
459       for (j = ii[i]; j < ii[i + 1]; j++) {
460         if (jj[j] == g) continue; /* again, self-connectivity */
461         idxs[c++] = jj[j];
462       }
463     }
464     PetscCheck(c == ii[m] - m, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected %" PetscInt_FMT " != %" PetscInt_FMT, c, ii[m] - m);
465     PetscCall(ISRestoreIndices(cis_own, &rows));
466     *adjacency = idxs;
467   }
468 
469   /* cleanup */
470   PetscCall(ISDestroy(&cis_own));
471   PetscCall(MatRestoreRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &m, &ii, &jj, &flg));
472   PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "No IJ format");
473   PetscCall(MatDestroy(&conn));
474   PetscFunctionReturn(PETSC_SUCCESS);
475 }
476 
477 /*@C
478   DMPlexCreatePartitionerGraph - Create a CSR graph of point connections for the partitioner
479 
480   Collective on dm
481 
482   Input Parameters:
483 + dm      - The mesh `DM`
484 - height  - Height of the strata from which to construct the graph
485 
486   Output Parameters:
487 + numVertices     - Number of vertices in the graph
488 . offsets         - Point offsets in the graph
489 . adjacency       - Point connectivity in the graph
490 - globalNumbering - A map from the local cell numbering to the global numbering used in "adjacency".  Negative indicates that the cell is a duplicate from another process.
491 
492   Options Database Keys:
493 . -dm_plex_csr_alg <mat,graph,overlap> - Choose the algorithm for computing the CSR graph
494 
495   Level: developer
496 
497   Note:
498   The user can control the definition of adjacency for the mesh using `DMSetAdjacency()`. They should choose the combination appropriate for the function
499   representation on the mesh. If requested, globalNumbering needs to be destroyed by the caller; offsets and adjacency need to be freed with PetscFree().
500 
501 .seealso: [](chapter_unstructured), `DM`, `DMPLEX`, `PetscPartitionerGetType()`, `PetscPartitionerCreate()`, `DMSetAdjacency()`
502 @*/
503 PetscErrorCode DMPlexCreatePartitionerGraph(DM dm, PetscInt height, PetscInt *numVertices, PetscInt **offsets, PetscInt **adjacency, IS *globalNumbering)
504 {
505   DMPlexCSRAlgorithm alg = DM_PLEX_CSR_GRAPH;
506 
507   PetscFunctionBegin;
508   PetscCall(PetscOptionsGetEnum(((PetscObject)dm)->options, ((PetscObject)dm)->prefix, "-dm_plex_csr_alg", DMPlexCSRAlgorithms, (PetscEnum *)&alg, NULL));
509   switch (alg) {
510   case DM_PLEX_CSR_MAT:
511     PetscCall(DMPlexCreatePartitionerGraph_ViaMat(dm, height, numVertices, offsets, adjacency, globalNumbering));
512     break;
513   case DM_PLEX_CSR_GRAPH:
514     PetscCall(DMPlexCreatePartitionerGraph_Native(dm, height, numVertices, offsets, adjacency, globalNumbering));
515     break;
516   case DM_PLEX_CSR_OVERLAP:
517     PetscCall(DMPlexCreatePartitionerGraph_Overlap(dm, height, numVertices, offsets, adjacency, globalNumbering));
518     break;
519   }
520   PetscFunctionReturn(PETSC_SUCCESS);
521 }
522 
523 /*@C
524   DMPlexCreateNeighborCSR - Create a mesh graph (cell-cell adjacency) in parallel CSR format.
525 
526   Collective on dm
527 
528   Input Parameters:
529 + dm - The `DMPLEX`
530 - cellHeight - The height of mesh points to treat as cells (default should be 0)
531 
532   Output Parameters:
533 + numVertices - The number of local vertices in the graph, or cells in the mesh.
534 . offsets     - The offset to the adjacency list for each cell
535 - adjacency   - The adjacency list for all cells
536 
537   Level: advanced
538 
539   Note:
540   This is suitable for input to a mesh partitioner like ParMetis.
541 
542 .seealso: [](chapter_unstructured), `DM`, `DMPLEX`, `DMPlexCreate()`
543 @*/
544 PetscErrorCode DMPlexCreateNeighborCSR(DM dm, PetscInt cellHeight, PetscInt *numVertices, PetscInt **offsets, PetscInt **adjacency)
545 {
546   const PetscInt maxFaceCases = 30;
547   PetscInt       numFaceCases = 0;
548   PetscInt       numFaceVertices[30]; /* maxFaceCases, C89 sucks sucks sucks */
549   PetscInt      *off, *adj;
550   PetscInt      *neighborCells = NULL;
551   PetscInt       dim, cellDim, depth = 0, faceDepth, cStart, cEnd, c, numCells, cell;
552 
553   PetscFunctionBegin;
554   /* For parallel partitioning, I think you have to communicate supports */
555   PetscCall(DMGetDimension(dm, &dim));
556   cellDim = dim - cellHeight;
557   PetscCall(DMPlexGetDepth(dm, &depth));
558   PetscCall(DMPlexGetHeightStratum(dm, cellHeight, &cStart, &cEnd));
559   if (cEnd - cStart == 0) {
560     if (numVertices) *numVertices = 0;
561     if (offsets) *offsets = NULL;
562     if (adjacency) *adjacency = NULL;
563     PetscFunctionReturn(PETSC_SUCCESS);
564   }
565   numCells  = cEnd - cStart;
566   faceDepth = depth - cellHeight;
567   if (dim == depth) {
568     PetscInt f, fStart, fEnd;
569 
570     PetscCall(PetscCalloc1(numCells + 1, &off));
571     /* Count neighboring cells */
572     PetscCall(DMPlexGetHeightStratum(dm, cellHeight + 1, &fStart, &fEnd));
573     for (f = fStart; f < fEnd; ++f) {
574       const PetscInt *support;
575       PetscInt        supportSize;
576       PetscCall(DMPlexGetSupportSize(dm, f, &supportSize));
577       PetscCall(DMPlexGetSupport(dm, f, &support));
578       if (supportSize == 2) {
579         PetscInt numChildren;
580 
581         PetscCall(DMPlexGetTreeChildren(dm, f, &numChildren, NULL));
582         if (!numChildren) {
583           ++off[support[0] - cStart + 1];
584           ++off[support[1] - cStart + 1];
585         }
586       }
587     }
588     /* Prefix sum */
589     for (c = 1; c <= numCells; ++c) off[c] += off[c - 1];
590     if (adjacency) {
591       PetscInt *tmp;
592 
593       PetscCall(PetscMalloc1(off[numCells], &adj));
594       PetscCall(PetscMalloc1(numCells + 1, &tmp));
595       PetscCall(PetscArraycpy(tmp, off, numCells + 1));
596       /* Get neighboring cells */
597       for (f = fStart; f < fEnd; ++f) {
598         const PetscInt *support;
599         PetscInt        supportSize;
600         PetscCall(DMPlexGetSupportSize(dm, f, &supportSize));
601         PetscCall(DMPlexGetSupport(dm, f, &support));
602         if (supportSize == 2) {
603           PetscInt numChildren;
604 
605           PetscCall(DMPlexGetTreeChildren(dm, f, &numChildren, NULL));
606           if (!numChildren) {
607             adj[tmp[support[0] - cStart]++] = support[1];
608             adj[tmp[support[1] - cStart]++] = support[0];
609           }
610         }
611       }
612       for (c = 0; c < cEnd - cStart; ++c) PetscAssert(tmp[c] == off[c + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Offset %" PetscInt_FMT " != %" PetscInt_FMT " for cell %" PetscInt_FMT, tmp[c], off[c], c + cStart);
613       PetscCall(PetscFree(tmp));
614     }
615     if (numVertices) *numVertices = numCells;
616     if (offsets) *offsets = off;
617     if (adjacency) *adjacency = adj;
618     PetscFunctionReturn(PETSC_SUCCESS);
619   }
620   /* Setup face recognition */
621   if (faceDepth == 1) {
622     PetscInt cornersSeen[30] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* Could use PetscBT */
623 
624     for (c = cStart; c < cEnd; ++c) {
625       PetscInt corners;
626 
627       PetscCall(DMPlexGetConeSize(dm, c, &corners));
628       if (!cornersSeen[corners]) {
629         PetscInt nFV;
630 
631         PetscCheck(numFaceCases < maxFaceCases, PetscObjectComm((PetscObject)dm), PETSC_ERR_PLIB, "Exceeded maximum number of face recognition cases");
632         cornersSeen[corners] = 1;
633 
634         PetscCall(DMPlexGetNumFaceVertices(dm, cellDim, corners, &nFV));
635 
636         numFaceVertices[numFaceCases++] = nFV;
637       }
638     }
639   }
640   PetscCall(PetscCalloc1(numCells + 1, &off));
641   /* Count neighboring cells */
642   for (cell = cStart; cell < cEnd; ++cell) {
643     PetscInt numNeighbors = PETSC_DETERMINE, n;
644 
645     PetscCall(DMPlexGetAdjacency_Internal(dm, cell, PETSC_TRUE, PETSC_FALSE, PETSC_FALSE, &numNeighbors, &neighborCells));
646     /* Get meet with each cell, and check with recognizer (could optimize to check each pair only once) */
647     for (n = 0; n < numNeighbors; ++n) {
648       PetscInt        cellPair[2];
649       PetscBool       found    = faceDepth > 1 ? PETSC_TRUE : PETSC_FALSE;
650       PetscInt        meetSize = 0;
651       const PetscInt *meet     = NULL;
652 
653       cellPair[0] = cell;
654       cellPair[1] = neighborCells[n];
655       if (cellPair[0] == cellPair[1]) continue;
656       if (!found) {
657         PetscCall(DMPlexGetMeet(dm, 2, cellPair, &meetSize, &meet));
658         if (meetSize) {
659           PetscInt f;
660 
661           for (f = 0; f < numFaceCases; ++f) {
662             if (numFaceVertices[f] == meetSize) {
663               found = PETSC_TRUE;
664               break;
665             }
666           }
667         }
668         PetscCall(DMPlexRestoreMeet(dm, 2, cellPair, &meetSize, &meet));
669       }
670       if (found) ++off[cell - cStart + 1];
671     }
672   }
673   /* Prefix sum */
674   for (cell = 1; cell <= numCells; ++cell) off[cell] += off[cell - 1];
675 
676   if (adjacency) {
677     PetscCall(PetscMalloc1(off[numCells], &adj));
678     /* Get neighboring cells */
679     for (cell = cStart; cell < cEnd; ++cell) {
680       PetscInt numNeighbors = PETSC_DETERMINE, n;
681       PetscInt cellOffset   = 0;
682 
683       PetscCall(DMPlexGetAdjacency_Internal(dm, cell, PETSC_TRUE, PETSC_FALSE, PETSC_FALSE, &numNeighbors, &neighborCells));
684       /* Get meet with each cell, and check with recognizer (could optimize to check each pair only once) */
685       for (n = 0; n < numNeighbors; ++n) {
686         PetscInt        cellPair[2];
687         PetscBool       found    = faceDepth > 1 ? PETSC_TRUE : PETSC_FALSE;
688         PetscInt        meetSize = 0;
689         const PetscInt *meet     = NULL;
690 
691         cellPair[0] = cell;
692         cellPair[1] = neighborCells[n];
693         if (cellPair[0] == cellPair[1]) continue;
694         if (!found) {
695           PetscCall(DMPlexGetMeet(dm, 2, cellPair, &meetSize, &meet));
696           if (meetSize) {
697             PetscInt f;
698 
699             for (f = 0; f < numFaceCases; ++f) {
700               if (numFaceVertices[f] == meetSize) {
701                 found = PETSC_TRUE;
702                 break;
703               }
704             }
705           }
706           PetscCall(DMPlexRestoreMeet(dm, 2, cellPair, &meetSize, &meet));
707         }
708         if (found) {
709           adj[off[cell - cStart] + cellOffset] = neighborCells[n];
710           ++cellOffset;
711         }
712       }
713     }
714   }
715   PetscCall(PetscFree(neighborCells));
716   if (numVertices) *numVertices = numCells;
717   if (offsets) *offsets = off;
718   if (adjacency) *adjacency = adj;
719   PetscFunctionReturn(PETSC_SUCCESS);
720 }
721 
722 /*@
723   PetscPartitionerDMPlexPartition - Create a non-overlapping partition of the cells in the mesh
724 
725   Collective on part
726 
727   Input Parameters:
728 + part    - The `PetscPartitioner`
729 . targetSection - The `PetscSection` describing the absolute weight of each partition (can be NULL)
730 - dm      - The mesh `DM`
731 
732   Output Parameters:
733 + partSection     - The `PetscSection` giving the division of points by partition
734 - partition       - The list of points by partition
735 
736   Level: developer
737 
738   Note:
739   If the `DM` has a local section associated, each point to be partitioned will be weighted by the total number of dofs identified
740   by the section in the transitive closure of the point.
741 
742 .seealso: [](chapter_unstructured), `DM`, `DMPLEX`, `PetscPartitioner`, `PetscSection`, `DMPlexDistribute()`, `PetscPartitionerCreate()`, `PetscSectionCreate()`,
743          `PetscSectionSetChart()`, `PetscPartitionerPartition()`
744 @*/
745 PetscErrorCode PetscPartitionerDMPlexPartition(PetscPartitioner part, DM dm, PetscSection targetSection, PetscSection partSection, IS *partition)
746 {
747   PetscMPIInt  size;
748   PetscBool    isplex;
749   PetscSection vertSection = NULL;
750 
751   PetscFunctionBegin;
752   PetscValidHeaderSpecific(part, PETSCPARTITIONER_CLASSID, 1);
753   PetscValidHeaderSpecific(dm, DM_CLASSID, 2);
754   if (targetSection) PetscValidHeaderSpecific(targetSection, PETSC_SECTION_CLASSID, 3);
755   PetscValidHeaderSpecific(partSection, PETSC_SECTION_CLASSID, 4);
756   PetscValidPointer(partition, 5);
757   PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMPLEX, &isplex));
758   PetscCheck(isplex, PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)dm)->type_name);
759   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)part), &size));
760   if (size == 1) {
761     PetscInt *points;
762     PetscInt  cStart, cEnd, c;
763 
764     PetscCall(DMPlexGetHeightStratum(dm, part->height, &cStart, &cEnd));
765     PetscCall(PetscSectionReset(partSection));
766     PetscCall(PetscSectionSetChart(partSection, 0, size));
767     PetscCall(PetscSectionSetDof(partSection, 0, cEnd - cStart));
768     PetscCall(PetscSectionSetUp(partSection));
769     PetscCall(PetscMalloc1(cEnd - cStart, &points));
770     for (c = cStart; c < cEnd; ++c) points[c] = c;
771     PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)part), cEnd - cStart, points, PETSC_OWN_POINTER, partition));
772     PetscFunctionReturn(PETSC_SUCCESS);
773   }
774   if (part->height == 0) {
775     PetscInt  numVertices = 0;
776     PetscInt *start       = NULL;
777     PetscInt *adjacency   = NULL;
778     IS        globalNumbering;
779 
780     if (!part->noGraph || part->viewGraph) {
781       PetscCall(DMPlexCreatePartitionerGraph(dm, part->height, &numVertices, &start, &adjacency, &globalNumbering));
782     } else { /* only compute the number of owned local vertices */
783       const PetscInt *idxs;
784       PetscInt        p, pStart, pEnd;
785 
786       PetscCall(DMPlexGetHeightStratum(dm, part->height, &pStart, &pEnd));
787       PetscCall(DMPlexCreateNumbering_Plex(dm, pStart, pEnd, 0, NULL, dm->sf, &globalNumbering));
788       PetscCall(ISGetIndices(globalNumbering, &idxs));
789       for (p = 0; p < pEnd - pStart; p++) numVertices += idxs[p] < 0 ? 0 : 1;
790       PetscCall(ISRestoreIndices(globalNumbering, &idxs));
791     }
792     if (part->usevwgt) {
793       PetscSection    section = dm->localSection, clSection = NULL;
794       IS              clPoints = NULL;
795       const PetscInt *gid, *clIdx;
796       PetscInt        v, p, pStart, pEnd;
797 
798       /* dm->localSection encodes degrees of freedom per point, not per cell. We need to get the closure index to properly specify cell weights (aka dofs) */
799       /* We do this only if the local section has been set */
800       if (section) {
801         PetscCall(PetscSectionGetClosureIndex(section, (PetscObject)dm, &clSection, NULL));
802         if (!clSection) PetscCall(DMPlexCreateClosureIndex(dm, NULL));
803         PetscCall(PetscSectionGetClosureIndex(section, (PetscObject)dm, &clSection, &clPoints));
804         PetscCall(ISGetIndices(clPoints, &clIdx));
805       }
806       PetscCall(DMPlexGetHeightStratum(dm, part->height, &pStart, &pEnd));
807       PetscCall(PetscSectionCreate(PETSC_COMM_SELF, &vertSection));
808       PetscCall(PetscSectionSetChart(vertSection, 0, numVertices));
809       if (globalNumbering) {
810         PetscCall(ISGetIndices(globalNumbering, &gid));
811       } else gid = NULL;
812       for (p = pStart, v = 0; p < pEnd; ++p) {
813         PetscInt dof = 1;
814 
815         /* skip cells in the overlap */
816         if (gid && gid[p - pStart] < 0) continue;
817 
818         if (section) {
819           PetscInt cl, clSize, clOff;
820 
821           dof = 0;
822           PetscCall(PetscSectionGetDof(clSection, p, &clSize));
823           PetscCall(PetscSectionGetOffset(clSection, p, &clOff));
824           for (cl = 0; cl < clSize; cl += 2) {
825             PetscInt clDof, clPoint = clIdx[clOff + cl]; /* odd indices are reserved for orientations */
826 
827             PetscCall(PetscSectionGetDof(section, clPoint, &clDof));
828             dof += clDof;
829           }
830         }
831         PetscCheck(dof, PETSC_COMM_SELF, PETSC_ERR_SUP, "Number of dofs for point %" PetscInt_FMT " in the local section should be positive", p);
832         PetscCall(PetscSectionSetDof(vertSection, v, dof));
833         v++;
834       }
835       if (globalNumbering) PetscCall(ISRestoreIndices(globalNumbering, &gid));
836       if (clPoints) PetscCall(ISRestoreIndices(clPoints, &clIdx));
837       PetscCall(PetscSectionSetUp(vertSection));
838     }
839     PetscCall(PetscPartitionerPartition(part, size, numVertices, start, adjacency, vertSection, targetSection, partSection, partition));
840     PetscCall(PetscFree(start));
841     PetscCall(PetscFree(adjacency));
842     if (globalNumbering) { /* partition is wrt global unique numbering: change this to be wrt local numbering */
843       const PetscInt *globalNum;
844       const PetscInt *partIdx;
845       PetscInt       *map, cStart, cEnd;
846       PetscInt       *adjusted, i, localSize, offset;
847       IS              newPartition;
848 
849       PetscCall(ISGetLocalSize(*partition, &localSize));
850       PetscCall(PetscMalloc1(localSize, &adjusted));
851       PetscCall(ISGetIndices(globalNumbering, &globalNum));
852       PetscCall(ISGetIndices(*partition, &partIdx));
853       PetscCall(PetscMalloc1(localSize, &map));
854       PetscCall(DMPlexGetHeightStratum(dm, part->height, &cStart, &cEnd));
855       for (i = cStart, offset = 0; i < cEnd; i++) {
856         if (globalNum[i - cStart] >= 0) map[offset++] = i;
857       }
858       for (i = 0; i < localSize; i++) adjusted[i] = map[partIdx[i]];
859       PetscCall(PetscFree(map));
860       PetscCall(ISRestoreIndices(*partition, &partIdx));
861       PetscCall(ISRestoreIndices(globalNumbering, &globalNum));
862       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, localSize, adjusted, PETSC_OWN_POINTER, &newPartition));
863       PetscCall(ISDestroy(&globalNumbering));
864       PetscCall(ISDestroy(partition));
865       *partition = newPartition;
866     }
867   } else SETERRQ(PetscObjectComm((PetscObject)part), PETSC_ERR_ARG_OUTOFRANGE, "Invalid height %" PetscInt_FMT " for points to partition", part->height);
868   PetscCall(PetscSectionDestroy(&vertSection));
869   PetscFunctionReturn(PETSC_SUCCESS);
870 }
871 
872 /*@
873   DMPlexGetPartitioner - Get the mesh partitioner
874 
875   Not collective
876 
877   Input Parameter:
878 . dm - The `DM`
879 
880   Output Parameter:
881 . part - The `PetscPartitioner`
882 
883   Level: developer
884 
885   Note:
886   This gets a borrowed reference, so the user should not destroy this `PetscPartitioner`.
887 
888 .seealso: [](chapter_unstructured), `DM`, `DMPLEX`, `PetscPartitioner`, `PetscSection`, `DMPlexDistribute()`, `DMPlexSetPartitioner()`, `PetscPartitionerDMPlexPartition()`, `PetscPartitionerCreate()`
889 @*/
890 PetscErrorCode DMPlexGetPartitioner(DM dm, PetscPartitioner *part)
891 {
892   DM_Plex *mesh = (DM_Plex *)dm->data;
893 
894   PetscFunctionBegin;
895   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
896   PetscValidPointer(part, 2);
897   *part = mesh->partitioner;
898   PetscFunctionReturn(PETSC_SUCCESS);
899 }
900 
901 /*@
902   DMPlexSetPartitioner - Set the mesh partitioner
903 
904   logically collective on dm
905 
906   Input Parameters:
907 + dm - The `DM`
908 - part - The partitioner
909 
910   Level: developer
911 
912   Note:
913   Any existing `PetscPartitioner` will be destroyed.
914 
915 .seealso: [](chapter_unstructured), `DM`, `DMPLEX`, `PetscPartitioner`,`DMPlexDistribute()`, `DMPlexGetPartitioner()`, `PetscPartitionerCreate()`
916 @*/
917 PetscErrorCode DMPlexSetPartitioner(DM dm, PetscPartitioner part)
918 {
919   DM_Plex *mesh = (DM_Plex *)dm->data;
920 
921   PetscFunctionBegin;
922   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
923   PetscValidHeaderSpecific(part, PETSCPARTITIONER_CLASSID, 2);
924   PetscCall(PetscObjectReference((PetscObject)part));
925   PetscCall(PetscPartitionerDestroy(&mesh->partitioner));
926   mesh->partitioner = part;
927   PetscFunctionReturn(PETSC_SUCCESS);
928 }
929 
930 static PetscErrorCode DMPlexAddClosure_Private(DM dm, PetscHSetI ht, PetscInt point)
931 {
932   const PetscInt *cone;
933   PetscInt        coneSize, c;
934   PetscBool       missing;
935 
936   PetscFunctionBeginHot;
937   PetscCall(PetscHSetIQueryAdd(ht, point, &missing));
938   if (missing) {
939     PetscCall(DMPlexGetCone(dm, point, &cone));
940     PetscCall(DMPlexGetConeSize(dm, point, &coneSize));
941     for (c = 0; c < coneSize; c++) PetscCall(DMPlexAddClosure_Private(dm, ht, cone[c]));
942   }
943   PetscFunctionReturn(PETSC_SUCCESS);
944 }
945 
946 PETSC_UNUSED static PetscErrorCode DMPlexAddClosure_Tree(DM dm, PetscHSetI ht, PetscInt point, PetscBool up, PetscBool down)
947 {
948   PetscFunctionBegin;
949   if (up) {
950     PetscInt parent;
951 
952     PetscCall(DMPlexGetTreeParent(dm, point, &parent, NULL));
953     if (parent != point) {
954       PetscInt closureSize, *closure = NULL, i;
955 
956       PetscCall(DMPlexGetTransitiveClosure(dm, parent, PETSC_TRUE, &closureSize, &closure));
957       for (i = 0; i < closureSize; i++) {
958         PetscInt cpoint = closure[2 * i];
959 
960         PetscCall(PetscHSetIAdd(ht, cpoint));
961         PetscCall(DMPlexAddClosure_Tree(dm, ht, cpoint, PETSC_TRUE, PETSC_FALSE));
962       }
963       PetscCall(DMPlexRestoreTransitiveClosure(dm, parent, PETSC_TRUE, &closureSize, &closure));
964     }
965   }
966   if (down) {
967     PetscInt        numChildren;
968     const PetscInt *children;
969 
970     PetscCall(DMPlexGetTreeChildren(dm, point, &numChildren, &children));
971     if (numChildren) {
972       PetscInt i;
973 
974       for (i = 0; i < numChildren; i++) {
975         PetscInt cpoint = children[i];
976 
977         PetscCall(PetscHSetIAdd(ht, cpoint));
978         PetscCall(DMPlexAddClosure_Tree(dm, ht, cpoint, PETSC_FALSE, PETSC_TRUE));
979       }
980     }
981   }
982   PetscFunctionReturn(PETSC_SUCCESS);
983 }
984 
985 static PetscErrorCode DMPlexAddClosureTree_Up_Private(DM dm, PetscHSetI ht, PetscInt point)
986 {
987   PetscInt parent;
988 
989   PetscFunctionBeginHot;
990   PetscCall(DMPlexGetTreeParent(dm, point, &parent, NULL));
991   if (point != parent) {
992     const PetscInt *cone;
993     PetscInt        coneSize, c;
994 
995     PetscCall(DMPlexAddClosureTree_Up_Private(dm, ht, parent));
996     PetscCall(DMPlexAddClosure_Private(dm, ht, parent));
997     PetscCall(DMPlexGetCone(dm, parent, &cone));
998     PetscCall(DMPlexGetConeSize(dm, parent, &coneSize));
999     for (c = 0; c < coneSize; c++) {
1000       const PetscInt cp = cone[c];
1001 
1002       PetscCall(DMPlexAddClosureTree_Up_Private(dm, ht, cp));
1003     }
1004   }
1005   PetscFunctionReturn(PETSC_SUCCESS);
1006 }
1007 
1008 static PetscErrorCode DMPlexAddClosureTree_Down_Private(DM dm, PetscHSetI ht, PetscInt point)
1009 {
1010   PetscInt        i, numChildren;
1011   const PetscInt *children;
1012 
1013   PetscFunctionBeginHot;
1014   PetscCall(DMPlexGetTreeChildren(dm, point, &numChildren, &children));
1015   for (i = 0; i < numChildren; i++) PetscCall(PetscHSetIAdd(ht, children[i]));
1016   PetscFunctionReturn(PETSC_SUCCESS);
1017 }
1018 
1019 static PetscErrorCode DMPlexAddClosureTree_Private(DM dm, PetscHSetI ht, PetscInt point)
1020 {
1021   const PetscInt *cone;
1022   PetscInt        coneSize, c;
1023 
1024   PetscFunctionBeginHot;
1025   PetscCall(PetscHSetIAdd(ht, point));
1026   PetscCall(DMPlexAddClosureTree_Up_Private(dm, ht, point));
1027   PetscCall(DMPlexAddClosureTree_Down_Private(dm, ht, point));
1028   PetscCall(DMPlexGetCone(dm, point, &cone));
1029   PetscCall(DMPlexGetConeSize(dm, point, &coneSize));
1030   for (c = 0; c < coneSize; c++) PetscCall(DMPlexAddClosureTree_Private(dm, ht, cone[c]));
1031   PetscFunctionReturn(PETSC_SUCCESS);
1032 }
1033 
1034 PetscErrorCode DMPlexClosurePoints_Private(DM dm, PetscInt numPoints, const PetscInt points[], IS *closureIS)
1035 {
1036   DM_Plex        *mesh    = (DM_Plex *)dm->data;
1037   const PetscBool hasTree = (mesh->parentSection || mesh->childSection) ? PETSC_TRUE : PETSC_FALSE;
1038   PetscInt        nelems, *elems, off = 0, p;
1039   PetscHSetI      ht = NULL;
1040 
1041   PetscFunctionBegin;
1042   PetscCall(PetscHSetICreate(&ht));
1043   PetscCall(PetscHSetIResize(ht, numPoints * 16));
1044   if (!hasTree) {
1045     for (p = 0; p < numPoints; ++p) PetscCall(DMPlexAddClosure_Private(dm, ht, points[p]));
1046   } else {
1047 #if 1
1048     for (p = 0; p < numPoints; ++p) PetscCall(DMPlexAddClosureTree_Private(dm, ht, points[p]));
1049 #else
1050     PetscInt *closure = NULL, closureSize, c;
1051     for (p = 0; p < numPoints; ++p) {
1052       PetscCall(DMPlexGetTransitiveClosure(dm, points[p], PETSC_TRUE, &closureSize, &closure));
1053       for (c = 0; c < closureSize * 2; c += 2) {
1054         PetscCall(PetscHSetIAdd(ht, closure[c]));
1055         if (hasTree) PetscCall(DMPlexAddClosure_Tree(dm, ht, closure[c], PETSC_TRUE, PETSC_TRUE));
1056       }
1057     }
1058     if (closure) PetscCall(DMPlexRestoreTransitiveClosure(dm, 0, PETSC_TRUE, NULL, &closure));
1059 #endif
1060   }
1061   PetscCall(PetscHSetIGetSize(ht, &nelems));
1062   PetscCall(PetscMalloc1(nelems, &elems));
1063   PetscCall(PetscHSetIGetElems(ht, &off, elems));
1064   PetscCall(PetscHSetIDestroy(&ht));
1065   PetscCall(PetscSortInt(nelems, elems));
1066   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nelems, elems, PETSC_OWN_POINTER, closureIS));
1067   PetscFunctionReturn(PETSC_SUCCESS);
1068 }
1069 
1070 /*@
1071   DMPlexPartitionLabelClosure - Add the closure of all points to the partition label
1072 
1073   Input Parameters:
1074 + dm     - The `DM`
1075 - label  - `DMLabel` assigning ranks to remote roots
1076 
1077   Level: developer
1078 
1079 .seealso: [](chapter_unstructured), `DM`, `DMPLEX`, `DMLabel`, `DMPlexPartitionLabelCreateSF()`, `DMPlexDistribute()`, `DMPlexCreateOverlap()`
1080 @*/
1081 PetscErrorCode DMPlexPartitionLabelClosure(DM dm, DMLabel label)
1082 {
1083   IS              rankIS, pointIS, closureIS;
1084   const PetscInt *ranks, *points;
1085   PetscInt        numRanks, numPoints, r;
1086 
1087   PetscFunctionBegin;
1088   PetscCall(DMLabelGetValueIS(label, &rankIS));
1089   PetscCall(ISGetLocalSize(rankIS, &numRanks));
1090   PetscCall(ISGetIndices(rankIS, &ranks));
1091   for (r = 0; r < numRanks; ++r) {
1092     const PetscInt rank = ranks[r];
1093     PetscCall(DMLabelGetStratumIS(label, rank, &pointIS));
1094     PetscCall(ISGetLocalSize(pointIS, &numPoints));
1095     PetscCall(ISGetIndices(pointIS, &points));
1096     PetscCall(DMPlexClosurePoints_Private(dm, numPoints, points, &closureIS));
1097     PetscCall(ISRestoreIndices(pointIS, &points));
1098     PetscCall(ISDestroy(&pointIS));
1099     PetscCall(DMLabelSetStratumIS(label, rank, closureIS));
1100     PetscCall(ISDestroy(&closureIS));
1101   }
1102   PetscCall(ISRestoreIndices(rankIS, &ranks));
1103   PetscCall(ISDestroy(&rankIS));
1104   PetscFunctionReturn(PETSC_SUCCESS);
1105 }
1106 
1107 /*@
1108   DMPlexPartitionLabelAdjacency - Add one level of adjacent points to the partition label
1109 
1110   Input Parameters:
1111 + dm     - The `DM`
1112 - label  - `DMLabel` assigning ranks to remote roots
1113 
1114   Level: developer
1115 
1116 .seealso: [](chapter_unstructured), `DM`, `DMPLEX`, `DMLabel`, `DMPlexPartitionLabelCreateSF()`, `DMPlexDistribute()`, `DMPlexCreateOverlap()`
1117 @*/
1118 PetscErrorCode DMPlexPartitionLabelAdjacency(DM dm, DMLabel label)
1119 {
1120   IS              rankIS, pointIS;
1121   const PetscInt *ranks, *points;
1122   PetscInt        numRanks, numPoints, r, p, a, adjSize;
1123   PetscInt       *adj = NULL;
1124 
1125   PetscFunctionBegin;
1126   PetscCall(DMLabelGetValueIS(label, &rankIS));
1127   PetscCall(ISGetLocalSize(rankIS, &numRanks));
1128   PetscCall(ISGetIndices(rankIS, &ranks));
1129   for (r = 0; r < numRanks; ++r) {
1130     const PetscInt rank = ranks[r];
1131 
1132     PetscCall(DMLabelGetStratumIS(label, rank, &pointIS));
1133     PetscCall(ISGetLocalSize(pointIS, &numPoints));
1134     PetscCall(ISGetIndices(pointIS, &points));
1135     for (p = 0; p < numPoints; ++p) {
1136       adjSize = PETSC_DETERMINE;
1137       PetscCall(DMPlexGetAdjacency(dm, points[p], &adjSize, &adj));
1138       for (a = 0; a < adjSize; ++a) PetscCall(DMLabelSetValue(label, adj[a], rank));
1139     }
1140     PetscCall(ISRestoreIndices(pointIS, &points));
1141     PetscCall(ISDestroy(&pointIS));
1142   }
1143   PetscCall(ISRestoreIndices(rankIS, &ranks));
1144   PetscCall(ISDestroy(&rankIS));
1145   PetscCall(PetscFree(adj));
1146   PetscFunctionReturn(PETSC_SUCCESS);
1147 }
1148 
1149 /*@
1150   DMPlexPartitionLabelPropagate - Propagate points in a partition label over the point `PetscSF`
1151 
1152   Input Parameters:
1153 + dm     - The `DM`
1154 - label  - `DMLabel` assigning ranks to remote roots
1155 
1156   Level: developer
1157 
1158   Note:
1159   This is required when generating multi-level overlaps to capture
1160   overlap points from non-neighbouring partitions.
1161 
1162 .seealso: [](chapter_unstructured), `DM`, `DMPLEX`, `DMLabel`, `DMPlexPartitionLabelCreateSF()`, `DMPlexDistribute()`, `DMPlexCreateOverlap()`
1163 @*/
1164 PetscErrorCode DMPlexPartitionLabelPropagate(DM dm, DMLabel label)
1165 {
1166   MPI_Comm        comm;
1167   PetscMPIInt     rank;
1168   PetscSF         sfPoint;
1169   DMLabel         lblRoots, lblLeaves;
1170   IS              rankIS, pointIS;
1171   const PetscInt *ranks;
1172   PetscInt        numRanks, r;
1173 
1174   PetscFunctionBegin;
1175   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
1176   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1177   PetscCall(DMGetPointSF(dm, &sfPoint));
1178   /* Pull point contributions from remote leaves into local roots */
1179   PetscCall(DMLabelGather(label, sfPoint, &lblLeaves));
1180   PetscCall(DMLabelGetValueIS(lblLeaves, &rankIS));
1181   PetscCall(ISGetLocalSize(rankIS, &numRanks));
1182   PetscCall(ISGetIndices(rankIS, &ranks));
1183   for (r = 0; r < numRanks; ++r) {
1184     const PetscInt remoteRank = ranks[r];
1185     if (remoteRank == rank) continue;
1186     PetscCall(DMLabelGetStratumIS(lblLeaves, remoteRank, &pointIS));
1187     PetscCall(DMLabelInsertIS(label, pointIS, remoteRank));
1188     PetscCall(ISDestroy(&pointIS));
1189   }
1190   PetscCall(ISRestoreIndices(rankIS, &ranks));
1191   PetscCall(ISDestroy(&rankIS));
1192   PetscCall(DMLabelDestroy(&lblLeaves));
1193   /* Push point contributions from roots into remote leaves */
1194   PetscCall(DMLabelDistribute(label, sfPoint, &lblRoots));
1195   PetscCall(DMLabelGetValueIS(lblRoots, &rankIS));
1196   PetscCall(ISGetLocalSize(rankIS, &numRanks));
1197   PetscCall(ISGetIndices(rankIS, &ranks));
1198   for (r = 0; r < numRanks; ++r) {
1199     const PetscInt remoteRank = ranks[r];
1200     if (remoteRank == rank) continue;
1201     PetscCall(DMLabelGetStratumIS(lblRoots, remoteRank, &pointIS));
1202     PetscCall(DMLabelInsertIS(label, pointIS, remoteRank));
1203     PetscCall(ISDestroy(&pointIS));
1204   }
1205   PetscCall(ISRestoreIndices(rankIS, &ranks));
1206   PetscCall(ISDestroy(&rankIS));
1207   PetscCall(DMLabelDestroy(&lblRoots));
1208   PetscFunctionReturn(PETSC_SUCCESS);
1209 }
1210 
1211 /*@
1212   DMPlexPartitionLabelInvert - Create a partition label of remote roots from a local root label
1213 
1214   Input Parameters:
1215 + dm        - The `DM`
1216 . rootLabel - `DMLabel` assigning ranks to local roots
1217 - processSF - A star forest mapping into the local index on each remote rank
1218 
1219   Output Parameter:
1220 . leafLabel - `DMLabel` assigning ranks to remote roots
1221 
1222   Level: developer
1223 
1224   Note:
1225   The rootLabel defines a send pattern by mapping local points to remote target ranks. The
1226   resulting leafLabel is a receiver mapping of remote roots to their parent rank.
1227 
1228 .seealso: [](chapter_unstructured), `DM`, `DMPLEX`, `DMPlexPartitionLabelCreateSF()`, `DMPlexDistribute()`, `DMPlexCreateOverlap()`
1229 @*/
1230 PetscErrorCode DMPlexPartitionLabelInvert(DM dm, DMLabel rootLabel, PetscSF processSF, DMLabel leafLabel)
1231 {
1232   MPI_Comm           comm;
1233   PetscMPIInt        rank, size, r;
1234   PetscInt           p, n, numNeighbors, numPoints, dof, off, rootSize, l, nleaves, leafSize;
1235   PetscSF            sfPoint;
1236   PetscSection       rootSection;
1237   PetscSFNode       *rootPoints, *leafPoints;
1238   const PetscSFNode *remote;
1239   const PetscInt    *local, *neighbors;
1240   IS                 valueIS;
1241   PetscBool          mpiOverflow = PETSC_FALSE;
1242 
1243   PetscFunctionBegin;
1244   PetscCall(PetscLogEventBegin(DMPLEX_PartLabelInvert, dm, 0, 0, 0));
1245   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
1246   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1247   PetscCallMPI(MPI_Comm_size(comm, &size));
1248   PetscCall(DMGetPointSF(dm, &sfPoint));
1249 
1250   /* Convert to (point, rank) and use actual owners */
1251   PetscCall(PetscSectionCreate(comm, &rootSection));
1252   PetscCall(PetscSectionSetChart(rootSection, 0, size));
1253   PetscCall(DMLabelGetValueIS(rootLabel, &valueIS));
1254   PetscCall(ISGetLocalSize(valueIS, &numNeighbors));
1255   PetscCall(ISGetIndices(valueIS, &neighbors));
1256   for (n = 0; n < numNeighbors; ++n) {
1257     PetscCall(DMLabelGetStratumSize(rootLabel, neighbors[n], &numPoints));
1258     PetscCall(PetscSectionAddDof(rootSection, neighbors[n], numPoints));
1259   }
1260   PetscCall(PetscSectionSetUp(rootSection));
1261   PetscCall(PetscSectionGetStorageSize(rootSection, &rootSize));
1262   PetscCall(PetscMalloc1(rootSize, &rootPoints));
1263   PetscCall(PetscSFGetGraph(sfPoint, NULL, &nleaves, &local, &remote));
1264   for (n = 0; n < numNeighbors; ++n) {
1265     IS              pointIS;
1266     const PetscInt *points;
1267 
1268     PetscCall(PetscSectionGetOffset(rootSection, neighbors[n], &off));
1269     PetscCall(DMLabelGetStratumIS(rootLabel, neighbors[n], &pointIS));
1270     PetscCall(ISGetLocalSize(pointIS, &numPoints));
1271     PetscCall(ISGetIndices(pointIS, &points));
1272     for (p = 0; p < numPoints; ++p) {
1273       if (local) PetscCall(PetscFindInt(points[p], nleaves, local, &l));
1274       else l = -1;
1275       if (l >= 0) {
1276         rootPoints[off + p] = remote[l];
1277       } else {
1278         rootPoints[off + p].index = points[p];
1279         rootPoints[off + p].rank  = rank;
1280       }
1281     }
1282     PetscCall(ISRestoreIndices(pointIS, &points));
1283     PetscCall(ISDestroy(&pointIS));
1284   }
1285 
1286   /* Try to communicate overlap using All-to-All */
1287   if (!processSF) {
1288     PetscInt64   counter     = 0;
1289     PetscBool    locOverflow = PETSC_FALSE;
1290     PetscMPIInt *scounts, *sdispls, *rcounts, *rdispls;
1291 
1292     PetscCall(PetscCalloc4(size, &scounts, size, &sdispls, size, &rcounts, size, &rdispls));
1293     for (n = 0; n < numNeighbors; ++n) {
1294       PetscCall(PetscSectionGetDof(rootSection, neighbors[n], &dof));
1295       PetscCall(PetscSectionGetOffset(rootSection, neighbors[n], &off));
1296 #if defined(PETSC_USE_64BIT_INDICES)
1297       if (dof > PETSC_MPI_INT_MAX) {
1298         locOverflow = PETSC_TRUE;
1299         break;
1300       }
1301       if (off > PETSC_MPI_INT_MAX) {
1302         locOverflow = PETSC_TRUE;
1303         break;
1304       }
1305 #endif
1306       scounts[neighbors[n]] = (PetscMPIInt)dof;
1307       sdispls[neighbors[n]] = (PetscMPIInt)off;
1308     }
1309     PetscCallMPI(MPI_Alltoall(scounts, 1, MPI_INT, rcounts, 1, MPI_INT, comm));
1310     for (r = 0; r < size; ++r) {
1311       rdispls[r] = (int)counter;
1312       counter += rcounts[r];
1313     }
1314     if (counter > PETSC_MPI_INT_MAX) locOverflow = PETSC_TRUE;
1315     PetscCallMPI(MPI_Allreduce(&locOverflow, &mpiOverflow, 1, MPIU_BOOL, MPI_LOR, comm));
1316     if (!mpiOverflow) {
1317       PetscCall(PetscInfo(dm, "Using Alltoallv for mesh distribution\n"));
1318       leafSize = (PetscInt)counter;
1319       PetscCall(PetscMalloc1(leafSize, &leafPoints));
1320       PetscCallMPI(MPI_Alltoallv(rootPoints, scounts, sdispls, MPIU_2INT, leafPoints, rcounts, rdispls, MPIU_2INT, comm));
1321     }
1322     PetscCall(PetscFree4(scounts, sdispls, rcounts, rdispls));
1323   }
1324 
1325   /* Communicate overlap using process star forest */
1326   if (processSF || mpiOverflow) {
1327     PetscSF      procSF;
1328     PetscSection leafSection;
1329 
1330     if (processSF) {
1331       PetscCall(PetscInfo(dm, "Using processSF for mesh distribution\n"));
1332       PetscCall(PetscObjectReference((PetscObject)processSF));
1333       procSF = processSF;
1334     } else {
1335       PetscCall(PetscInfo(dm, "Using processSF for mesh distribution (MPI overflow)\n"));
1336       PetscCall(PetscSFCreate(comm, &procSF));
1337       PetscCall(PetscSFSetGraphWithPattern(procSF, NULL, PETSCSF_PATTERN_ALLTOALL));
1338     }
1339 
1340     PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject)dm), &leafSection));
1341     PetscCall(DMPlexDistributeData(dm, procSF, rootSection, MPIU_2INT, rootPoints, leafSection, (void **)&leafPoints));
1342     PetscCall(PetscSectionGetStorageSize(leafSection, &leafSize));
1343     PetscCall(PetscSectionDestroy(&leafSection));
1344     PetscCall(PetscSFDestroy(&procSF));
1345   }
1346 
1347   for (p = 0; p < leafSize; p++) PetscCall(DMLabelSetValue(leafLabel, leafPoints[p].index, leafPoints[p].rank));
1348 
1349   PetscCall(ISRestoreIndices(valueIS, &neighbors));
1350   PetscCall(ISDestroy(&valueIS));
1351   PetscCall(PetscSectionDestroy(&rootSection));
1352   PetscCall(PetscFree(rootPoints));
1353   PetscCall(PetscFree(leafPoints));
1354   PetscCall(PetscLogEventEnd(DMPLEX_PartLabelInvert, dm, 0, 0, 0));
1355   PetscFunctionReturn(PETSC_SUCCESS);
1356 }
1357 
1358 /*@
1359   DMPlexPartitionLabelCreateSF - Create a star forest from a label that assigns ranks to points
1360 
1361   Input Parameters:
1362 + dm    - The `DM`
1363 - label - `DMLabel` assigning ranks to remote roots
1364 
1365   Output Parameter:
1366 . sf    - The star forest communication context encapsulating the defined mapping
1367 
1368   Level: developer
1369 
1370   Note:
1371   The incoming label is a receiver mapping of remote points to their parent rank.
1372 
1373 .seealso: [](chapter_unstructured), `DM`, `DMPLEX`, `DMLabel`, `PetscSF`, `DMPlexDistribute()`, `DMPlexCreateOverlap()`
1374 @*/
1375 PetscErrorCode DMPlexPartitionLabelCreateSF(DM dm, DMLabel label, PetscSF *sf)
1376 {
1377   PetscMPIInt     rank;
1378   PetscInt        n, numRemote, p, numPoints, pStart, pEnd, idx = 0, nNeighbors;
1379   PetscSFNode    *remotePoints;
1380   IS              remoteRootIS, neighborsIS;
1381   const PetscInt *remoteRoots, *neighbors;
1382 
1383   PetscFunctionBegin;
1384   PetscCall(PetscLogEventBegin(DMPLEX_PartLabelCreateSF, dm, 0, 0, 0));
1385   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
1386 
1387   PetscCall(DMLabelGetValueIS(label, &neighborsIS));
1388 #if 0
1389   {
1390     IS is;
1391     PetscCall(ISDuplicate(neighborsIS, &is));
1392     PetscCall(ISSort(is));
1393     PetscCall(ISDestroy(&neighborsIS));
1394     neighborsIS = is;
1395   }
1396 #endif
1397   PetscCall(ISGetLocalSize(neighborsIS, &nNeighbors));
1398   PetscCall(ISGetIndices(neighborsIS, &neighbors));
1399   for (numRemote = 0, n = 0; n < nNeighbors; ++n) {
1400     PetscCall(DMLabelGetStratumSize(label, neighbors[n], &numPoints));
1401     numRemote += numPoints;
1402   }
1403   PetscCall(PetscMalloc1(numRemote, &remotePoints));
1404   /* Put owned points first */
1405   PetscCall(DMLabelGetStratumSize(label, rank, &numPoints));
1406   if (numPoints > 0) {
1407     PetscCall(DMLabelGetStratumIS(label, rank, &remoteRootIS));
1408     PetscCall(ISGetIndices(remoteRootIS, &remoteRoots));
1409     for (p = 0; p < numPoints; p++) {
1410       remotePoints[idx].index = remoteRoots[p];
1411       remotePoints[idx].rank  = rank;
1412       idx++;
1413     }
1414     PetscCall(ISRestoreIndices(remoteRootIS, &remoteRoots));
1415     PetscCall(ISDestroy(&remoteRootIS));
1416   }
1417   /* Now add remote points */
1418   for (n = 0; n < nNeighbors; ++n) {
1419     const PetscInt nn = neighbors[n];
1420 
1421     PetscCall(DMLabelGetStratumSize(label, nn, &numPoints));
1422     if (nn == rank || numPoints <= 0) continue;
1423     PetscCall(DMLabelGetStratumIS(label, nn, &remoteRootIS));
1424     PetscCall(ISGetIndices(remoteRootIS, &remoteRoots));
1425     for (p = 0; p < numPoints; p++) {
1426       remotePoints[idx].index = remoteRoots[p];
1427       remotePoints[idx].rank  = nn;
1428       idx++;
1429     }
1430     PetscCall(ISRestoreIndices(remoteRootIS, &remoteRoots));
1431     PetscCall(ISDestroy(&remoteRootIS));
1432   }
1433   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), sf));
1434   PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));
1435   PetscCall(PetscSFSetGraph(*sf, pEnd - pStart, numRemote, NULL, PETSC_OWN_POINTER, remotePoints, PETSC_OWN_POINTER));
1436   PetscCall(PetscSFSetUp(*sf));
1437   PetscCall(ISDestroy(&neighborsIS));
1438   PetscCall(PetscLogEventEnd(DMPLEX_PartLabelCreateSF, dm, 0, 0, 0));
1439   PetscFunctionReturn(PETSC_SUCCESS);
1440 }
1441 
1442 #if defined(PETSC_HAVE_PARMETIS)
1443   #include <parmetis.h>
1444 #endif
1445 
1446 /* The two functions below are used by DMPlexRebalanceSharedPoints which errors
1447  * when PETSc is built without ParMETIS. To avoid -Wunused-function, we take
1448  * them out in that case. */
1449 #if defined(PETSC_HAVE_PARMETIS)
1450 /*@C
1451 
1452   DMPlexRewriteSF - Rewrites the ownership of the `PetsSF` of a `DM` (in place).
1453 
1454   Input parameters:
1455 + dm                - The `DMPLEX` object.
1456 . n                 - The number of points.
1457 . pointsToRewrite   - The points in the `PetscSF` whose ownership will change.
1458 . targetOwners      - New owner for each element in pointsToRewrite.
1459 - degrees           - Degrees of the points in the `PetscSF` as obtained by `PetscSFComputeDegreeBegin()`/`PetscSFComputeDegreeEnd()`.
1460 
1461   Level: developer
1462 
1463 .seealso: [](chapter_unstructured), `DM`, `DMPLEX`, `DMLabel`, `PetscSF`, `DMPlexDistribute()`, `DMPlexCreateOverlap()`
1464 @*/
1465 static PetscErrorCode DMPlexRewriteSF(DM dm, PetscInt n, PetscInt *pointsToRewrite, PetscInt *targetOwners, const PetscInt *degrees)
1466 {
1467   PetscInt           pStart, pEnd, i, j, counter, leafCounter, sumDegrees, nroots, nleafs;
1468   PetscInt          *cumSumDegrees, *newOwners, *newNumbers, *rankOnLeafs, *locationsOfLeafs, *remoteLocalPointOfLeafs, *points, *leafsNew;
1469   PetscSFNode       *leafLocationsNew;
1470   const PetscSFNode *iremote;
1471   const PetscInt    *ilocal;
1472   PetscBool         *isLeaf;
1473   PetscSF            sf;
1474   MPI_Comm           comm;
1475   PetscMPIInt        rank, size;
1476 
1477   PetscFunctionBegin;
1478   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
1479   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1480   PetscCallMPI(MPI_Comm_size(comm, &size));
1481   PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));
1482 
1483   PetscCall(DMGetPointSF(dm, &sf));
1484   PetscCall(PetscSFGetGraph(sf, &nroots, &nleafs, &ilocal, &iremote));
1485   PetscCall(PetscMalloc1(pEnd - pStart, &isLeaf));
1486   for (i = 0; i < pEnd - pStart; i++) isLeaf[i] = PETSC_FALSE;
1487   for (i = 0; i < nleafs; i++) isLeaf[ilocal[i] - pStart] = PETSC_TRUE;
1488 
1489   PetscCall(PetscMalloc1(pEnd - pStart + 1, &cumSumDegrees));
1490   cumSumDegrees[0] = 0;
1491   for (i = 1; i <= pEnd - pStart; i++) cumSumDegrees[i] = cumSumDegrees[i - 1] + degrees[i - 1];
1492   sumDegrees = cumSumDegrees[pEnd - pStart];
1493   /* get the location of my leafs (we have sumDegrees many leafs pointing at our roots) */
1494 
1495   PetscCall(PetscMalloc1(sumDegrees, &locationsOfLeafs));
1496   PetscCall(PetscMalloc1(pEnd - pStart, &rankOnLeafs));
1497   for (i = 0; i < pEnd - pStart; i++) rankOnLeafs[i] = rank;
1498   PetscCall(PetscSFGatherBegin(sf, MPIU_INT, rankOnLeafs, locationsOfLeafs));
1499   PetscCall(PetscSFGatherEnd(sf, MPIU_INT, rankOnLeafs, locationsOfLeafs));
1500   PetscCall(PetscFree(rankOnLeafs));
1501 
1502   /* get the remote local points of my leaves */
1503   PetscCall(PetscMalloc1(sumDegrees, &remoteLocalPointOfLeafs));
1504   PetscCall(PetscMalloc1(pEnd - pStart, &points));
1505   for (i = 0; i < pEnd - pStart; i++) points[i] = pStart + i;
1506   PetscCall(PetscSFGatherBegin(sf, MPIU_INT, points, remoteLocalPointOfLeafs));
1507   PetscCall(PetscSFGatherEnd(sf, MPIU_INT, points, remoteLocalPointOfLeafs));
1508   PetscCall(PetscFree(points));
1509   /* Figure out the new owners of the vertices that are up for grabs and their numbers on the new owners */
1510   PetscCall(PetscMalloc1(pEnd - pStart, &newOwners));
1511   PetscCall(PetscMalloc1(pEnd - pStart, &newNumbers));
1512   for (i = 0; i < pEnd - pStart; i++) {
1513     newOwners[i]  = -1;
1514     newNumbers[i] = -1;
1515   }
1516   {
1517     PetscInt oldNumber, newNumber, oldOwner, newOwner;
1518     for (i = 0; i < n; i++) {
1519       oldNumber = pointsToRewrite[i];
1520       newNumber = -1;
1521       oldOwner  = rank;
1522       newOwner  = targetOwners[i];
1523       if (oldOwner == newOwner) {
1524         newNumber = oldNumber;
1525       } else {
1526         for (j = 0; j < degrees[oldNumber]; j++) {
1527           if (locationsOfLeafs[cumSumDegrees[oldNumber] + j] == newOwner) {
1528             newNumber = remoteLocalPointOfLeafs[cumSumDegrees[oldNumber] + j];
1529             break;
1530           }
1531         }
1532       }
1533       PetscCheck(newNumber != -1, PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Couldn't find the new owner of vertex.");
1534 
1535       newOwners[oldNumber]  = newOwner;
1536       newNumbers[oldNumber] = newNumber;
1537     }
1538   }
1539   PetscCall(PetscFree(cumSumDegrees));
1540   PetscCall(PetscFree(locationsOfLeafs));
1541   PetscCall(PetscFree(remoteLocalPointOfLeafs));
1542 
1543   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, newOwners, newOwners, MPI_REPLACE));
1544   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, newOwners, newOwners, MPI_REPLACE));
1545   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, newNumbers, newNumbers, MPI_REPLACE));
1546   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, newNumbers, newNumbers, MPI_REPLACE));
1547 
1548   /* Now count how many leafs we have on each processor. */
1549   leafCounter = 0;
1550   for (i = 0; i < pEnd - pStart; i++) {
1551     if (newOwners[i] >= 0) {
1552       if (newOwners[i] != rank) leafCounter++;
1553     } else {
1554       if (isLeaf[i]) leafCounter++;
1555     }
1556   }
1557 
1558   /* Now set up the new sf by creating the leaf arrays */
1559   PetscCall(PetscMalloc1(leafCounter, &leafsNew));
1560   PetscCall(PetscMalloc1(leafCounter, &leafLocationsNew));
1561 
1562   leafCounter = 0;
1563   counter     = 0;
1564   for (i = 0; i < pEnd - pStart; i++) {
1565     if (newOwners[i] >= 0) {
1566       if (newOwners[i] != rank) {
1567         leafsNew[leafCounter]               = i;
1568         leafLocationsNew[leafCounter].rank  = newOwners[i];
1569         leafLocationsNew[leafCounter].index = newNumbers[i];
1570         leafCounter++;
1571       }
1572     } else {
1573       if (isLeaf[i]) {
1574         leafsNew[leafCounter]               = i;
1575         leafLocationsNew[leafCounter].rank  = iremote[counter].rank;
1576         leafLocationsNew[leafCounter].index = iremote[counter].index;
1577         leafCounter++;
1578       }
1579     }
1580     if (isLeaf[i]) counter++;
1581   }
1582 
1583   PetscCall(PetscSFSetGraph(sf, nroots, leafCounter, leafsNew, PETSC_OWN_POINTER, leafLocationsNew, PETSC_OWN_POINTER));
1584   PetscCall(PetscFree(newOwners));
1585   PetscCall(PetscFree(newNumbers));
1586   PetscCall(PetscFree(isLeaf));
1587   PetscFunctionReturn(PETSC_SUCCESS);
1588 }
1589 
1590 static PetscErrorCode DMPlexViewDistribution(MPI_Comm comm, PetscInt n, PetscInt skip, PetscInt *vtxwgt, PetscInt *part, PetscViewer viewer)
1591 {
1592   PetscInt   *distribution, min, max, sum;
1593   PetscMPIInt rank, size;
1594 
1595   PetscFunctionBegin;
1596   PetscCallMPI(MPI_Comm_size(comm, &size));
1597   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1598   PetscCall(PetscCalloc1(size, &distribution));
1599   for (PetscInt i = 0; i < n; i++) {
1600     if (part) distribution[part[i]] += vtxwgt[skip * i];
1601     else distribution[rank] += vtxwgt[skip * i];
1602   }
1603   PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE, distribution, size, MPIU_INT, MPI_SUM, comm));
1604   min = distribution[0];
1605   max = distribution[0];
1606   sum = distribution[0];
1607   for (PetscInt i = 1; i < size; i++) {
1608     if (distribution[i] < min) min = distribution[i];
1609     if (distribution[i] > max) max = distribution[i];
1610     sum += distribution[i];
1611   }
1612   PetscCall(PetscViewerASCIIPrintf(viewer, "Min: %" PetscInt_FMT ", Avg: %" PetscInt_FMT ", Max: %" PetscInt_FMT ", Balance: %f\n", min, sum / size, max, (max * 1. * size) / sum));
1613   PetscCall(PetscFree(distribution));
1614   PetscFunctionReturn(PETSC_SUCCESS);
1615 }
1616 
1617 #endif
1618 
1619 /*@
1620   DMPlexRebalanceSharedPoints - Redistribute points in the plex that are shared in order to achieve better balancing. This routine updates the `PointSF` of the `DM` inplace.
1621 
1622   Input parameters:
1623 + dm               - The `DMPLEX` object.
1624 . entityDepth      - depth of the entity to balance (0 -> balance vertices).
1625 . useInitialGuess  - whether to use the current distribution as initial guess (only used by ParMETIS).
1626 - parallel         - whether to use ParMETIS and do the partition in parallel or whether to gather the graph onto a single process and use METIS.
1627 
1628   Output parameters:
1629 . success          - whether the graph partitioning was successful or not, optional. Unsuccessful simply means no change to the partitioning
1630 
1631   Options Database Keys:
1632 +  -dm_plex_rebalance_shared_points_parmetis - Use ParMetis instead of Metis for the partitioner
1633 .  -dm_plex_rebalance_shared_points_use_initial_guess - Use current partition to bootstrap ParMetis partition
1634 .  -dm_plex_rebalance_shared_points_use_mat_partitioning - Use the MatPartitioning object to perform the partition, the prefix for those operations is -dm_plex_rebalance_shared_points_
1635 -  -dm_plex_rebalance_shared_points_monitor - Monitor the shared points rebalance process
1636 
1637   Level: intermediate
1638 
1639 .seealso: [](chapter_unstructured), `DM`, `DMPLEX`, `DMPlexDistribute()`, `DMPlexCreateOverlap()`
1640 @*/
1641 PetscErrorCode DMPlexRebalanceSharedPoints(DM dm, PetscInt entityDepth, PetscBool useInitialGuess, PetscBool parallel, PetscBool *success)
1642 {
1643 #if defined(PETSC_HAVE_PARMETIS)
1644   PetscSF            sf;
1645   PetscInt           ierr, i, j, idx, jdx;
1646   PetscInt           eBegin, eEnd, nroots, nleafs, pStart, pEnd;
1647   const PetscInt    *degrees, *ilocal;
1648   const PetscSFNode *iremote;
1649   PetscBool         *toBalance, *isLeaf, *isExclusivelyOwned, *isNonExclusivelyOwned;
1650   PetscInt           numExclusivelyOwned, numNonExclusivelyOwned;
1651   PetscMPIInt        rank, size;
1652   PetscInt          *globalNumbersOfLocalOwnedVertices, *leafGlobalNumbers;
1653   const PetscInt    *cumSumVertices;
1654   PetscInt           offset, counter;
1655   PetscInt          *vtxwgt;
1656   const PetscInt    *xadj, *adjncy;
1657   PetscInt          *part, *options;
1658   PetscInt           nparts, wgtflag, numflag, ncon, edgecut;
1659   real_t            *ubvec;
1660   PetscInt          *firstVertices, *renumbering;
1661   PetscInt           failed, failedGlobal;
1662   MPI_Comm           comm;
1663   Mat                A;
1664   PetscViewer        viewer;
1665   PetscViewerFormat  format;
1666   PetscLayout        layout;
1667   real_t            *tpwgts;
1668   PetscMPIInt       *counts, *mpiCumSumVertices;
1669   PetscInt          *pointsToRewrite;
1670   PetscInt           numRows;
1671   PetscBool          done, usematpartitioning = PETSC_FALSE;
1672   IS                 ispart = NULL;
1673   MatPartitioning    mp;
1674   const char        *prefix;
1675 
1676   PetscFunctionBegin;
1677   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
1678   PetscCallMPI(MPI_Comm_size(comm, &size));
1679   if (size == 1) {
1680     if (success) *success = PETSC_TRUE;
1681     PetscFunctionReturn(PETSC_SUCCESS);
1682   }
1683   if (success) *success = PETSC_FALSE;
1684   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1685 
1686   parallel        = PETSC_FALSE;
1687   useInitialGuess = PETSC_FALSE;
1688   PetscObjectOptionsBegin((PetscObject)dm);
1689   PetscCall(PetscOptionsName("-dm_plex_rebalance_shared_points_parmetis", "Use ParMetis instead of Metis for the partitioner", "DMPlexRebalanceSharedPoints", &parallel));
1690   PetscCall(PetscOptionsBool("-dm_plex_rebalance_shared_points_use_initial_guess", "Use current partition to bootstrap ParMetis partition", "DMPlexRebalanceSharedPoints", useInitialGuess, &useInitialGuess, NULL));
1691   PetscCall(PetscOptionsBool("-dm_plex_rebalance_shared_points_use_mat_partitioning", "Use the MatPartitioning object to partition", "DMPlexRebalanceSharedPoints", usematpartitioning, &usematpartitioning, NULL));
1692   PetscCall(PetscOptionsViewer("-dm_plex_rebalance_shared_points_monitor", "Monitor the shared points rebalance process", "DMPlexRebalanceSharedPoints", &viewer, &format, NULL));
1693   PetscOptionsEnd();
1694   if (viewer) PetscCall(PetscViewerPushFormat(viewer, format));
1695 
1696   PetscCall(PetscLogEventBegin(DMPLEX_RebalanceSharedPoints, dm, 0, 0, 0));
1697 
1698   PetscCall(DMGetOptionsPrefix(dm, &prefix));
1699   PetscCall(PetscOptionsGetViewer(comm, ((PetscObject)dm)->options, prefix, "-dm_rebalance_partition_view", &viewer, &format, NULL));
1700   if (viewer) PetscCall(PetscViewerPushFormat(viewer, format));
1701 
1702   /* Figure out all points in the plex that we are interested in balancing. */
1703   PetscCall(DMPlexGetDepthStratum(dm, entityDepth, &eBegin, &eEnd));
1704   PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));
1705   PetscCall(PetscMalloc1(pEnd - pStart, &toBalance));
1706   for (i = 0; i < pEnd - pStart; i++) toBalance[i] = (PetscBool)(i >= eBegin && i < eEnd);
1707 
1708   /* There are three types of points:
1709    * exclusivelyOwned: points that are owned by this process and only seen by this process
1710    * nonExclusivelyOwned: points that are owned by this process but seen by at least another process
1711    * leaf: a point that is seen by this process but owned by a different process
1712    */
1713   PetscCall(DMGetPointSF(dm, &sf));
1714   PetscCall(PetscSFGetGraph(sf, &nroots, &nleafs, &ilocal, &iremote));
1715   PetscCall(PetscMalloc1(pEnd - pStart, &isLeaf));
1716   PetscCall(PetscMalloc1(pEnd - pStart, &isNonExclusivelyOwned));
1717   PetscCall(PetscMalloc1(pEnd - pStart, &isExclusivelyOwned));
1718   for (i = 0; i < pEnd - pStart; i++) {
1719     isNonExclusivelyOwned[i] = PETSC_FALSE;
1720     isExclusivelyOwned[i]    = PETSC_FALSE;
1721     isLeaf[i]                = PETSC_FALSE;
1722   }
1723 
1724   /* mark all the leafs */
1725   for (i = 0; i < nleafs; i++) isLeaf[ilocal[i] - pStart] = PETSC_TRUE;
1726 
1727   /* for an owned point, we can figure out whether another processor sees it or
1728    * not by calculating its degree */
1729   PetscCall(PetscSFComputeDegreeBegin(sf, &degrees));
1730   PetscCall(PetscSFComputeDegreeEnd(sf, &degrees));
1731   numExclusivelyOwned    = 0;
1732   numNonExclusivelyOwned = 0;
1733   for (i = 0; i < pEnd - pStart; i++) {
1734     if (toBalance[i]) {
1735       if (degrees[i] > 0) {
1736         isNonExclusivelyOwned[i] = PETSC_TRUE;
1737         numNonExclusivelyOwned += 1;
1738       } else {
1739         if (!isLeaf[i]) {
1740           isExclusivelyOwned[i] = PETSC_TRUE;
1741           numExclusivelyOwned += 1;
1742         }
1743       }
1744     }
1745   }
1746 
1747   /* Build a graph with one vertex per core representing the
1748    * exclusively owned points and then one vertex per nonExclusively owned
1749    * point. */
1750   PetscCall(PetscLayoutCreate(comm, &layout));
1751   PetscCall(PetscLayoutSetLocalSize(layout, 1 + numNonExclusivelyOwned));
1752   PetscCall(PetscLayoutSetUp(layout));
1753   PetscCall(PetscLayoutGetRanges(layout, &cumSumVertices));
1754   PetscCall(PetscMalloc1(pEnd - pStart, &globalNumbersOfLocalOwnedVertices));
1755   for (i = 0; i < pEnd - pStart; i++) globalNumbersOfLocalOwnedVertices[i] = pStart - 1;
1756   offset  = cumSumVertices[rank];
1757   counter = 0;
1758   for (i = 0; i < pEnd - pStart; i++) {
1759     if (toBalance[i]) {
1760       if (degrees[i] > 0) {
1761         globalNumbersOfLocalOwnedVertices[i] = counter + 1 + offset;
1762         counter++;
1763       }
1764     }
1765   }
1766 
1767   /* send the global numbers of vertices I own to the leafs so that they know to connect to it */
1768   PetscCall(PetscMalloc1(pEnd - pStart, &leafGlobalNumbers));
1769   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, globalNumbersOfLocalOwnedVertices, leafGlobalNumbers, MPI_REPLACE));
1770   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, globalNumbersOfLocalOwnedVertices, leafGlobalNumbers, MPI_REPLACE));
1771 
1772   /* Build the graph for partitioning */
1773   numRows = 1 + numNonExclusivelyOwned;
1774   PetscCall(PetscLogEventBegin(DMPLEX_RebalBuildGraph, dm, 0, 0, 0));
1775   PetscCall(MatCreate(comm, &A));
1776   PetscCall(MatSetType(A, MATMPIADJ));
1777   PetscCall(MatSetSizes(A, numRows, numRows, cumSumVertices[size], cumSumVertices[size]));
1778   idx = cumSumVertices[rank];
1779   for (i = 0; i < pEnd - pStart; i++) {
1780     if (toBalance[i]) {
1781       if (isNonExclusivelyOwned[i]) jdx = globalNumbersOfLocalOwnedVertices[i];
1782       else if (isLeaf[i]) jdx = leafGlobalNumbers[i];
1783       else continue;
1784       PetscCall(MatSetValue(A, idx, jdx, 1, INSERT_VALUES));
1785       PetscCall(MatSetValue(A, jdx, idx, 1, INSERT_VALUES));
1786     }
1787   }
1788   PetscCall(PetscFree(globalNumbersOfLocalOwnedVertices));
1789   PetscCall(PetscFree(leafGlobalNumbers));
1790   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1791   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1792   PetscCall(PetscLogEventEnd(DMPLEX_RebalBuildGraph, dm, 0, 0, 0));
1793 
1794   nparts = size;
1795   ncon   = 1;
1796   PetscCall(PetscMalloc1(ncon * nparts, &tpwgts));
1797   for (i = 0; i < ncon * nparts; i++) tpwgts[i] = 1. / (nparts);
1798   PetscCall(PetscMalloc1(ncon, &ubvec));
1799   ubvec[0] = 1.05;
1800   ubvec[1] = 1.05;
1801 
1802   PetscCall(PetscMalloc1(ncon * (1 + numNonExclusivelyOwned), &vtxwgt));
1803   if (ncon == 2) {
1804     vtxwgt[0] = numExclusivelyOwned;
1805     vtxwgt[1] = 1;
1806     for (i = 0; i < numNonExclusivelyOwned; i++) {
1807       vtxwgt[ncon * (i + 1)]     = 1;
1808       vtxwgt[ncon * (i + 1) + 1] = 0;
1809     }
1810   } else {
1811     PetscInt base, ms;
1812     PetscCallMPI(MPI_Allreduce(&numExclusivelyOwned, &base, 1, MPIU_INT, MPIU_MAX, PetscObjectComm((PetscObject)dm)));
1813     PetscCall(MatGetSize(A, &ms, NULL));
1814     ms -= size;
1815     base      = PetscMax(base, ms);
1816     vtxwgt[0] = base + numExclusivelyOwned;
1817     for (i = 0; i < numNonExclusivelyOwned; i++) vtxwgt[i + 1] = 1;
1818   }
1819 
1820   if (viewer) {
1821     PetscCall(PetscViewerASCIIPrintf(viewer, "Attempt rebalancing of shared points of depth %" PetscInt_FMT " on interface of mesh distribution.\n", entityDepth));
1822     PetscCall(PetscViewerASCIIPrintf(viewer, "Size of generated auxiliary graph: %" PetscInt_FMT "\n", cumSumVertices[size]));
1823   }
1824   /* TODO: Drop the parallel/sequential choice here and just use MatPartioner for much more flexibility */
1825   if (usematpartitioning) {
1826     const char *prefix;
1827 
1828     PetscCall(MatPartitioningCreate(PetscObjectComm((PetscObject)dm), &mp));
1829     PetscCall(PetscObjectSetOptionsPrefix((PetscObject)mp, "dm_plex_rebalance_shared_points_"));
1830     PetscCall(PetscObjectGetOptionsPrefix((PetscObject)dm, &prefix));
1831     PetscCall(PetscObjectPrependOptionsPrefix((PetscObject)mp, prefix));
1832     PetscCall(MatPartitioningSetAdjacency(mp, A));
1833     PetscCall(MatPartitioningSetNumberVertexWeights(mp, ncon));
1834     PetscCall(MatPartitioningSetVertexWeights(mp, vtxwgt));
1835     PetscCall(MatPartitioningSetFromOptions(mp));
1836     PetscCall(MatPartitioningApply(mp, &ispart));
1837     PetscCall(ISGetIndices(ispart, (const PetscInt **)&part));
1838   } else if (parallel) {
1839     if (viewer) PetscCall(PetscViewerASCIIPrintf(viewer, "Using ParMETIS to partition graph.\n"));
1840     PetscCall(PetscMalloc1(cumSumVertices[rank + 1] - cumSumVertices[rank], &part));
1841     PetscCall(MatGetRowIJ(A, PETSC_FALSE, PETSC_FALSE, PETSC_FALSE, &numRows, &xadj, &adjncy, &done));
1842     PetscCheck(done, PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Could not get adjacency information");
1843     PetscCall(PetscMalloc1(4, &options));
1844     options[0] = 1;
1845     options[1] = 0; /* Verbosity */
1846     if (viewer) options[1] = 1;
1847     options[2] = 0;                    /* Seed */
1848     options[3] = PARMETIS_PSR_COUPLED; /* Seed */
1849     wgtflag    = 2;
1850     numflag    = 0;
1851     if (useInitialGuess) {
1852       PetscCall(PetscViewerASCIIPrintf(viewer, "THIS DOES NOT WORK! I don't know why. Using current distribution of points as initial guess.\n"));
1853       for (i = 0; i < numRows; i++) part[i] = rank;
1854       if (viewer) PetscCall(PetscViewerASCIIPrintf(viewer, "Using current distribution of points as initial guess.\n"));
1855       PetscStackPushExternal("ParMETIS_V3_RefineKway");
1856       PetscCall(PetscLogEventBegin(DMPLEX_RebalPartition, 0, 0, 0, 0));
1857       ierr = ParMETIS_V3_RefineKway((PetscInt *)cumSumVertices, (idx_t *)xadj, (idx_t *)adjncy, vtxwgt, NULL, &wgtflag, &numflag, &ncon, &nparts, tpwgts, ubvec, options, &edgecut, part, &comm);
1858       PetscCall(PetscLogEventEnd(DMPLEX_RebalPartition, 0, 0, 0, 0));
1859       PetscStackPop;
1860       PetscCheck(ierr == METIS_OK, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ParMETIS_V3_RefineKway()");
1861     } else {
1862       PetscStackPushExternal("ParMETIS_V3_PartKway");
1863       PetscCall(PetscLogEventBegin(DMPLEX_RebalPartition, 0, 0, 0, 0));
1864       ierr = ParMETIS_V3_PartKway((PetscInt *)cumSumVertices, (idx_t *)xadj, (idx_t *)adjncy, vtxwgt, NULL, &wgtflag, &numflag, &ncon, &nparts, tpwgts, ubvec, options, &edgecut, part, &comm);
1865       PetscCall(PetscLogEventEnd(DMPLEX_RebalPartition, 0, 0, 0, 0));
1866       PetscStackPop;
1867       PetscCheck(ierr == METIS_OK, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ParMETIS_V3_PartKway()");
1868     }
1869     PetscCall(MatRestoreRowIJ(A, PETSC_FALSE, PETSC_FALSE, PETSC_FALSE, &numRows, &xadj, &adjncy, &done));
1870     PetscCall(PetscFree(options));
1871   } else {
1872     if (viewer) PetscCall(PetscViewerASCIIPrintf(viewer, "Using METIS to partition graph.\n"));
1873     Mat       As;
1874     PetscInt *partGlobal;
1875     PetscInt *numExclusivelyOwnedAll;
1876 
1877     PetscCall(PetscMalloc1(cumSumVertices[rank + 1] - cumSumVertices[rank], &part));
1878     PetscCall(MatGetSize(A, &numRows, NULL));
1879     PetscCall(PetscLogEventBegin(DMPLEX_RebalGatherGraph, dm, 0, 0, 0));
1880     PetscCall(MatMPIAdjToSeqRankZero(A, &As));
1881     PetscCall(PetscLogEventEnd(DMPLEX_RebalGatherGraph, dm, 0, 0, 0));
1882 
1883     PetscCall(PetscMalloc1(size, &numExclusivelyOwnedAll));
1884     numExclusivelyOwnedAll[rank] = numExclusivelyOwned;
1885     PetscCallMPI(MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, numExclusivelyOwnedAll, 1, MPIU_INT, comm));
1886 
1887     PetscCall(PetscMalloc1(numRows, &partGlobal));
1888     PetscCall(PetscLogEventBegin(DMPLEX_RebalPartition, 0, 0, 0, 0));
1889     if (rank == 0) {
1890       PetscInt *vtxwgt_g, numRows_g;
1891 
1892       PetscCall(MatGetRowIJ(As, PETSC_FALSE, PETSC_FALSE, PETSC_FALSE, &numRows_g, &xadj, &adjncy, &done));
1893       PetscCall(PetscMalloc1(2 * numRows_g, &vtxwgt_g));
1894       for (i = 0; i < size; i++) {
1895         vtxwgt_g[ncon * cumSumVertices[i]] = numExclusivelyOwnedAll[i];
1896         if (ncon > 1) vtxwgt_g[ncon * cumSumVertices[i] + 1] = 1;
1897         for (j = cumSumVertices[i] + 1; j < cumSumVertices[i + 1]; j++) {
1898           vtxwgt_g[ncon * j] = 1;
1899           if (ncon > 1) vtxwgt_g[2 * j + 1] = 0;
1900         }
1901       }
1902 
1903       PetscCall(PetscMalloc1(64, &options));
1904       ierr = METIS_SetDefaultOptions(options); /* initialize all defaults */
1905       PetscCheck(ierr == METIS_OK, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in METIS_SetDefaultOptions()");
1906       options[METIS_OPTION_CONTIG] = 1;
1907       PetscStackPushExternal("METIS_PartGraphKway");
1908       ierr = METIS_PartGraphKway(&numRows_g, &ncon, (idx_t *)xadj, (idx_t *)adjncy, vtxwgt_g, NULL, NULL, &nparts, tpwgts, ubvec, options, &edgecut, partGlobal);
1909       PetscStackPop;
1910       PetscCheck(ierr == METIS_OK, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in METIS_PartGraphKway()");
1911       PetscCall(PetscFree(options));
1912       PetscCall(PetscFree(vtxwgt_g));
1913       PetscCall(MatRestoreRowIJ(As, PETSC_FALSE, PETSC_FALSE, PETSC_FALSE, &numRows_g, &xadj, &adjncy, &done));
1914       PetscCall(MatDestroy(&As));
1915     }
1916     PetscCall(PetscBarrier((PetscObject)dm));
1917     PetscCall(PetscLogEventEnd(DMPLEX_RebalPartition, 0, 0, 0, 0));
1918     PetscCall(PetscFree(numExclusivelyOwnedAll));
1919 
1920     /* scatter the partitioning information to ranks */
1921     PetscCall(PetscLogEventBegin(DMPLEX_RebalScatterPart, 0, 0, 0, 0));
1922     PetscCall(PetscMalloc1(size, &counts));
1923     PetscCall(PetscMalloc1(size + 1, &mpiCumSumVertices));
1924     for (i = 0; i < size; i++) PetscCall(PetscMPIIntCast(cumSumVertices[i + 1] - cumSumVertices[i], &(counts[i])));
1925     for (i = 0; i <= size; i++) PetscCall(PetscMPIIntCast(cumSumVertices[i], &(mpiCumSumVertices[i])));
1926     PetscCallMPI(MPI_Scatterv(partGlobal, counts, mpiCumSumVertices, MPIU_INT, part, counts[rank], MPIU_INT, 0, comm));
1927     PetscCall(PetscFree(counts));
1928     PetscCall(PetscFree(mpiCumSumVertices));
1929     PetscCall(PetscFree(partGlobal));
1930     PetscCall(PetscLogEventEnd(DMPLEX_RebalScatterPart, 0, 0, 0, 0));
1931   }
1932   PetscCall(PetscFree(ubvec));
1933   PetscCall(PetscFree(tpwgts));
1934 
1935   /* Rename the result so that the vertex resembling the exclusively owned points stays on the same rank */
1936   PetscCall(PetscMalloc2(size, &firstVertices, size, &renumbering));
1937   firstVertices[rank] = part[0];
1938   PetscCallMPI(MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, firstVertices, 1, MPIU_INT, comm));
1939   for (i = 0; i < size; i++) renumbering[firstVertices[i]] = i;
1940   for (i = 0; i < cumSumVertices[rank + 1] - cumSumVertices[rank]; i++) part[i] = renumbering[part[i]];
1941   PetscCall(PetscFree2(firstVertices, renumbering));
1942 
1943   /* Check if the renumbering worked (this can fail when ParMETIS gives fewer partitions than there are processes) */
1944   failed = (PetscInt)(part[0] != rank);
1945   PetscCallMPI(MPI_Allreduce(&failed, &failedGlobal, 1, MPIU_INT, MPI_SUM, comm));
1946   if (failedGlobal > 0) {
1947     PetscCheck(failedGlobal <= 0, comm, PETSC_ERR_LIB, "Metis/Parmetis returned a bad partion");
1948     PetscCall(PetscFree(vtxwgt));
1949     PetscCall(PetscFree(toBalance));
1950     PetscCall(PetscFree(isLeaf));
1951     PetscCall(PetscFree(isNonExclusivelyOwned));
1952     PetscCall(PetscFree(isExclusivelyOwned));
1953     if (usematpartitioning) {
1954       PetscCall(ISRestoreIndices(ispart, (const PetscInt **)&part));
1955       PetscCall(ISDestroy(&ispart));
1956     } else PetscCall(PetscFree(part));
1957     if (viewer) {
1958       PetscCall(PetscViewerPopFormat(viewer));
1959       PetscCall(PetscViewerDestroy(&viewer));
1960     }
1961     PetscCall(PetscLogEventEnd(DMPLEX_RebalanceSharedPoints, dm, 0, 0, 0));
1962     PetscFunctionReturn(PETSC_SUCCESS);
1963   }
1964 
1965   /* Check how well we did distributing points*/
1966   if (viewer) {
1967     PetscCall(PetscViewerASCIIPrintf(viewer, "Number of owned entities of depth %" PetscInt_FMT ".\n", entityDepth));
1968     PetscCall(PetscViewerASCIIPrintf(viewer, "Initial      "));
1969     PetscCall(DMPlexViewDistribution(comm, cumSumVertices[rank + 1] - cumSumVertices[rank], ncon, vtxwgt, NULL, viewer));
1970     PetscCall(PetscViewerASCIIPrintf(viewer, "Rebalanced   "));
1971     PetscCall(DMPlexViewDistribution(comm, cumSumVertices[rank + 1] - cumSumVertices[rank], ncon, vtxwgt, part, viewer));
1972   }
1973 
1974   /* Check that every vertex is owned by a process that it is actually connected to. */
1975   PetscCall(MatGetRowIJ(A, PETSC_FALSE, PETSC_FALSE, PETSC_FALSE, &numRows, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &done));
1976   for (i = 1; i <= numNonExclusivelyOwned; i++) {
1977     PetscInt loc = 0;
1978     PetscCall(PetscFindInt(cumSumVertices[part[i]], xadj[i + 1] - xadj[i], &adjncy[xadj[i]], &loc));
1979     /* If not, then just set the owner to the original owner (hopefully a rare event, it means that a vertex has been isolated) */
1980     if (loc < 0) part[i] = rank;
1981   }
1982   PetscCall(MatRestoreRowIJ(A, PETSC_FALSE, PETSC_FALSE, PETSC_FALSE, &numRows, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &done));
1983   PetscCall(MatDestroy(&A));
1984 
1985   /* See how significant the influences of the previous fixing up step was.*/
1986   if (viewer) {
1987     PetscCall(PetscViewerASCIIPrintf(viewer, "After fix    "));
1988     PetscCall(DMPlexViewDistribution(comm, cumSumVertices[rank + 1] - cumSumVertices[rank], ncon, vtxwgt, part, viewer));
1989   }
1990   if (!usematpartitioning) PetscCall(PetscFree(vtxwgt));
1991   else PetscCall(MatPartitioningDestroy(&mp));
1992 
1993   PetscCall(PetscLayoutDestroy(&layout));
1994 
1995   PetscCall(PetscLogEventBegin(DMPLEX_RebalRewriteSF, dm, 0, 0, 0));
1996   /* Rewrite the SF to reflect the new ownership. */
1997   PetscCall(PetscMalloc1(numNonExclusivelyOwned, &pointsToRewrite));
1998   counter = 0;
1999   for (i = 0; i < pEnd - pStart; i++) {
2000     if (toBalance[i]) {
2001       if (isNonExclusivelyOwned[i]) {
2002         pointsToRewrite[counter] = i + pStart;
2003         counter++;
2004       }
2005     }
2006   }
2007   PetscCall(DMPlexRewriteSF(dm, numNonExclusivelyOwned, pointsToRewrite, part + 1, degrees));
2008   PetscCall(PetscFree(pointsToRewrite));
2009   PetscCall(PetscLogEventEnd(DMPLEX_RebalRewriteSF, dm, 0, 0, 0));
2010 
2011   PetscCall(PetscFree(toBalance));
2012   PetscCall(PetscFree(isLeaf));
2013   PetscCall(PetscFree(isNonExclusivelyOwned));
2014   PetscCall(PetscFree(isExclusivelyOwned));
2015   if (usematpartitioning) {
2016     PetscCall(ISRestoreIndices(ispart, (const PetscInt **)&part));
2017     PetscCall(ISDestroy(&ispart));
2018   } else PetscCall(PetscFree(part));
2019   if (viewer) {
2020     PetscCall(PetscViewerPopFormat(viewer));
2021     PetscCall(PetscViewerDestroy(&viewer));
2022   }
2023   if (success) *success = PETSC_TRUE;
2024   PetscCall(PetscLogEventEnd(DMPLEX_RebalanceSharedPoints, dm, 0, 0, 0));
2025   PetscFunctionReturn(PETSC_SUCCESS);
2026 #else
2027   SETERRQ(PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Mesh partitioning needs external package support.\nPlease reconfigure with --download-parmetis.");
2028 #endif
2029 }
2030