xref: /petsc/src/dm/impls/plex/plexpartition.c (revision a69119a591a03a9d906b29c0a4e9802e4d7c9795)
1 #include <petsc/private/dmpleximpl.h> /*I      "petscdmplex.h"   I*/
2 #include <petsc/private/partitionerimpl.h>
3 #include <petsc/private/hashseti.h>
4 
/* Option names for DMPlexCSRAlgorithm: the first three strings correspond to the enum
   values (DM_PLEX_CSR_MAT, DM_PLEX_CSR_GRAPH, DM_PLEX_CSR_OVERLAP); the trailing entries
   are the enum type name and value prefix expected by PetscOptionsGetEnum() */
const char *const DMPlexCSRAlgorithms[] = {"mat", "graph", "overlap", "DMPlexCSRAlgorithm", "DM_PLEX_CSR_", NULL};
6 
7 static inline PetscInt DMPlex_GlobalID(PetscInt point) {
8   return point >= 0 ? point : -(point + 1);
9 }
10 
/* Build the partitioner cell-cell adjacency graph (CSR, FVM adjacency) by first growing a
   1-level overlap so that neighbor cells owned by other processes become locally visible.

   Input:  dm - the mesh; height - stratum height of the points treated as cells
   Output: numVertices      - number of locally-owned graph vertices (cells)
           offsets/adjacency - CSR arrays (adjacency holds global cell IDs); freed by the
                               caller with PetscFree(); either may be requested as NULL
           globalNumbering  - optional global cell numbering IS, destroyed by the caller */
static PetscErrorCode DMPlexCreatePartitionerGraph_Overlap(DM dm, PetscInt height, PetscInt *numVertices, PetscInt **offsets, PetscInt **adjacency, IS *globalNumbering) {
  DM              ovdm;
  PetscSF         sfPoint;
  IS              cellNumbering;
  const PetscInt *cellNum;
  PetscInt       *adj = NULL, *vOffsets = NULL, *vAdj = NULL;
  PetscBool       useCone, useClosure;
  PetscInt        dim, depth, overlap, cStart, cEnd, c, v;
  PetscMPIInt     rank, size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)dm), &size));
  PetscCall(DMGetDimension(dm, &dim));
  PetscCall(DMPlexGetDepth(dm, &depth));
  if (dim != depth) {
    /* We do not handle the uninterpolated case here */
    PetscCall(DMPlexCreateNeighborCSR(dm, height, numVertices, offsets, adjacency));
    /* DMPlexCreateNeighborCSR does not make a numbering */
    if (globalNumbering) PetscCall(DMPlexCreateCellNumbering_Internal(dm, PETSC_TRUE, globalNumbering));
    /* Different behavior for empty graphs */
    if (!*numVertices) {
      PetscCall(PetscMalloc1(1, offsets));
      (*offsets)[0] = 0;
    }
    /* Broken in parallel */
    if (rank) PetscCheck(!*numVertices, PETSC_COMM_SELF, PETSC_ERR_SUP, "Parallel partitioning of uninterpolated meshes not supported");
    PetscFunctionReturn(0);
  }
  /* Always use FVM adjacency to create partitioner graph */
  PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure));
  PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE));
  /* Need overlap >= 1 */
  PetscCall(DMPlexGetOverlap(dm, &overlap));
  if (size && overlap < 1) {
    /* No overlap yet: distribute a 1-level overlap into a new DM (ovdm) */
    PetscCall(DMPlexDistributeOverlap(dm, 1, NULL, &ovdm));
  } else {
    /* Existing overlap suffices; reference dm so the common destroy path works */
    PetscCall(PetscObjectReference((PetscObject)dm));
    ovdm = dm;
  }
  PetscCall(DMGetPointSF(ovdm, &sfPoint));
  PetscCall(DMPlexGetHeightStratum(ovdm, height, &cStart, &cEnd));
  PetscCall(DMPlexCreateNumbering_Plex(ovdm, cStart, cEnd, 0, NULL, sfPoint, &cellNumbering));
  if (globalNumbering) {
    PetscCall(PetscObjectReference((PetscObject)cellNumbering));
    *globalNumbering = cellNumbering;
  }
  PetscCall(ISGetIndices(cellNumbering, &cellNum));
  /* Determine sizes */
  for (*numVertices = 0, c = cStart; c < cEnd; ++c) {
    /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
    if (cellNum[c - cStart] < 0) continue;
    (*numVertices)++;
  }
  /* First pass: count, per owned cell, the adjacent cells in the cell stratum */
  PetscCall(PetscCalloc1(*numVertices + 1, &vOffsets));
  for (c = cStart, v = 0; c < cEnd; ++c) {
    PetscInt adjSize = PETSC_DETERMINE, a, vsize = 0;

    if (cellNum[c - cStart] < 0) continue;
    PetscCall(DMPlexGetAdjacency(ovdm, c, &adjSize, &adj));
    for (a = 0; a < adjSize; ++a) {
      const PetscInt point = adj[a];
      /* Count only other cells (self-edges excluded) */
      if (point != c && cStart <= point && point < cEnd) ++vsize;
    }
    vOffsets[v + 1] = vOffsets[v] + vsize;
    ++v;
  }
  /* Determine adjacency */
  PetscCall(PetscMalloc1(vOffsets[*numVertices], &vAdj));
  for (c = cStart, v = 0; c < cEnd; ++c) {
    PetscInt adjSize = PETSC_DETERMINE, a, off = vOffsets[v];

    /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
    if (cellNum[c - cStart] < 0) continue;
    PetscCall(DMPlexGetAdjacency(ovdm, c, &adjSize, &adj));
    for (a = 0; a < adjSize; ++a) {
      const PetscInt point = adj[a];
      /* Store the neighbor's global cell ID (decode ghost encoding) */
      if (point != c && cStart <= point && point < cEnd) vAdj[off++] = DMPlex_GlobalID(cellNum[point - cStart]);
    }
    PetscCheck(off == vOffsets[v + 1], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Offsets %" PetscInt_FMT " should be %" PetscInt_FMT, off, vOffsets[v + 1]);
    /* Sort adjacencies (not strictly necessary) */
    PetscCall(PetscSortInt(off - vOffsets[v], &vAdj[vOffsets[v]]));
    ++v;
  }
  PetscCall(PetscFree(adj));
  PetscCall(ISRestoreIndices(cellNumbering, &cellNum));
  PetscCall(ISDestroy(&cellNumbering));
  /* Restore the caller's adjacency definition */
  PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure));
  PetscCall(DMDestroy(&ovdm));
  /* Hand off or discard the CSR arrays depending on what the caller requested */
  if (offsets) {
    *offsets = vOffsets;
  } else PetscCall(PetscFree(vOffsets));
  if (adjacency) {
    *adjacency = vAdj;
  } else PetscCall(PetscFree(vAdj));
  PetscFunctionReturn(0);
}
108 
/* Build the partitioner cell-cell adjacency graph (CSR, FVM adjacency) without creating an
   overlapped DM: off-process neighbors are discovered by exchanging, for each shared face,
   the global ID of the cell on the other side via the point SF (Bcast for root->leaf,
   Reduce for leaf->root).

   Outputs mirror DMPlexCreatePartitionerGraph_Overlap: numVertices counts locally-owned
   cells, offsets/adjacency are CSR arrays of global cell IDs (caller frees with
   PetscFree()), and globalNumbering is an optional IS destroyed by the caller. */
static PetscErrorCode DMPlexCreatePartitionerGraph_Native(DM dm, PetscInt height, PetscInt *numVertices, PetscInt **offsets, PetscInt **adjacency, IS *globalNumbering) {
  PetscInt        dim, depth, p, pStart, pEnd, a, adjSize, idx, size;
  PetscInt       *adj = NULL, *vOffsets = NULL, *graph = NULL;
  IS              cellNumbering;
  const PetscInt *cellNum;
  PetscBool       useCone, useClosure;
  PetscSection    section;
  PetscSegBuffer  adjBuffer;
  PetscSF         sfPoint;
  PetscInt       *adjCells = NULL, *remoteCells = NULL;
  const PetscInt *local;
  PetscInt        nroots, nleaves, l;
  PetscMPIInt     rank;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
  PetscCall(DMGetDimension(dm, &dim));
  PetscCall(DMPlexGetDepth(dm, &depth));
  if (dim != depth) {
    /* We do not handle the uninterpolated case here */
    PetscCall(DMPlexCreateNeighborCSR(dm, height, numVertices, offsets, adjacency));
    /* DMPlexCreateNeighborCSR does not make a numbering */
    if (globalNumbering) PetscCall(DMPlexCreateCellNumbering_Internal(dm, PETSC_TRUE, globalNumbering));
    /* Different behavior for empty graphs */
    if (!*numVertices) {
      PetscCall(PetscMalloc1(1, offsets));
      (*offsets)[0] = 0;
    }
    /* Broken in parallel */
    if (rank) PetscCheck(!*numVertices, PETSC_COMM_SELF, PETSC_ERR_SUP, "Parallel partitioning of uninterpolated meshes not supported");
    PetscFunctionReturn(0);
  }
  PetscCall(DMGetPointSF(dm, &sfPoint));
  PetscCall(DMPlexGetHeightStratum(dm, height, &pStart, &pEnd));
  /* Build adjacency graph via a section/segbuffer */
  PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject)dm), &section));
  PetscCall(PetscSectionSetChart(section, pStart, pEnd));
  PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 1000, &adjBuffer));
  /* Always use FVM adjacency to create partitioner graph */
  PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure));
  PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE));
  PetscCall(DMPlexCreateNumbering_Plex(dm, pStart, pEnd, 0, NULL, sfPoint, &cellNumbering));
  if (globalNumbering) {
    PetscCall(PetscObjectReference((PetscObject)cellNumbering));
    *globalNumbering = cellNumbering;
  }
  PetscCall(ISGetIndices(cellNumbering, &cellNum));
  /* For all boundary faces (including faces adjacent to a ghost cell), record the local cell in adjCells
     Broadcast adjCells to remoteCells (to get cells from roots) and Reduce adjCells to remoteCells (to get cells from leaves)
   */
  PetscCall(PetscSFGetGraph(sfPoint, &nroots, &nleaves, &local, NULL));
  if (nroots >= 0) {
    PetscInt fStart, fEnd, f;

    PetscCall(PetscCalloc2(nroots, &adjCells, nroots, &remoteCells));
    PetscCall(DMPlexGetHeightStratum(dm, height + 1, &fStart, &fEnd));
    /* -3 marks points with no recorded adjacent cell */
    for (l = 0; l < nroots; ++l) adjCells[l] = -3;
    for (f = fStart; f < fEnd; ++f) {
      const PetscInt *support;
      PetscInt        supportSize;

      PetscCall(DMPlexGetSupport(dm, f, &support));
      PetscCall(DMPlexGetSupportSize(dm, f, &supportSize));
      /* Boundary (or ghost-adjacent) face: record its single cell */
      if (supportSize == 1) adjCells[f] = DMPlex_GlobalID(cellNum[support[0] - pStart]);
      else if (supportSize == 2) {
        /* Interior face where one cell is a leaf: record the other cell's global ID */
        PetscCall(PetscFindInt(support[0], nleaves, local, &p));
        if (p >= 0) adjCells[f] = DMPlex_GlobalID(cellNum[support[1] - pStart]);
        PetscCall(PetscFindInt(support[1], nleaves, local, &p));
        if (p >= 0) adjCells[f] = DMPlex_GlobalID(cellNum[support[0] - pStart]);
      }
      /* Handle non-conforming meshes */
      if (supportSize > 2) {
        PetscInt        numChildren, i;
        const PetscInt *children;

        PetscCall(DMPlexGetTreeChildren(dm, f, &numChildren, &children));
        for (i = 0; i < numChildren; ++i) {
          const PetscInt child = children[i];
          if (fStart <= child && child < fEnd) {
            PetscCall(DMPlexGetSupport(dm, child, &support));
            PetscCall(DMPlexGetSupportSize(dm, child, &supportSize));
            if (supportSize == 1) adjCells[child] = DMPlex_GlobalID(cellNum[support[0] - pStart]);
            else if (supportSize == 2) {
              PetscCall(PetscFindInt(support[0], nleaves, local, &p));
              if (p >= 0) adjCells[child] = DMPlex_GlobalID(cellNum[support[1] - pStart]);
              PetscCall(PetscFindInt(support[1], nleaves, local, &p));
              if (p >= 0) adjCells[child] = DMPlex_GlobalID(cellNum[support[0] - pStart]);
            }
          }
        }
      }
    }
    for (l = 0; l < nroots; ++l) remoteCells[l] = -1;
    /* Exchange both ways; MPI_MAX keeps the valid (>= 0) cell ID over the -1/-3 markers */
    PetscCall(PetscSFBcastBegin(dm->sf, MPIU_INT, adjCells, remoteCells, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(dm->sf, MPIU_INT, adjCells, remoteCells, MPI_REPLACE));
    PetscCall(PetscSFReduceBegin(dm->sf, MPIU_INT, adjCells, remoteCells, MPI_MAX));
    PetscCall(PetscSFReduceEnd(dm->sf, MPIU_INT, adjCells, remoteCells, MPI_MAX));
  }
  /* Combine local and global adjacencies */
  for (*numVertices = 0, p = pStart; p < pEnd; p++) {
    /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
    if (nroots > 0) {
      if (cellNum[p - pStart] < 0) continue;
    }
    /* Add remote cells */
    if (remoteCells) {
      const PetscInt  gp = DMPlex_GlobalID(cellNum[p - pStart]);
      PetscInt        coneSize, numChildren, c, i;
      const PetscInt *cone, *children;

      PetscCall(DMPlexGetCone(dm, p, &cone));
      PetscCall(DMPlexGetConeSize(dm, p, &coneSize));
      for (c = 0; c < coneSize; ++c) {
        const PetscInt point = cone[c];
        if (remoteCells[point] >= 0 && remoteCells[point] != gp) {
          PetscInt *PETSC_RESTRICT pBuf;
          PetscCall(PetscSectionAddDof(section, p, 1));
          PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf));
          *pBuf = remoteCells[point];
        }
        /* Handle non-conforming meshes */
        PetscCall(DMPlexGetTreeChildren(dm, point, &numChildren, &children));
        for (i = 0; i < numChildren; ++i) {
          const PetscInt child = children[i];
          if (remoteCells[child] >= 0 && remoteCells[child] != gp) {
            PetscInt *PETSC_RESTRICT pBuf;
            PetscCall(PetscSectionAddDof(section, p, 1));
            PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf));
            *pBuf = remoteCells[child];
          }
        }
      }
    }
    /* Add local cells */
    adjSize = PETSC_DETERMINE;
    PetscCall(DMPlexGetAdjacency(dm, p, &adjSize, &adj));
    for (a = 0; a < adjSize; ++a) {
      const PetscInt point = adj[a];
      if (point != p && pStart <= point && point < pEnd) {
        PetscInt *PETSC_RESTRICT pBuf;
        PetscCall(PetscSectionAddDof(section, p, 1));
        PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf));
        *pBuf = DMPlex_GlobalID(cellNum[point - pStart]);
      }
    }
    (*numVertices)++;
  }
  PetscCall(PetscFree(adj));
  PetscCall(PetscFree2(adjCells, remoteCells));
  PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure));

  /* Derive CSR graph from section/segbuffer */
  PetscCall(PetscSectionSetUp(section));
  PetscCall(PetscSectionGetStorageSize(section, &size));
  PetscCall(PetscMalloc1(*numVertices + 1, &vOffsets));
  for (idx = 0, p = pStart; p < pEnd; p++) {
    if (nroots > 0) {
      if (cellNum[p - pStart] < 0) continue;
    }
    PetscCall(PetscSectionGetOffset(section, p, &(vOffsets[idx++])));
  }
  vOffsets[*numVertices] = size;
  PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &graph));

  if (nroots >= 0) {
    /* Filter out duplicate edges using section/segbuffer */
    PetscCall(PetscSectionReset(section));
    PetscCall(PetscSectionSetChart(section, 0, *numVertices));
    for (p = 0; p < *numVertices; p++) {
      PetscInt start = vOffsets[p], end = vOffsets[p + 1];
      PetscInt numEdges = end - start, *PETSC_RESTRICT edges;
      PetscCall(PetscSortRemoveDupsInt(&numEdges, &graph[start]));
      PetscCall(PetscSectionSetDof(section, p, numEdges));
      PetscCall(PetscSegBufferGetInts(adjBuffer, numEdges, &edges));
      PetscCall(PetscArraycpy(edges, &graph[start], numEdges));
    }
    PetscCall(PetscFree(vOffsets));
    PetscCall(PetscFree(graph));
    /* Derive CSR graph from section/segbuffer */
    PetscCall(PetscSectionSetUp(section));
    PetscCall(PetscSectionGetStorageSize(section, &size));
    PetscCall(PetscMalloc1(*numVertices + 1, &vOffsets));
    for (idx = 0, p = 0; p < *numVertices; p++) PetscCall(PetscSectionGetOffset(section, p, &(vOffsets[idx++])));
    vOffsets[*numVertices] = size;
    PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &graph));
  } else {
    /* Sort adjacencies (not strictly necessary) */
    for (p = 0; p < *numVertices; p++) {
      PetscInt start = vOffsets[p], end = vOffsets[p + 1];
      PetscCall(PetscSortInt(end - start, &graph[start]));
    }
  }

  if (offsets) *offsets = vOffsets;
  if (adjacency) *adjacency = graph;

  /* Cleanup */
  PetscCall(PetscSegBufferDestroy(&adjBuffer));
  PetscCall(PetscSectionDestroy(&section));
  PetscCall(ISRestoreIndices(cellNumbering, &cellNum));
  PetscCall(ISDestroy(&cellNumbering));
  PetscFunctionReturn(0);
}
312 
/* Build the partitioner cell-cell adjacency graph by assembling a parallel face-cell
   connectivity matrix F and computing F^T * F; off-diagonal nonzeros of the product are
   exactly the pairs of cells sharing a face. Self-connectivity (the diagonal) is stripped
   when extracting the CSR arrays, since ParMetis does not accept self-edges. */
static PetscErrorCode DMPlexCreatePartitionerGraph_ViaMat(DM dm, PetscInt height, PetscInt *numVertices, PetscInt **offsets, PetscInt **adjacency, IS *globalNumbering) {
  Mat             conn, CSR;
  IS              fis, cis, cis_own;
  PetscSF         sfPoint;
  const PetscInt *rows, *cols, *ii, *jj;
  PetscInt       *idxs, *idxs2;
  PetscInt        dim, depth, floc, cloc, i, M, N, c, lm, m, cStart, cEnd, fStart, fEnd;
  PetscMPIInt     rank;
  PetscBool       flg;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
  PetscCall(DMGetDimension(dm, &dim));
  PetscCall(DMPlexGetDepth(dm, &depth));
  if (dim != depth) {
    /* We do not handle the uninterpolated case here */
    PetscCall(DMPlexCreateNeighborCSR(dm, height, numVertices, offsets, adjacency));
    /* DMPlexCreateNeighborCSR does not make a numbering */
    if (globalNumbering) PetscCall(DMPlexCreateCellNumbering_Internal(dm, PETSC_TRUE, globalNumbering));
    /* Different behavior for empty graphs */
    if (!*numVertices) {
      PetscCall(PetscMalloc1(1, offsets));
      (*offsets)[0] = 0;
    }
    /* Broken in parallel */
    if (rank) PetscCheck(!*numVertices, PETSC_COMM_SELF, PETSC_ERR_SUP, "Parallel partitioning of uninterpolated meshes not supported");
    PetscFunctionReturn(0);
  }
  /* Interpolated and parallel case */

  /* numbering */
  PetscCall(DMGetPointSF(dm, &sfPoint));
  PetscCall(DMPlexGetHeightStratum(dm, height, &cStart, &cEnd));
  PetscCall(DMPlexGetHeightStratum(dm, height + 1, &fStart, &fEnd));
  PetscCall(DMPlexCreateNumbering_Plex(dm, cStart, cEnd, 0, &N, sfPoint, &cis));
  PetscCall(DMPlexCreateNumbering_Plex(dm, fStart, fEnd, 0, &M, sfPoint, &fis));
  if (globalNumbering) PetscCall(ISDuplicate(cis, globalNumbering));

  /* get positive global ids and local sizes for facets and cells */
  PetscCall(ISGetLocalSize(fis, &m));
  PetscCall(ISGetIndices(fis, &rows));
  PetscCall(PetscMalloc1(m, &idxs));
  for (i = 0, floc = 0; i < m; i++) {
    const PetscInt p = rows[i];

    if (p < 0) {
      /* Ghost facet: decode -(id + 1) encoding; not counted in the local row size */
      idxs[i] = -(p + 1);
    } else {
      idxs[i] = p;
      floc += 1;
    }
  }
  PetscCall(ISRestoreIndices(fis, &rows));
  PetscCall(ISDestroy(&fis));
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idxs, PETSC_OWN_POINTER, &fis));

  PetscCall(ISGetLocalSize(cis, &m));
  PetscCall(ISGetIndices(cis, &cols));
  PetscCall(PetscMalloc1(m, &idxs));
  PetscCall(PetscMalloc1(m, &idxs2));
  for (i = 0, cloc = 0; i < m; i++) {
    const PetscInt p = cols[i];

    if (p < 0) {
      idxs[i] = -(p + 1);
    } else {
      /* Owned cell: also collect its global ID in idxs2 for the owned-cell IS */
      idxs[i]       = p;
      idxs2[cloc++] = p;
    }
  }
  PetscCall(ISRestoreIndices(cis, &cols));
  PetscCall(ISDestroy(&cis));
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)dm), m, idxs, PETSC_OWN_POINTER, &cis));
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)dm), cloc, idxs2, PETSC_OWN_POINTER, &cis_own));

  /* Create matrix to hold F-C connectivity (MatMatTranspose Mult not supported for MPIAIJ) */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)dm), &conn));
  PetscCall(MatSetSizes(conn, floc, cloc, M, N));
  PetscCall(MatSetType(conn, MATMPIAIJ));
  PetscCall(DMPlexGetMaxSizes(dm, NULL, &lm));
  /* NOTE(review): summing the per-rank max support sizes looks like a deliberate
     preallocation overestimate — confirm this is the intent */
  PetscCallMPI(MPI_Allreduce(&lm, &m, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)dm)));
  PetscCall(MatMPIAIJSetPreallocation(conn, m, NULL, m, NULL));

  /* Assemble matrix */
  PetscCall(ISGetIndices(fis, &rows));
  PetscCall(ISGetIndices(cis, &cols));
  for (c = cStart; c < cEnd; c++) {
    const PetscInt *cone;
    PetscInt        coneSize, row, col, f;

    col = cols[c - cStart];
    PetscCall(DMPlexGetCone(dm, c, &cone));
    PetscCall(DMPlexGetConeSize(dm, c, &coneSize));
    for (f = 0; f < coneSize; f++) {
      const PetscScalar v = 1.0;
      const PetscInt   *children;
      PetscInt          numChildren, ch;

      row = rows[cone[f] - fStart];
      PetscCall(MatSetValues(conn, 1, &row, 1, &col, &v, INSERT_VALUES));

      /* non-conforming meshes */
      PetscCall(DMPlexGetTreeChildren(dm, cone[f], &numChildren, &children));
      for (ch = 0; ch < numChildren; ch++) {
        const PetscInt child = children[ch];

        if (child < fStart || child >= fEnd) continue;
        row = rows[child - fStart];
        PetscCall(MatSetValues(conn, 1, &row, 1, &col, &v, INSERT_VALUES));
      }
    }
  }
  PetscCall(ISRestoreIndices(fis, &rows));
  PetscCall(ISRestoreIndices(cis, &cols));
  PetscCall(ISDestroy(&fis));
  PetscCall(ISDestroy(&cis));
  PetscCall(MatAssemblyBegin(conn, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(conn, MAT_FINAL_ASSEMBLY));

  /* Get parallel CSR by doing conn^T * conn */
  PetscCall(MatTransposeMatMult(conn, conn, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &CSR));
  PetscCall(MatDestroy(&conn));

  /* extract local part of the CSR */
  PetscCall(MatMPIAIJGetLocalMat(CSR, MAT_INITIAL_MATRIX, &conn));
  PetscCall(MatDestroy(&CSR));
  PetscCall(MatGetRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &m, &ii, &jj, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "No IJ format");

  /* get back requested output */
  if (numVertices) *numVertices = m;
  if (offsets) {
    PetscCall(PetscCalloc1(m + 1, &idxs));
    for (i = 1; i < m + 1; i++) idxs[i] = ii[i] - i; /* ParMetis does not like self-connectivity */
    *offsets = idxs;
  }
  if (adjacency) {
    PetscCall(PetscMalloc1(ii[m] - m, &idxs));
    PetscCall(ISGetIndices(cis_own, &rows));
    for (i = 0, c = 0; i < m; i++) {
      PetscInt j, g = rows[i];

      for (j = ii[i]; j < ii[i + 1]; j++) {
        if (jj[j] == g) continue; /* again, self-connectivity */
        idxs[c++] = jj[j];
      }
    }
    PetscCheck(c == ii[m] - m, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected %" PetscInt_FMT " != %" PetscInt_FMT, c, ii[m] - m);
    PetscCall(ISRestoreIndices(cis_own, &rows));
    *adjacency = idxs;
  }

  /* cleanup */
  PetscCall(ISDestroy(&cis_own));
  PetscCall(MatRestoreRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &m, &ii, &jj, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "No IJ format");
  PetscCall(MatDestroy(&conn));
  PetscFunctionReturn(0);
}
472 
473 /*@C
474   DMPlexCreatePartitionerGraph - Create a CSR graph of point connections for the partitioner
475 
476   Input Parameters:
+ dm      - The mesh DM
478 - height  - Height of the strata from which to construct the graph
479 
480   Output Parameters:
481 + numVertices     - Number of vertices in the graph
482 . offsets         - Point offsets in the graph
483 . adjacency       - Point connectivity in the graph
484 - globalNumbering - A map from the local cell numbering to the global numbering used in "adjacency".  Negative indicates that the cell is a duplicate from another process.
485 
486   The user can control the definition of adjacency for the mesh using DMSetAdjacency(). They should choose the combination appropriate for the function
487   representation on the mesh. If requested, globalNumbering needs to be destroyed by the caller; offsets and adjacency need to be freed with PetscFree().
488 
489   Options Database Keys:
490 . -dm_plex_csr_alg <mat,graph,overlap> - Choose the algorithm for computing the CSR graph
491 
492   Level: developer
493 
494 .seealso: `PetscPartitionerGetType()`, `PetscPartitionerCreate()`, `DMSetAdjacency()`
495 @*/
496 PetscErrorCode DMPlexCreatePartitionerGraph(DM dm, PetscInt height, PetscInt *numVertices, PetscInt **offsets, PetscInt **adjacency, IS *globalNumbering) {
497   DMPlexCSRAlgorithm alg = DM_PLEX_CSR_GRAPH;
498 
499   PetscFunctionBegin;
500   PetscCall(PetscOptionsGetEnum(((PetscObject)dm)->options, ((PetscObject)dm)->prefix, "-dm_plex_csr_alg", DMPlexCSRAlgorithms, (PetscEnum *)&alg, NULL));
501   switch (alg) {
502   case DM_PLEX_CSR_MAT: PetscCall(DMPlexCreatePartitionerGraph_ViaMat(dm, height, numVertices, offsets, adjacency, globalNumbering)); break;
503   case DM_PLEX_CSR_GRAPH: PetscCall(DMPlexCreatePartitionerGraph_Native(dm, height, numVertices, offsets, adjacency, globalNumbering)); break;
504   case DM_PLEX_CSR_OVERLAP: PetscCall(DMPlexCreatePartitionerGraph_Overlap(dm, height, numVertices, offsets, adjacency, globalNumbering)); break;
505   }
506   PetscFunctionReturn(0);
507 }
508 
509 /*@C
510   DMPlexCreateNeighborCSR - Create a mesh graph (cell-cell adjacency) in parallel CSR format.
511 
512   Collective on DM
513 
514   Input Parameters:
515 + dm - The DMPlex
516 - cellHeight - The height of mesh points to treat as cells (default should be 0)
517 
518   Output Parameters:
519 + numVertices - The number of local vertices in the graph, or cells in the mesh.
520 . offsets     - The offset to the adjacency list for each cell
521 - adjacency   - The adjacency list for all cells
522 
523   Note: This is suitable for input to a mesh partitioner like ParMetis.
524 
525   Level: advanced
526 
527 .seealso: `DMPlexCreate()`
528 @*/
529 PetscErrorCode DMPlexCreateNeighborCSR(DM dm, PetscInt cellHeight, PetscInt *numVertices, PetscInt **offsets, PetscInt **adjacency) {
530   const PetscInt maxFaceCases = 30;
531   PetscInt       numFaceCases = 0;
532   PetscInt       numFaceVertices[30]; /* maxFaceCases, C89 sucks sucks sucks */
533   PetscInt      *off, *adj;
534   PetscInt      *neighborCells = NULL;
535   PetscInt       dim, cellDim, depth = 0, faceDepth, cStart, cEnd, c, numCells, cell;
536 
537   PetscFunctionBegin;
538   /* For parallel partitioning, I think you have to communicate supports */
539   PetscCall(DMGetDimension(dm, &dim));
540   cellDim = dim - cellHeight;
541   PetscCall(DMPlexGetDepth(dm, &depth));
542   PetscCall(DMPlexGetHeightStratum(dm, cellHeight, &cStart, &cEnd));
543   if (cEnd - cStart == 0) {
544     if (numVertices) *numVertices = 0;
545     if (offsets) *offsets = NULL;
546     if (adjacency) *adjacency = NULL;
547     PetscFunctionReturn(0);
548   }
549   numCells  = cEnd - cStart;
550   faceDepth = depth - cellHeight;
551   if (dim == depth) {
552     PetscInt f, fStart, fEnd;
553 
554     PetscCall(PetscCalloc1(numCells + 1, &off));
555     /* Count neighboring cells */
556     PetscCall(DMPlexGetHeightStratum(dm, cellHeight + 1, &fStart, &fEnd));
557     for (f = fStart; f < fEnd; ++f) {
558       const PetscInt *support;
559       PetscInt        supportSize;
560       PetscCall(DMPlexGetSupportSize(dm, f, &supportSize));
561       PetscCall(DMPlexGetSupport(dm, f, &support));
562       if (supportSize == 2) {
563         PetscInt numChildren;
564 
565         PetscCall(DMPlexGetTreeChildren(dm, f, &numChildren, NULL));
566         if (!numChildren) {
567           ++off[support[0] - cStart + 1];
568           ++off[support[1] - cStart + 1];
569         }
570       }
571     }
572     /* Prefix sum */
573     for (c = 1; c <= numCells; ++c) off[c] += off[c - 1];
574     if (adjacency) {
575       PetscInt *tmp;
576 
577       PetscCall(PetscMalloc1(off[numCells], &adj));
578       PetscCall(PetscMalloc1(numCells + 1, &tmp));
579       PetscCall(PetscArraycpy(tmp, off, numCells + 1));
580       /* Get neighboring cells */
581       for (f = fStart; f < fEnd; ++f) {
582         const PetscInt *support;
583         PetscInt        supportSize;
584         PetscCall(DMPlexGetSupportSize(dm, f, &supportSize));
585         PetscCall(DMPlexGetSupport(dm, f, &support));
586         if (supportSize == 2) {
587           PetscInt numChildren;
588 
589           PetscCall(DMPlexGetTreeChildren(dm, f, &numChildren, NULL));
590           if (!numChildren) {
591             adj[tmp[support[0] - cStart]++] = support[1];
592             adj[tmp[support[1] - cStart]++] = support[0];
593           }
594         }
595       }
596       for (c = 0; c < cEnd - cStart; ++c) PetscAssert(tmp[c] == off[c + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Offset %" PetscInt_FMT " != %" PetscInt_FMT " for cell %" PetscInt_FMT, tmp[c], off[c], c + cStart);
597       PetscCall(PetscFree(tmp));
598     }
599     if (numVertices) *numVertices = numCells;
600     if (offsets) *offsets = off;
601     if (adjacency) *adjacency = adj;
602     PetscFunctionReturn(0);
603   }
604   /* Setup face recognition */
605   if (faceDepth == 1) {
606     PetscInt cornersSeen[30] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* Could use PetscBT */
607 
608     for (c = cStart; c < cEnd; ++c) {
609       PetscInt corners;
610 
611       PetscCall(DMPlexGetConeSize(dm, c, &corners));
612       if (!cornersSeen[corners]) {
613         PetscInt nFV;
614 
615         PetscCheck(numFaceCases < maxFaceCases, PetscObjectComm((PetscObject)dm), PETSC_ERR_PLIB, "Exceeded maximum number of face recognition cases");
616         cornersSeen[corners] = 1;
617 
618         PetscCall(DMPlexGetNumFaceVertices(dm, cellDim, corners, &nFV));
619 
620         numFaceVertices[numFaceCases++] = nFV;
621       }
622     }
623   }
624   PetscCall(PetscCalloc1(numCells + 1, &off));
625   /* Count neighboring cells */
626   for (cell = cStart; cell < cEnd; ++cell) {
627     PetscInt numNeighbors = PETSC_DETERMINE, n;
628 
629     PetscCall(DMPlexGetAdjacency_Internal(dm, cell, PETSC_TRUE, PETSC_FALSE, PETSC_FALSE, &numNeighbors, &neighborCells));
630     /* Get meet with each cell, and check with recognizer (could optimize to check each pair only once) */
631     for (n = 0; n < numNeighbors; ++n) {
632       PetscInt        cellPair[2];
633       PetscBool       found    = faceDepth > 1 ? PETSC_TRUE : PETSC_FALSE;
634       PetscInt        meetSize = 0;
635       const PetscInt *meet     = NULL;
636 
637       cellPair[0] = cell;
638       cellPair[1] = neighborCells[n];
639       if (cellPair[0] == cellPair[1]) continue;
640       if (!found) {
641         PetscCall(DMPlexGetMeet(dm, 2, cellPair, &meetSize, &meet));
642         if (meetSize) {
643           PetscInt f;
644 
645           for (f = 0; f < numFaceCases; ++f) {
646             if (numFaceVertices[f] == meetSize) {
647               found = PETSC_TRUE;
648               break;
649             }
650           }
651         }
652         PetscCall(DMPlexRestoreMeet(dm, 2, cellPair, &meetSize, &meet));
653       }
654       if (found) ++off[cell - cStart + 1];
655     }
656   }
657   /* Prefix sum */
658   for (cell = 1; cell <= numCells; ++cell) off[cell] += off[cell - 1];
659 
660   if (adjacency) {
661     PetscCall(PetscMalloc1(off[numCells], &adj));
662     /* Get neighboring cells */
663     for (cell = cStart; cell < cEnd; ++cell) {
664       PetscInt numNeighbors = PETSC_DETERMINE, n;
665       PetscInt cellOffset   = 0;
666 
667       PetscCall(DMPlexGetAdjacency_Internal(dm, cell, PETSC_TRUE, PETSC_FALSE, PETSC_FALSE, &numNeighbors, &neighborCells));
668       /* Get meet with each cell, and check with recognizer (could optimize to check each pair only once) */
669       for (n = 0; n < numNeighbors; ++n) {
670         PetscInt        cellPair[2];
671         PetscBool       found    = faceDepth > 1 ? PETSC_TRUE : PETSC_FALSE;
672         PetscInt        meetSize = 0;
673         const PetscInt *meet     = NULL;
674 
675         cellPair[0] = cell;
676         cellPair[1] = neighborCells[n];
677         if (cellPair[0] == cellPair[1]) continue;
678         if (!found) {
679           PetscCall(DMPlexGetMeet(dm, 2, cellPair, &meetSize, &meet));
680           if (meetSize) {
681             PetscInt f;
682 
683             for (f = 0; f < numFaceCases; ++f) {
684               if (numFaceVertices[f] == meetSize) {
685                 found = PETSC_TRUE;
686                 break;
687               }
688             }
689           }
690           PetscCall(DMPlexRestoreMeet(dm, 2, cellPair, &meetSize, &meet));
691         }
692         if (found) {
693           adj[off[cell - cStart] + cellOffset] = neighborCells[n];
694           ++cellOffset;
695         }
696       }
697     }
698   }
699   PetscCall(PetscFree(neighborCells));
700   if (numVertices) *numVertices = numCells;
701   if (offsets) *offsets = off;
702   if (adjacency) *adjacency = adj;
703   PetscFunctionReturn(0);
704 }
705 
706 /*@
707   PetscPartitionerDMPlexPartition - Create a non-overlapping partition of the cells in the mesh
708 
709   Collective on PetscPartitioner
710 
711   Input Parameters:
712 + part    - The PetscPartitioner
713 . targetSection - The PetscSection describing the absolute weight of each partition (can be NULL)
714 - dm      - The mesh DM
715 
716   Output Parameters:
717 + partSection     - The PetscSection giving the division of points by partition
718 - partition       - The list of points by partition
719 
720   Notes:
721     If the DM has a local section associated, each point to be partitioned will be weighted by the total number of dofs identified
722     by the section in the transitive closure of the point.
723 
724   Level: developer
725 
726 .seealso `DMPlexDistribute()`, `PetscPartitionerCreate()`, `PetscSectionCreate()`, `PetscSectionSetChart()`, `PetscPartitionerPartition()`
727 @*/
728 PetscErrorCode PetscPartitionerDMPlexPartition(PetscPartitioner part, DM dm, PetscSection targetSection, PetscSection partSection, IS *partition) {
729   PetscMPIInt  size;
730   PetscBool    isplex;
731   PetscSection vertSection = NULL;
732 
733   PetscFunctionBegin;
734   PetscValidHeaderSpecific(part, PETSCPARTITIONER_CLASSID, 1);
735   PetscValidHeaderSpecific(dm, DM_CLASSID, 2);
736   if (targetSection) PetscValidHeaderSpecific(targetSection, PETSC_SECTION_CLASSID, 3);
737   PetscValidHeaderSpecific(partSection, PETSC_SECTION_CLASSID, 4);
738   PetscValidPointer(partition, 5);
739   PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMPLEX, &isplex));
740   PetscCheck(isplex, PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)dm)->type_name);
741   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)part), &size));
742   if (size == 1) {
743     PetscInt *points;
744     PetscInt  cStart, cEnd, c;
745 
746     PetscCall(DMPlexGetHeightStratum(dm, part->height, &cStart, &cEnd));
747     PetscCall(PetscSectionReset(partSection));
748     PetscCall(PetscSectionSetChart(partSection, 0, size));
749     PetscCall(PetscSectionSetDof(partSection, 0, cEnd - cStart));
750     PetscCall(PetscSectionSetUp(partSection));
751     PetscCall(PetscMalloc1(cEnd - cStart, &points));
752     for (c = cStart; c < cEnd; ++c) points[c] = c;
753     PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)part), cEnd - cStart, points, PETSC_OWN_POINTER, partition));
754     PetscFunctionReturn(0);
755   }
756   if (part->height == 0) {
757     PetscInt  numVertices = 0;
758     PetscInt *start       = NULL;
759     PetscInt *adjacency   = NULL;
760     IS        globalNumbering;
761 
762     if (!part->noGraph || part->viewGraph) {
763       PetscCall(DMPlexCreatePartitionerGraph(dm, part->height, &numVertices, &start, &adjacency, &globalNumbering));
764     } else { /* only compute the number of owned local vertices */
765       const PetscInt *idxs;
766       PetscInt        p, pStart, pEnd;
767 
768       PetscCall(DMPlexGetHeightStratum(dm, part->height, &pStart, &pEnd));
769       PetscCall(DMPlexCreateNumbering_Plex(dm, pStart, pEnd, 0, NULL, dm->sf, &globalNumbering));
770       PetscCall(ISGetIndices(globalNumbering, &idxs));
771       for (p = 0; p < pEnd - pStart; p++) numVertices += idxs[p] < 0 ? 0 : 1;
772       PetscCall(ISRestoreIndices(globalNumbering, &idxs));
773     }
774     if (part->usevwgt) {
775       PetscSection    section = dm->localSection, clSection = NULL;
776       IS              clPoints = NULL;
777       const PetscInt *gid, *clIdx;
778       PetscInt        v, p, pStart, pEnd;
779 
780       /* dm->localSection encodes degrees of freedom per point, not per cell. We need to get the closure index to properly specify cell weights (aka dofs) */
781       /* We do this only if the local section has been set */
782       if (section) {
783         PetscCall(PetscSectionGetClosureIndex(section, (PetscObject)dm, &clSection, NULL));
784         if (!clSection) PetscCall(DMPlexCreateClosureIndex(dm, NULL));
785         PetscCall(PetscSectionGetClosureIndex(section, (PetscObject)dm, &clSection, &clPoints));
786         PetscCall(ISGetIndices(clPoints, &clIdx));
787       }
788       PetscCall(DMPlexGetHeightStratum(dm, part->height, &pStart, &pEnd));
789       PetscCall(PetscSectionCreate(PETSC_COMM_SELF, &vertSection));
790       PetscCall(PetscSectionSetChart(vertSection, 0, numVertices));
791       if (globalNumbering) {
792         PetscCall(ISGetIndices(globalNumbering, &gid));
793       } else gid = NULL;
794       for (p = pStart, v = 0; p < pEnd; ++p) {
795         PetscInt dof = 1;
796 
797         /* skip cells in the overlap */
798         if (gid && gid[p - pStart] < 0) continue;
799 
800         if (section) {
801           PetscInt cl, clSize, clOff;
802 
803           dof = 0;
804           PetscCall(PetscSectionGetDof(clSection, p, &clSize));
805           PetscCall(PetscSectionGetOffset(clSection, p, &clOff));
806           for (cl = 0; cl < clSize; cl += 2) {
807             PetscInt clDof, clPoint = clIdx[clOff + cl]; /* odd indices are reserved for orientations */
808 
809             PetscCall(PetscSectionGetDof(section, clPoint, &clDof));
810             dof += clDof;
811           }
812         }
813         PetscCheck(dof, PETSC_COMM_SELF, PETSC_ERR_SUP, "Number of dofs for point %" PetscInt_FMT " in the local section should be positive", p);
814         PetscCall(PetscSectionSetDof(vertSection, v, dof));
815         v++;
816       }
817       if (globalNumbering) PetscCall(ISRestoreIndices(globalNumbering, &gid));
818       if (clPoints) PetscCall(ISRestoreIndices(clPoints, &clIdx));
819       PetscCall(PetscSectionSetUp(vertSection));
820     }
821     PetscCall(PetscPartitionerPartition(part, size, numVertices, start, adjacency, vertSection, targetSection, partSection, partition));
822     PetscCall(PetscFree(start));
823     PetscCall(PetscFree(adjacency));
824     if (globalNumbering) { /* partition is wrt global unique numbering: change this to be wrt local numbering */
825       const PetscInt *globalNum;
826       const PetscInt *partIdx;
827       PetscInt       *map, cStart, cEnd;
828       PetscInt       *adjusted, i, localSize, offset;
829       IS              newPartition;
830 
831       PetscCall(ISGetLocalSize(*partition, &localSize));
832       PetscCall(PetscMalloc1(localSize, &adjusted));
833       PetscCall(ISGetIndices(globalNumbering, &globalNum));
834       PetscCall(ISGetIndices(*partition, &partIdx));
835       PetscCall(PetscMalloc1(localSize, &map));
836       PetscCall(DMPlexGetHeightStratum(dm, part->height, &cStart, &cEnd));
837       for (i = cStart, offset = 0; i < cEnd; i++) {
838         if (globalNum[i - cStart] >= 0) map[offset++] = i;
839       }
840       for (i = 0; i < localSize; i++) adjusted[i] = map[partIdx[i]];
841       PetscCall(PetscFree(map));
842       PetscCall(ISRestoreIndices(*partition, &partIdx));
843       PetscCall(ISRestoreIndices(globalNumbering, &globalNum));
844       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, localSize, adjusted, PETSC_OWN_POINTER, &newPartition));
845       PetscCall(ISDestroy(&globalNumbering));
846       PetscCall(ISDestroy(partition));
847       *partition = newPartition;
848     }
849   } else SETERRQ(PetscObjectComm((PetscObject)part), PETSC_ERR_ARG_OUTOFRANGE, "Invalid height %" PetscInt_FMT " for points to partition", part->height);
850   PetscCall(PetscSectionDestroy(&vertSection));
851   PetscFunctionReturn(0);
852 }
853 
854 /*@
855   DMPlexGetPartitioner - Get the mesh partitioner
856 
857   Not collective
858 
859   Input Parameter:
860 . dm - The DM
861 
862   Output Parameter:
863 . part - The PetscPartitioner
864 
865   Level: developer
866 
867   Note: This gets a borrowed reference, so the user should not destroy this PetscPartitioner.
868 
869 .seealso `DMPlexDistribute()`, `DMPlexSetPartitioner()`, `PetscPartitionerDMPlexPartition()`, `PetscPartitionerCreate()`
870 @*/
871 PetscErrorCode DMPlexGetPartitioner(DM dm, PetscPartitioner *part) {
872   DM_Plex *mesh = (DM_Plex *)dm->data;
873 
874   PetscFunctionBegin;
875   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
876   PetscValidPointer(part, 2);
877   *part = mesh->partitioner;
878   PetscFunctionReturn(0);
879 }
880 
881 /*@
882   DMPlexSetPartitioner - Set the mesh partitioner
883 
884   logically collective on DM
885 
886   Input Parameters:
887 + dm - The DM
888 - part - The partitioner
889 
890   Level: developer
891 
892   Note: Any existing PetscPartitioner will be destroyed.
893 
894 .seealso `DMPlexDistribute()`, `DMPlexGetPartitioner()`, `PetscPartitionerCreate()`
895 @*/
896 PetscErrorCode DMPlexSetPartitioner(DM dm, PetscPartitioner part) {
897   DM_Plex *mesh = (DM_Plex *)dm->data;
898 
899   PetscFunctionBegin;
900   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
901   PetscValidHeaderSpecific(part, PETSCPARTITIONER_CLASSID, 2);
902   PetscCall(PetscObjectReference((PetscObject)part));
903   PetscCall(PetscPartitionerDestroy(&mesh->partitioner));
904   mesh->partitioner = part;
905   PetscFunctionReturn(0);
906 }
907 
908 static PetscErrorCode DMPlexAddClosure_Private(DM dm, PetscHSetI ht, PetscInt point) {
909   const PetscInt *cone;
910   PetscInt        coneSize, c;
911   PetscBool       missing;
912 
913   PetscFunctionBeginHot;
914   PetscCall(PetscHSetIQueryAdd(ht, point, &missing));
915   if (missing) {
916     PetscCall(DMPlexGetCone(dm, point, &cone));
917     PetscCall(DMPlexGetConeSize(dm, point, &coneSize));
918     for (c = 0; c < coneSize; c++) PetscCall(DMPlexAddClosure_Private(dm, ht, cone[c]));
919   }
920   PetscFunctionReturn(0);
921 }
922 
923 PETSC_UNUSED static PetscErrorCode DMPlexAddClosure_Tree(DM dm, PetscHSetI ht, PetscInt point, PetscBool up, PetscBool down) {
924   PetscFunctionBegin;
925   if (up) {
926     PetscInt parent;
927 
928     PetscCall(DMPlexGetTreeParent(dm, point, &parent, NULL));
929     if (parent != point) {
930       PetscInt closureSize, *closure = NULL, i;
931 
932       PetscCall(DMPlexGetTransitiveClosure(dm, parent, PETSC_TRUE, &closureSize, &closure));
933       for (i = 0; i < closureSize; i++) {
934         PetscInt cpoint = closure[2 * i];
935 
936         PetscCall(PetscHSetIAdd(ht, cpoint));
937         PetscCall(DMPlexAddClosure_Tree(dm, ht, cpoint, PETSC_TRUE, PETSC_FALSE));
938       }
939       PetscCall(DMPlexRestoreTransitiveClosure(dm, parent, PETSC_TRUE, &closureSize, &closure));
940     }
941   }
942   if (down) {
943     PetscInt        numChildren;
944     const PetscInt *children;
945 
946     PetscCall(DMPlexGetTreeChildren(dm, point, &numChildren, &children));
947     if (numChildren) {
948       PetscInt i;
949 
950       for (i = 0; i < numChildren; i++) {
951         PetscInt cpoint = children[i];
952 
953         PetscCall(PetscHSetIAdd(ht, cpoint));
954         PetscCall(DMPlexAddClosure_Tree(dm, ht, cpoint, PETSC_FALSE, PETSC_TRUE));
955       }
956     }
957   }
958   PetscFunctionReturn(0);
959 }
960 
961 static PetscErrorCode DMPlexAddClosureTree_Up_Private(DM dm, PetscHSetI ht, PetscInt point) {
962   PetscInt parent;
963 
964   PetscFunctionBeginHot;
965   PetscCall(DMPlexGetTreeParent(dm, point, &parent, NULL));
966   if (point != parent) {
967     const PetscInt *cone;
968     PetscInt        coneSize, c;
969 
970     PetscCall(DMPlexAddClosureTree_Up_Private(dm, ht, parent));
971     PetscCall(DMPlexAddClosure_Private(dm, ht, parent));
972     PetscCall(DMPlexGetCone(dm, parent, &cone));
973     PetscCall(DMPlexGetConeSize(dm, parent, &coneSize));
974     for (c = 0; c < coneSize; c++) {
975       const PetscInt cp = cone[c];
976 
977       PetscCall(DMPlexAddClosureTree_Up_Private(dm, ht, cp));
978     }
979   }
980   PetscFunctionReturn(0);
981 }
982 
983 static PetscErrorCode DMPlexAddClosureTree_Down_Private(DM dm, PetscHSetI ht, PetscInt point) {
984   PetscInt        i, numChildren;
985   const PetscInt *children;
986 
987   PetscFunctionBeginHot;
988   PetscCall(DMPlexGetTreeChildren(dm, point, &numChildren, &children));
989   for (i = 0; i < numChildren; i++) PetscCall(PetscHSetIAdd(ht, children[i]));
990   PetscFunctionReturn(0);
991 }
992 
993 static PetscErrorCode DMPlexAddClosureTree_Private(DM dm, PetscHSetI ht, PetscInt point) {
994   const PetscInt *cone;
995   PetscInt        coneSize, c;
996 
997   PetscFunctionBeginHot;
998   PetscCall(PetscHSetIAdd(ht, point));
999   PetscCall(DMPlexAddClosureTree_Up_Private(dm, ht, point));
1000   PetscCall(DMPlexAddClosureTree_Down_Private(dm, ht, point));
1001   PetscCall(DMPlexGetCone(dm, point, &cone));
1002   PetscCall(DMPlexGetConeSize(dm, point, &coneSize));
1003   for (c = 0; c < coneSize; c++) PetscCall(DMPlexAddClosureTree_Private(dm, ht, cone[c]));
1004   PetscFunctionReturn(0);
1005 }
1006 
1007 PetscErrorCode DMPlexClosurePoints_Private(DM dm, PetscInt numPoints, const PetscInt points[], IS *closureIS) {
1008   DM_Plex        *mesh    = (DM_Plex *)dm->data;
1009   const PetscBool hasTree = (mesh->parentSection || mesh->childSection) ? PETSC_TRUE : PETSC_FALSE;
1010   PetscInt        nelems, *elems, off = 0, p;
1011   PetscHSetI      ht = NULL;
1012 
1013   PetscFunctionBegin;
1014   PetscCall(PetscHSetICreate(&ht));
1015   PetscCall(PetscHSetIResize(ht, numPoints * 16));
1016   if (!hasTree) {
1017     for (p = 0; p < numPoints; ++p) PetscCall(DMPlexAddClosure_Private(dm, ht, points[p]));
1018   } else {
1019 #if 1
1020     for (p = 0; p < numPoints; ++p) PetscCall(DMPlexAddClosureTree_Private(dm, ht, points[p]));
1021 #else
1022     PetscInt *closure = NULL, closureSize, c;
1023     for (p = 0; p < numPoints; ++p) {
1024       PetscCall(DMPlexGetTransitiveClosure(dm, points[p], PETSC_TRUE, &closureSize, &closure));
1025       for (c = 0; c < closureSize * 2; c += 2) {
1026         PetscCall(PetscHSetIAdd(ht, closure[c]));
1027         if (hasTree) PetscCall(DMPlexAddClosure_Tree(dm, ht, closure[c], PETSC_TRUE, PETSC_TRUE));
1028       }
1029     }
1030     if (closure) PetscCall(DMPlexRestoreTransitiveClosure(dm, 0, PETSC_TRUE, NULL, &closure));
1031 #endif
1032   }
1033   PetscCall(PetscHSetIGetSize(ht, &nelems));
1034   PetscCall(PetscMalloc1(nelems, &elems));
1035   PetscCall(PetscHSetIGetElems(ht, &off, elems));
1036   PetscCall(PetscHSetIDestroy(&ht));
1037   PetscCall(PetscSortInt(nelems, elems));
1038   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nelems, elems, PETSC_OWN_POINTER, closureIS));
1039   PetscFunctionReturn(0);
1040 }
1041 
1042 /*@
1043   DMPlexPartitionLabelClosure - Add the closure of all points to the partition label
1044 
1045   Input Parameters:
1046 + dm     - The DM
1047 - label  - DMLabel assigning ranks to remote roots
1048 
1049   Level: developer
1050 
1051 .seealso: `DMPlexPartitionLabelCreateSF()`, `DMPlexDistribute()`, `DMPlexCreateOverlap()`
1052 @*/
1053 PetscErrorCode DMPlexPartitionLabelClosure(DM dm, DMLabel label) {
1054   IS              rankIS, pointIS, closureIS;
1055   const PetscInt *ranks, *points;
1056   PetscInt        numRanks, numPoints, r;
1057 
1058   PetscFunctionBegin;
1059   PetscCall(DMLabelGetValueIS(label, &rankIS));
1060   PetscCall(ISGetLocalSize(rankIS, &numRanks));
1061   PetscCall(ISGetIndices(rankIS, &ranks));
1062   for (r = 0; r < numRanks; ++r) {
1063     const PetscInt rank = ranks[r];
1064     PetscCall(DMLabelGetStratumIS(label, rank, &pointIS));
1065     PetscCall(ISGetLocalSize(pointIS, &numPoints));
1066     PetscCall(ISGetIndices(pointIS, &points));
1067     PetscCall(DMPlexClosurePoints_Private(dm, numPoints, points, &closureIS));
1068     PetscCall(ISRestoreIndices(pointIS, &points));
1069     PetscCall(ISDestroy(&pointIS));
1070     PetscCall(DMLabelSetStratumIS(label, rank, closureIS));
1071     PetscCall(ISDestroy(&closureIS));
1072   }
1073   PetscCall(ISRestoreIndices(rankIS, &ranks));
1074   PetscCall(ISDestroy(&rankIS));
1075   PetscFunctionReturn(0);
1076 }
1077 
1078 /*@
1079   DMPlexPartitionLabelAdjacency - Add one level of adjacent points to the partition label
1080 
1081   Input Parameters:
1082 + dm     - The DM
1083 - label  - DMLabel assigning ranks to remote roots
1084 
1085   Level: developer
1086 
1087 .seealso: `DMPlexPartitionLabelCreateSF()`, `DMPlexDistribute()`, `DMPlexCreateOverlap()`
1088 @*/
1089 PetscErrorCode DMPlexPartitionLabelAdjacency(DM dm, DMLabel label) {
1090   IS              rankIS, pointIS;
1091   const PetscInt *ranks, *points;
1092   PetscInt        numRanks, numPoints, r, p, a, adjSize;
1093   PetscInt       *adj = NULL;
1094 
1095   PetscFunctionBegin;
1096   PetscCall(DMLabelGetValueIS(label, &rankIS));
1097   PetscCall(ISGetLocalSize(rankIS, &numRanks));
1098   PetscCall(ISGetIndices(rankIS, &ranks));
1099   for (r = 0; r < numRanks; ++r) {
1100     const PetscInt rank = ranks[r];
1101 
1102     PetscCall(DMLabelGetStratumIS(label, rank, &pointIS));
1103     PetscCall(ISGetLocalSize(pointIS, &numPoints));
1104     PetscCall(ISGetIndices(pointIS, &points));
1105     for (p = 0; p < numPoints; ++p) {
1106       adjSize = PETSC_DETERMINE;
1107       PetscCall(DMPlexGetAdjacency(dm, points[p], &adjSize, &adj));
1108       for (a = 0; a < adjSize; ++a) PetscCall(DMLabelSetValue(label, adj[a], rank));
1109     }
1110     PetscCall(ISRestoreIndices(pointIS, &points));
1111     PetscCall(ISDestroy(&pointIS));
1112   }
1113   PetscCall(ISRestoreIndices(rankIS, &ranks));
1114   PetscCall(ISDestroy(&rankIS));
1115   PetscCall(PetscFree(adj));
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 /*@
1120   DMPlexPartitionLabelPropagate - Propagate points in a partition label over the point SF
1121 
1122   Input Parameters:
1123 + dm     - The DM
1124 - label  - DMLabel assigning ranks to remote roots
1125 
1126   Level: developer
1127 
1128   Note: This is required when generating multi-level overlaps to capture
1129   overlap points from non-neighbouring partitions.
1130 
1131 .seealso: `DMPlexPartitionLabelCreateSF()`, `DMPlexDistribute()`, `DMPlexCreateOverlap()`
1132 @*/
1133 PetscErrorCode DMPlexPartitionLabelPropagate(DM dm, DMLabel label) {
1134   MPI_Comm        comm;
1135   PetscMPIInt     rank;
1136   PetscSF         sfPoint;
1137   DMLabel         lblRoots, lblLeaves;
1138   IS              rankIS, pointIS;
1139   const PetscInt *ranks;
1140   PetscInt        numRanks, r;
1141 
1142   PetscFunctionBegin;
1143   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
1144   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1145   PetscCall(DMGetPointSF(dm, &sfPoint));
1146   /* Pull point contributions from remote leaves into local roots */
1147   PetscCall(DMLabelGather(label, sfPoint, &lblLeaves));
1148   PetscCall(DMLabelGetValueIS(lblLeaves, &rankIS));
1149   PetscCall(ISGetLocalSize(rankIS, &numRanks));
1150   PetscCall(ISGetIndices(rankIS, &ranks));
1151   for (r = 0; r < numRanks; ++r) {
1152     const PetscInt remoteRank = ranks[r];
1153     if (remoteRank == rank) continue;
1154     PetscCall(DMLabelGetStratumIS(lblLeaves, remoteRank, &pointIS));
1155     PetscCall(DMLabelInsertIS(label, pointIS, remoteRank));
1156     PetscCall(ISDestroy(&pointIS));
1157   }
1158   PetscCall(ISRestoreIndices(rankIS, &ranks));
1159   PetscCall(ISDestroy(&rankIS));
1160   PetscCall(DMLabelDestroy(&lblLeaves));
1161   /* Push point contributions from roots into remote leaves */
1162   PetscCall(DMLabelDistribute(label, sfPoint, &lblRoots));
1163   PetscCall(DMLabelGetValueIS(lblRoots, &rankIS));
1164   PetscCall(ISGetLocalSize(rankIS, &numRanks));
1165   PetscCall(ISGetIndices(rankIS, &ranks));
1166   for (r = 0; r < numRanks; ++r) {
1167     const PetscInt remoteRank = ranks[r];
1168     if (remoteRank == rank) continue;
1169     PetscCall(DMLabelGetStratumIS(lblRoots, remoteRank, &pointIS));
1170     PetscCall(DMLabelInsertIS(label, pointIS, remoteRank));
1171     PetscCall(ISDestroy(&pointIS));
1172   }
1173   PetscCall(ISRestoreIndices(rankIS, &ranks));
1174   PetscCall(ISDestroy(&rankIS));
1175   PetscCall(DMLabelDestroy(&lblRoots));
1176   PetscFunctionReturn(0);
1177 }
1178 
1179 /*@
1180   DMPlexPartitionLabelInvert - Create a partition label of remote roots from a local root label
1181 
1182   Input Parameters:
1183 + dm        - The DM
1184 . rootLabel - DMLabel assigning ranks to local roots
1185 - processSF - A star forest mapping into the local index on each remote rank
1186 
1187   Output Parameter:
1188 . leafLabel - DMLabel assigning ranks to remote roots
1189 
1190   Note: The rootLabel defines a send pattern by mapping local points to remote target ranks. The
1191   resulting leafLabel is a receiver mapping of remote roots to their parent rank.
1192 
1193   Level: developer
1194 
1195 .seealso: `DMPlexPartitionLabelCreateSF()`, `DMPlexDistribute()`, `DMPlexCreateOverlap()`
1196 @*/
1197 PetscErrorCode DMPlexPartitionLabelInvert(DM dm, DMLabel rootLabel, PetscSF processSF, DMLabel leafLabel) {
1198   MPI_Comm           comm;
1199   PetscMPIInt        rank, size, r;
1200   PetscInt           p, n, numNeighbors, numPoints, dof, off, rootSize, l, nleaves, leafSize;
1201   PetscSF            sfPoint;
1202   PetscSection       rootSection;
1203   PetscSFNode       *rootPoints, *leafPoints;
1204   const PetscSFNode *remote;
1205   const PetscInt    *local, *neighbors;
1206   IS                 valueIS;
1207   PetscBool          mpiOverflow = PETSC_FALSE;
1208 
1209   PetscFunctionBegin;
1210   PetscCall(PetscLogEventBegin(DMPLEX_PartLabelInvert, dm, 0, 0, 0));
1211   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
1212   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1213   PetscCallMPI(MPI_Comm_size(comm, &size));
1214   PetscCall(DMGetPointSF(dm, &sfPoint));
1215 
1216   /* Convert to (point, rank) and use actual owners */
1217   PetscCall(PetscSectionCreate(comm, &rootSection));
1218   PetscCall(PetscSectionSetChart(rootSection, 0, size));
1219   PetscCall(DMLabelGetValueIS(rootLabel, &valueIS));
1220   PetscCall(ISGetLocalSize(valueIS, &numNeighbors));
1221   PetscCall(ISGetIndices(valueIS, &neighbors));
1222   for (n = 0; n < numNeighbors; ++n) {
1223     PetscCall(DMLabelGetStratumSize(rootLabel, neighbors[n], &numPoints));
1224     PetscCall(PetscSectionAddDof(rootSection, neighbors[n], numPoints));
1225   }
1226   PetscCall(PetscSectionSetUp(rootSection));
1227   PetscCall(PetscSectionGetStorageSize(rootSection, &rootSize));
1228   PetscCall(PetscMalloc1(rootSize, &rootPoints));
1229   PetscCall(PetscSFGetGraph(sfPoint, NULL, &nleaves, &local, &remote));
1230   for (n = 0; n < numNeighbors; ++n) {
1231     IS              pointIS;
1232     const PetscInt *points;
1233 
1234     PetscCall(PetscSectionGetOffset(rootSection, neighbors[n], &off));
1235     PetscCall(DMLabelGetStratumIS(rootLabel, neighbors[n], &pointIS));
1236     PetscCall(ISGetLocalSize(pointIS, &numPoints));
1237     PetscCall(ISGetIndices(pointIS, &points));
1238     for (p = 0; p < numPoints; ++p) {
1239       if (local) PetscCall(PetscFindInt(points[p], nleaves, local, &l));
1240       else l = -1;
1241       if (l >= 0) {
1242         rootPoints[off + p] = remote[l];
1243       } else {
1244         rootPoints[off + p].index = points[p];
1245         rootPoints[off + p].rank  = rank;
1246       }
1247     }
1248     PetscCall(ISRestoreIndices(pointIS, &points));
1249     PetscCall(ISDestroy(&pointIS));
1250   }
1251 
1252   /* Try to communicate overlap using All-to-All */
1253   if (!processSF) {
1254     PetscInt64   counter     = 0;
1255     PetscBool    locOverflow = PETSC_FALSE;
1256     PetscMPIInt *scounts, *sdispls, *rcounts, *rdispls;
1257 
1258     PetscCall(PetscCalloc4(size, &scounts, size, &sdispls, size, &rcounts, size, &rdispls));
1259     for (n = 0; n < numNeighbors; ++n) {
1260       PetscCall(PetscSectionGetDof(rootSection, neighbors[n], &dof));
1261       PetscCall(PetscSectionGetOffset(rootSection, neighbors[n], &off));
1262 #if defined(PETSC_USE_64BIT_INDICES)
1263       if (dof > PETSC_MPI_INT_MAX) {
1264         locOverflow = PETSC_TRUE;
1265         break;
1266       }
1267       if (off > PETSC_MPI_INT_MAX) {
1268         locOverflow = PETSC_TRUE;
1269         break;
1270       }
1271 #endif
1272       scounts[neighbors[n]] = (PetscMPIInt)dof;
1273       sdispls[neighbors[n]] = (PetscMPIInt)off;
1274     }
1275     PetscCallMPI(MPI_Alltoall(scounts, 1, MPI_INT, rcounts, 1, MPI_INT, comm));
1276     for (r = 0; r < size; ++r) {
1277       rdispls[r] = (int)counter;
1278       counter += rcounts[r];
1279     }
1280     if (counter > PETSC_MPI_INT_MAX) locOverflow = PETSC_TRUE;
1281     PetscCallMPI(MPI_Allreduce(&locOverflow, &mpiOverflow, 1, MPIU_BOOL, MPI_LOR, comm));
1282     if (!mpiOverflow) {
1283       PetscCall(PetscInfo(dm, "Using Alltoallv for mesh distribution\n"));
1284       leafSize = (PetscInt)counter;
1285       PetscCall(PetscMalloc1(leafSize, &leafPoints));
1286       PetscCallMPI(MPI_Alltoallv(rootPoints, scounts, sdispls, MPIU_2INT, leafPoints, rcounts, rdispls, MPIU_2INT, comm));
1287     }
1288     PetscCall(PetscFree4(scounts, sdispls, rcounts, rdispls));
1289   }
1290 
1291   /* Communicate overlap using process star forest */
1292   if (processSF || mpiOverflow) {
1293     PetscSF      procSF;
1294     PetscSection leafSection;
1295 
1296     if (processSF) {
1297       PetscCall(PetscInfo(dm, "Using processSF for mesh distribution\n"));
1298       PetscCall(PetscObjectReference((PetscObject)processSF));
1299       procSF = processSF;
1300     } else {
1301       PetscCall(PetscInfo(dm, "Using processSF for mesh distribution (MPI overflow)\n"));
1302       PetscCall(PetscSFCreate(comm, &procSF));
1303       PetscCall(PetscSFSetGraphWithPattern(procSF, NULL, PETSCSF_PATTERN_ALLTOALL));
1304     }
1305 
1306     PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject)dm), &leafSection));
1307     PetscCall(DMPlexDistributeData(dm, procSF, rootSection, MPIU_2INT, rootPoints, leafSection, (void **)&leafPoints));
1308     PetscCall(PetscSectionGetStorageSize(leafSection, &leafSize));
1309     PetscCall(PetscSectionDestroy(&leafSection));
1310     PetscCall(PetscSFDestroy(&procSF));
1311   }
1312 
1313   for (p = 0; p < leafSize; p++) PetscCall(DMLabelSetValue(leafLabel, leafPoints[p].index, leafPoints[p].rank));
1314 
1315   PetscCall(ISRestoreIndices(valueIS, &neighbors));
1316   PetscCall(ISDestroy(&valueIS));
1317   PetscCall(PetscSectionDestroy(&rootSection));
1318   PetscCall(PetscFree(rootPoints));
1319   PetscCall(PetscFree(leafPoints));
1320   PetscCall(PetscLogEventEnd(DMPLEX_PartLabelInvert, dm, 0, 0, 0));
1321   PetscFunctionReturn(0);
1322 }
1323 
1324 /*@
1325   DMPlexPartitionLabelCreateSF - Create a star forest from a label that assigns ranks to points
1326 
1327   Input Parameters:
1328 + dm    - The DM
1329 - label - DMLabel assigning ranks to remote roots
1330 
1331   Output Parameter:
1332 . sf    - The star forest communication context encapsulating the defined mapping
1333 
1334   Note: The incoming label is a receiver mapping of remote points to their parent rank.
1335 
1336   Level: developer
1337 
1338 .seealso: `DMPlexDistribute()`, `DMPlexCreateOverlap()`
1339 @*/
1340 PetscErrorCode DMPlexPartitionLabelCreateSF(DM dm, DMLabel label, PetscSF *sf) {
1341   PetscMPIInt     rank;
1342   PetscInt        n, numRemote, p, numPoints, pStart, pEnd, idx = 0, nNeighbors;
1343   PetscSFNode    *remotePoints;
1344   IS              remoteRootIS, neighborsIS;
1345   const PetscInt *remoteRoots, *neighbors;
1346 
1347   PetscFunctionBegin;
1348   PetscCall(PetscLogEventBegin(DMPLEX_PartLabelCreateSF, dm, 0, 0, 0));
1349   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
1350 
1351   PetscCall(DMLabelGetValueIS(label, &neighborsIS));
1352 #if 0
1353   {
1354     IS is;
1355     PetscCall(ISDuplicate(neighborsIS, &is));
1356     PetscCall(ISSort(is));
1357     PetscCall(ISDestroy(&neighborsIS));
1358     neighborsIS = is;
1359   }
1360 #endif
1361   PetscCall(ISGetLocalSize(neighborsIS, &nNeighbors));
1362   PetscCall(ISGetIndices(neighborsIS, &neighbors));
1363   for (numRemote = 0, n = 0; n < nNeighbors; ++n) {
1364     PetscCall(DMLabelGetStratumSize(label, neighbors[n], &numPoints));
1365     numRemote += numPoints;
1366   }
1367   PetscCall(PetscMalloc1(numRemote, &remotePoints));
1368   /* Put owned points first */
1369   PetscCall(DMLabelGetStratumSize(label, rank, &numPoints));
1370   if (numPoints > 0) {
1371     PetscCall(DMLabelGetStratumIS(label, rank, &remoteRootIS));
1372     PetscCall(ISGetIndices(remoteRootIS, &remoteRoots));
1373     for (p = 0; p < numPoints; p++) {
1374       remotePoints[idx].index = remoteRoots[p];
1375       remotePoints[idx].rank  = rank;
1376       idx++;
1377     }
1378     PetscCall(ISRestoreIndices(remoteRootIS, &remoteRoots));
1379     PetscCall(ISDestroy(&remoteRootIS));
1380   }
1381   /* Now add remote points */
1382   for (n = 0; n < nNeighbors; ++n) {
1383     const PetscInt nn = neighbors[n];
1384 
1385     PetscCall(DMLabelGetStratumSize(label, nn, &numPoints));
1386     if (nn == rank || numPoints <= 0) continue;
1387     PetscCall(DMLabelGetStratumIS(label, nn, &remoteRootIS));
1388     PetscCall(ISGetIndices(remoteRootIS, &remoteRoots));
1389     for (p = 0; p < numPoints; p++) {
1390       remotePoints[idx].index = remoteRoots[p];
1391       remotePoints[idx].rank  = nn;
1392       idx++;
1393     }
1394     PetscCall(ISRestoreIndices(remoteRootIS, &remoteRoots));
1395     PetscCall(ISDestroy(&remoteRootIS));
1396   }
1397   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), sf));
1398   PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));
1399   PetscCall(PetscSFSetGraph(*sf, pEnd - pStart, numRemote, NULL, PETSC_OWN_POINTER, remotePoints, PETSC_OWN_POINTER));
1400   PetscCall(PetscSFSetUp(*sf));
1401   PetscCall(ISDestroy(&neighborsIS));
1402   PetscCall(PetscLogEventEnd(DMPLEX_PartLabelCreateSF, dm, 0, 0, 0));
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 #if defined(PETSC_HAVE_PARMETIS)
1407 #include <parmetis.h>
1408 #endif
1409 
1410 /* The two functions below are used by DMPlexRebalanceSharedPoints which errors
1411  * when PETSc is built without ParMETIS. To avoid -Wunused-function, we take
1412  * them out in that case. */
1413 #if defined(PETSC_HAVE_PARMETIS)
1414 /*@C
1415 
1416   DMPlexRewriteSF - Rewrites the ownership of the SF of a DM (in place).
1417 
1418   Input parameters:
1419 + dm                - The DMPlex object.
1420 . n                 - The number of points.
1421 . pointsToRewrite   - The points in the SF whose ownership will change.
1422 . targetOwners      - New owner for each element in pointsToRewrite.
1423 - degrees           - Degrees of the points in the SF as obtained by PetscSFComputeDegreeBegin/PetscSFComputeDegreeEnd.
1424 
1425   Level: developer
1426 
1427 @*/
static PetscErrorCode DMPlexRewriteSF(DM dm, PetscInt n, PetscInt *pointsToRewrite, PetscInt *targetOwners, const PetscInt *degrees) {
  PetscInt           pStart, pEnd, i, j, counter, leafCounter, sumDegrees, nroots, nleafs;
  PetscInt          *cumSumDegrees, *newOwners, *newNumbers, *rankOnLeafs, *locationsOfLeafs, *remoteLocalPointOfLeafs, *points, *leafsNew;
  PetscSFNode       *leafLocationsNew;
  const PetscSFNode *iremote;
  const PetscInt    *ilocal;
  PetscBool         *isLeaf;
  PetscSF            sf;
  MPI_Comm           comm;
  PetscMPIInt        rank, size;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));

  /* Flag every chart point that currently appears as a leaf in the point SF.
     NOTE(review): assumes ilocal is non-NULL; PetscSFGetGraph may return a NULL
     ilocal for contiguous leaf storage — TODO confirm the point SF always has an
     explicit ilocal here. */
  PetscCall(DMGetPointSF(dm, &sf));
  PetscCall(PetscSFGetGraph(sf, &nroots, &nleafs, &ilocal, &iremote));
  PetscCall(PetscMalloc1(pEnd - pStart, &isLeaf));
  for (i = 0; i < pEnd - pStart; i++) isLeaf[i] = PETSC_FALSE;
  for (i = 0; i < nleafs; i++) isLeaf[ilocal[i] - pStart] = PETSC_TRUE;

  /* Prefix sum of the root degrees: cumSumDegrees[p] is the offset of point p's
     leaf records in the gathered arrays below. */
  PetscCall(PetscMalloc1(pEnd - pStart + 1, &cumSumDegrees));
  cumSumDegrees[0] = 0;
  for (i = 1; i <= pEnd - pStart; i++) cumSumDegrees[i] = cumSumDegrees[i - 1] + degrees[i - 1];
  sumDegrees = cumSumDegrees[pEnd - pStart];
  /* get the location of my leafs (we have sumDegrees many leafs pointing at our roots) */

  PetscCall(PetscMalloc1(sumDegrees, &locationsOfLeafs));
  PetscCall(PetscMalloc1(pEnd - pStart, &rankOnLeafs));
  for (i = 0; i < pEnd - pStart; i++) rankOnLeafs[i] = rank;
  PetscCall(PetscSFGatherBegin(sf, MPIU_INT, rankOnLeafs, locationsOfLeafs));
  PetscCall(PetscSFGatherEnd(sf, MPIU_INT, rankOnLeafs, locationsOfLeafs));
  PetscCall(PetscFree(rankOnLeafs));

  /* get the remote local points of my leaves */
  PetscCall(PetscMalloc1(sumDegrees, &remoteLocalPointOfLeafs));
  PetscCall(PetscMalloc1(pEnd - pStart, &points));
  for (i = 0; i < pEnd - pStart; i++) points[i] = pStart + i;
  PetscCall(PetscSFGatherBegin(sf, MPIU_INT, points, remoteLocalPointOfLeafs));
  PetscCall(PetscSFGatherEnd(sf, MPIU_INT, points, remoteLocalPointOfLeafs));
  PetscCall(PetscFree(points));
  /* Figure out the new owners of the vertices that are up for grabs and their numbers on the new owners */
  PetscCall(PetscMalloc1(pEnd - pStart, &newOwners));
  PetscCall(PetscMalloc1(pEnd - pStart, &newNumbers));
  for (i = 0; i < pEnd - pStart; i++) {
    /* -1 marks "ownership unchanged" so the broadcast below only rewrites
       points that are actually handed over. */
    newOwners[i]  = -1;
    newNumbers[i] = -1;
  }
  {
    PetscInt oldNumber, newNumber, oldOwner, newOwner;
    for (i = 0; i < n; i++) {
      oldNumber = pointsToRewrite[i];
      newNumber = -1;
      oldOwner  = rank;
      newOwner  = targetOwners[i];
      if (oldOwner == newOwner) {
        newNumber = oldNumber;
      } else {
        /* Scan this root's gathered leaf records for the one held by the new
           owner; its remote local number becomes the root index over there. */
        for (j = 0; j < degrees[oldNumber]; j++) {
          if (locationsOfLeafs[cumSumDegrees[oldNumber] + j] == newOwner) {
            newNumber = remoteLocalPointOfLeafs[cumSumDegrees[oldNumber] + j];
            break;
          }
        }
      }
      PetscCheck(newNumber != -1, PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Couldn't find the new owner of vertex.");

      newOwners[oldNumber]  = newOwner;
      newNumbers[oldNumber] = newNumber;
    }
  }
  PetscCall(PetscFree(cumSumDegrees));
  PetscCall(PetscFree(locationsOfLeafs));
  PetscCall(PetscFree(remoteLocalPointOfLeafs));

  /* Propagate the (owner, number) decisions from the current roots to all leaves. */
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, newOwners, newOwners, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, newOwners, newOwners, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, newNumbers, newNumbers, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, newNumbers, newNumbers, MPI_REPLACE));

  /* Now count how many leafs we have on each processor. */
  leafCounter = 0;
  for (i = 0; i < pEnd - pStart; i++) {
    if (newOwners[i] >= 0) {
      /* Rewritten point: it is a leaf here iff somebody else now owns it. */
      if (newOwners[i] != rank) leafCounter++;
    } else {
      /* Untouched point: keeps its old leaf status. */
      if (isLeaf[i]) leafCounter++;
    }
  }

  /* Now set up the new sf by creating the leaf arrays */
  PetscCall(PetscMalloc1(leafCounter, &leafsNew));
  PetscCall(PetscMalloc1(leafCounter, &leafLocationsNew));

  /* counter walks the OLD iremote array in step with the old leaves, so
     untouched leaves copy their previous remote location. */
  leafCounter = 0;
  counter     = 0;
  for (i = 0; i < pEnd - pStart; i++) {
    if (newOwners[i] >= 0) {
      if (newOwners[i] != rank) {
        leafsNew[leafCounter]               = i;
        leafLocationsNew[leafCounter].rank  = newOwners[i];
        leafLocationsNew[leafCounter].index = newNumbers[i];
        leafCounter++;
      }
    } else {
      if (isLeaf[i]) {
        leafsNew[leafCounter]               = i;
        leafLocationsNew[leafCounter].rank  = iremote[counter].rank;
        leafLocationsNew[leafCounter].index = iremote[counter].index;
        leafCounter++;
      }
    }
    if (isLeaf[i]) counter++;
  }

  /* Ownership of leafsNew/leafLocationsNew transfers to the SF. */
  PetscCall(PetscSFSetGraph(sf, nroots, leafCounter, leafsNew, PETSC_OWN_POINTER, leafLocationsNew, PETSC_OWN_POINTER));
  PetscCall(PetscFree(newOwners));
  PetscCall(PetscFree(newNumbers));
  PetscCall(PetscFree(isLeaf));
  PetscFunctionReturn(0);
}
1551 
1552 static PetscErrorCode DMPlexViewDistribution(MPI_Comm comm, PetscInt n, PetscInt skip, PetscInt *vtxwgt, PetscInt *part, PetscViewer viewer) {
1553   PetscInt   *distribution, min, max, sum;
1554   PetscMPIInt rank, size;
1555 
1556   PetscFunctionBegin;
1557   PetscCallMPI(MPI_Comm_size(comm, &size));
1558   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1559   PetscCall(PetscCalloc1(size, &distribution));
1560   for (PetscInt i = 0; i < n; i++) {
1561     if (part) distribution[part[i]] += vtxwgt[skip * i];
1562     else distribution[rank] += vtxwgt[skip * i];
1563   }
1564   PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE, distribution, size, MPIU_INT, MPI_SUM, comm));
1565   min = distribution[0];
1566   max = distribution[0];
1567   sum = distribution[0];
1568   for (PetscInt i = 1; i < size; i++) {
1569     if (distribution[i] < min) min = distribution[i];
1570     if (distribution[i] > max) max = distribution[i];
1571     sum += distribution[i];
1572   }
1573   PetscCall(PetscViewerASCIIPrintf(viewer, "Min: %" PetscInt_FMT ", Avg: %" PetscInt_FMT ", Max: %" PetscInt_FMT ", Balance: %f\n", min, sum / size, max, (max * 1. * size) / sum));
1574   PetscCall(PetscFree(distribution));
1575   PetscFunctionReturn(0);
1576 }
1577 
1578 #endif
1579 
1580 /*@
  DMPlexRebalanceSharedPoints - Redistribute points in the plex that are shared in order to achieve better balancing. This routine updates the PointSF of the DM in place.
1582 
1583   Input parameters:
1584 + dm               - The DMPlex object.
1585 . entityDepth      - depth of the entity to balance (0 -> balance vertices).
1586 . useInitialGuess  - whether to use the current distribution as initial guess (only used by ParMETIS).
1587 - parallel         - whether to use ParMETIS and do the partition in parallel or whether to gather the graph onto a single process and use METIS.
1588 
1589   Output parameters:
1590 . success          - whether the graph partitioning was successful or not, optional. Unsuccessful simply means no change to the partitioning
1591 
1592   Options Database:
1593 +  -dm_plex_rebalance_shared_points_parmetis - Use ParMetis instead of Metis for the partitioner
1594 .  -dm_plex_rebalance_shared_points_use_initial_guess - Use current partition to bootstrap ParMetis partition
1595 .  -dm_plex_rebalance_shared_points_use_mat_partitioning - Use the MatPartitioning object to perform the partition, the prefix for those operations is -dm_plex_rebalance_shared_points_
1596 -  -dm_plex_rebalance_shared_points_monitor - Monitor the shared points rebalance process
1597 
1598   Developer Notes:
1599   This should use MatPartitioning to allow the use of any partitioner and not be hardwired to use ParMetis
1600 
1601   Level: intermediate
1602 @*/
1603 PetscErrorCode DMPlexRebalanceSharedPoints(DM dm, PetscInt entityDepth, PetscBool useInitialGuess, PetscBool parallel, PetscBool *success) {
1604 #if defined(PETSC_HAVE_PARMETIS)
1605   PetscSF            sf;
1606   PetscInt           ierr, i, j, idx, jdx;
1607   PetscInt           eBegin, eEnd, nroots, nleafs, pStart, pEnd;
1608   const PetscInt    *degrees, *ilocal;
1609   const PetscSFNode *iremote;
1610   PetscBool         *toBalance, *isLeaf, *isExclusivelyOwned, *isNonExclusivelyOwned;
1611   PetscInt           numExclusivelyOwned, numNonExclusivelyOwned;
1612   PetscMPIInt        rank, size;
1613   PetscInt          *globalNumbersOfLocalOwnedVertices, *leafGlobalNumbers;
1614   const PetscInt    *cumSumVertices;
1615   PetscInt           offset, counter;
1616   PetscInt          *vtxwgt;
1617   const PetscInt    *xadj, *adjncy;
1618   PetscInt          *part, *options;
1619   PetscInt           nparts, wgtflag, numflag, ncon, edgecut;
1620   real_t            *ubvec;
1621   PetscInt          *firstVertices, *renumbering;
1622   PetscInt           failed, failedGlobal;
1623   MPI_Comm           comm;
1624   Mat                A;
1625   PetscViewer        viewer;
1626   PetscViewerFormat  format;
1627   PetscLayout        layout;
1628   real_t            *tpwgts;
1629   PetscMPIInt       *counts, *mpiCumSumVertices;
1630   PetscInt          *pointsToRewrite;
1631   PetscInt           numRows;
1632   PetscBool          done, usematpartitioning = PETSC_FALSE;
1633   IS                 ispart = NULL;
1634   MatPartitioning    mp;
1635   const char        *prefix;
1636 
1637   PetscFunctionBegin;
1638   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
1639   PetscCallMPI(MPI_Comm_size(comm, &size));
1640   if (size == 1) {
1641     if (success) *success = PETSC_TRUE;
1642     PetscFunctionReturn(0);
1643   }
1644   if (success) *success = PETSC_FALSE;
1645   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1646 
1647   parallel        = PETSC_FALSE;
1648   useInitialGuess = PETSC_FALSE;
1649   PetscObjectOptionsBegin((PetscObject)dm);
1650   PetscCall(PetscOptionsName("-dm_plex_rebalance_shared_points_parmetis", "Use ParMetis instead of Metis for the partitioner", "DMPlexRebalanceSharedPoints", &parallel));
1651   PetscCall(PetscOptionsBool("-dm_plex_rebalance_shared_points_use_initial_guess", "Use current partition to bootstrap ParMetis partition", "DMPlexRebalanceSharedPoints", useInitialGuess, &useInitialGuess, NULL));
1652   PetscCall(PetscOptionsBool("-dm_plex_rebalance_shared_points_use_mat_partitioning", "Use the MatPartitioning object to partition", "DMPlexRebalanceSharedPoints", usematpartitioning, &usematpartitioning, NULL));
1653   PetscCall(PetscOptionsViewer("-dm_plex_rebalance_shared_points_monitor", "Monitor the shared points rebalance process", "DMPlexRebalanceSharedPoints", &viewer, &format, NULL));
1654   PetscOptionsEnd();
1655   if (viewer) PetscCall(PetscViewerPushFormat(viewer, format));
1656 
1657   PetscCall(PetscLogEventBegin(DMPLEX_RebalanceSharedPoints, dm, 0, 0, 0));
1658 
1659   PetscCall(DMGetOptionsPrefix(dm, &prefix));
1660   PetscCall(PetscOptionsGetViewer(comm, ((PetscObject)dm)->options, prefix, "-dm_rebalance_partition_view", &viewer, &format, NULL));
1661   if (viewer) PetscCall(PetscViewerPushFormat(viewer, format));
1662 
1663   /* Figure out all points in the plex that we are interested in balancing. */
1664   PetscCall(DMPlexGetDepthStratum(dm, entityDepth, &eBegin, &eEnd));
1665   PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));
1666   PetscCall(PetscMalloc1(pEnd - pStart, &toBalance));
1667   for (i = 0; i < pEnd - pStart; i++) toBalance[i] = (PetscBool)(i >= eBegin && i < eEnd);
1668 
1669   /* There are three types of points:
1670    * exclusivelyOwned: points that are owned by this process and only seen by this process
1671    * nonExclusivelyOwned: points that are owned by this process but seen by at least another process
1672    * leaf: a point that is seen by this process but owned by a different process
1673    */
1674   PetscCall(DMGetPointSF(dm, &sf));
1675   PetscCall(PetscSFGetGraph(sf, &nroots, &nleafs, &ilocal, &iremote));
1676   PetscCall(PetscMalloc1(pEnd - pStart, &isLeaf));
1677   PetscCall(PetscMalloc1(pEnd - pStart, &isNonExclusivelyOwned));
1678   PetscCall(PetscMalloc1(pEnd - pStart, &isExclusivelyOwned));
1679   for (i = 0; i < pEnd - pStart; i++) {
1680     isNonExclusivelyOwned[i] = PETSC_FALSE;
1681     isExclusivelyOwned[i]    = PETSC_FALSE;
1682     isLeaf[i]                = PETSC_FALSE;
1683   }
1684 
1685   /* mark all the leafs */
1686   for (i = 0; i < nleafs; i++) isLeaf[ilocal[i] - pStart] = PETSC_TRUE;
1687 
1688   /* for an owned point, we can figure out whether another processor sees it or
1689    * not by calculating its degree */
1690   PetscCall(PetscSFComputeDegreeBegin(sf, &degrees));
1691   PetscCall(PetscSFComputeDegreeEnd(sf, &degrees));
1692   numExclusivelyOwned    = 0;
1693   numNonExclusivelyOwned = 0;
1694   for (i = 0; i < pEnd - pStart; i++) {
1695     if (toBalance[i]) {
1696       if (degrees[i] > 0) {
1697         isNonExclusivelyOwned[i] = PETSC_TRUE;
1698         numNonExclusivelyOwned += 1;
1699       } else {
1700         if (!isLeaf[i]) {
1701           isExclusivelyOwned[i] = PETSC_TRUE;
1702           numExclusivelyOwned += 1;
1703         }
1704       }
1705     }
1706   }
1707 
1708   /* Build a graph with one vertex per core representing the
1709    * exclusively owned points and then one vertex per nonExclusively owned
1710    * point. */
1711   PetscCall(PetscLayoutCreate(comm, &layout));
1712   PetscCall(PetscLayoutSetLocalSize(layout, 1 + numNonExclusivelyOwned));
1713   PetscCall(PetscLayoutSetUp(layout));
1714   PetscCall(PetscLayoutGetRanges(layout, &cumSumVertices));
1715   PetscCall(PetscMalloc1(pEnd - pStart, &globalNumbersOfLocalOwnedVertices));
1716   for (i = 0; i < pEnd - pStart; i++) globalNumbersOfLocalOwnedVertices[i] = pStart - 1;
1717   offset  = cumSumVertices[rank];
1718   counter = 0;
1719   for (i = 0; i < pEnd - pStart; i++) {
1720     if (toBalance[i]) {
1721       if (degrees[i] > 0) {
1722         globalNumbersOfLocalOwnedVertices[i] = counter + 1 + offset;
1723         counter++;
1724       }
1725     }
1726   }
1727 
1728   /* send the global numbers of vertices I own to the leafs so that they know to connect to it */
1729   PetscCall(PetscMalloc1(pEnd - pStart, &leafGlobalNumbers));
1730   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, globalNumbersOfLocalOwnedVertices, leafGlobalNumbers, MPI_REPLACE));
1731   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, globalNumbersOfLocalOwnedVertices, leafGlobalNumbers, MPI_REPLACE));
1732 
1733   /* Build the graph for partitioning */
1734   numRows = 1 + numNonExclusivelyOwned;
1735   PetscCall(PetscLogEventBegin(DMPLEX_RebalBuildGraph, dm, 0, 0, 0));
1736   PetscCall(MatCreate(comm, &A));
1737   PetscCall(MatSetType(A, MATMPIADJ));
1738   PetscCall(MatSetSizes(A, numRows, numRows, cumSumVertices[size], cumSumVertices[size]));
1739   idx = cumSumVertices[rank];
1740   for (i = 0; i < pEnd - pStart; i++) {
1741     if (toBalance[i]) {
1742       if (isNonExclusivelyOwned[i]) jdx = globalNumbersOfLocalOwnedVertices[i];
1743       else if (isLeaf[i]) jdx = leafGlobalNumbers[i];
1744       else continue;
1745       PetscCall(MatSetValue(A, idx, jdx, 1, INSERT_VALUES));
1746       PetscCall(MatSetValue(A, jdx, idx, 1, INSERT_VALUES));
1747     }
1748   }
1749   PetscCall(PetscFree(globalNumbersOfLocalOwnedVertices));
1750   PetscCall(PetscFree(leafGlobalNumbers));
1751   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1752   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1753   PetscCall(PetscLogEventEnd(DMPLEX_RebalBuildGraph, dm, 0, 0, 0));
1754 
1755   nparts = size;
1756   ncon   = 1;
1757   PetscCall(PetscMalloc1(ncon * nparts, &tpwgts));
1758   for (i = 0; i < ncon * nparts; i++) tpwgts[i] = 1. / (nparts);
1759   PetscCall(PetscMalloc1(ncon, &ubvec));
1760   ubvec[0] = 1.05;
1761   ubvec[1] = 1.05;
1762 
1763   PetscCall(PetscMalloc1(ncon * (1 + numNonExclusivelyOwned), &vtxwgt));
1764   if (ncon == 2) {
1765     vtxwgt[0] = numExclusivelyOwned;
1766     vtxwgt[1] = 1;
1767     for (i = 0; i < numNonExclusivelyOwned; i++) {
1768       vtxwgt[ncon * (i + 1)]     = 1;
1769       vtxwgt[ncon * (i + 1) + 1] = 0;
1770     }
1771   } else {
1772     PetscInt base, ms;
1773     PetscCallMPI(MPI_Allreduce(&numExclusivelyOwned, &base, 1, MPIU_INT, MPIU_MAX, PetscObjectComm((PetscObject)dm)));
1774     PetscCall(MatGetSize(A, &ms, NULL));
1775     ms -= size;
1776     base      = PetscMax(base, ms);
1777     vtxwgt[0] = base + numExclusivelyOwned;
1778     for (i = 0; i < numNonExclusivelyOwned; i++) vtxwgt[i + 1] = 1;
1779   }
1780 
1781   if (viewer) {
1782     PetscCall(PetscViewerASCIIPrintf(viewer, "Attempt rebalancing of shared points of depth %" PetscInt_FMT " on interface of mesh distribution.\n", entityDepth));
1783     PetscCall(PetscViewerASCIIPrintf(viewer, "Size of generated auxiliary graph: %" PetscInt_FMT "\n", cumSumVertices[size]));
1784   }
1785   /* TODO: Drop the parallel/sequential choice here and just use MatPartioner for much more flexibility */
1786   if (usematpartitioning) {
1787     const char *prefix;
1788 
1789     PetscCall(MatPartitioningCreate(PetscObjectComm((PetscObject)dm), &mp));
1790     PetscCall(PetscObjectSetOptionsPrefix((PetscObject)mp, "dm_plex_rebalance_shared_points_"));
1791     PetscCall(PetscObjectGetOptionsPrefix((PetscObject)dm, &prefix));
1792     PetscCall(PetscObjectPrependOptionsPrefix((PetscObject)mp, prefix));
1793     PetscCall(MatPartitioningSetAdjacency(mp, A));
1794     PetscCall(MatPartitioningSetNumberVertexWeights(mp, ncon));
1795     PetscCall(MatPartitioningSetVertexWeights(mp, vtxwgt));
1796     PetscCall(MatPartitioningSetFromOptions(mp));
1797     PetscCall(MatPartitioningApply(mp, &ispart));
1798     PetscCall(ISGetIndices(ispart, (const PetscInt **)&part));
1799   } else if (parallel) {
1800     if (viewer) PetscCall(PetscViewerASCIIPrintf(viewer, "Using ParMETIS to partition graph.\n"));
1801     PetscCall(PetscMalloc1(cumSumVertices[rank + 1] - cumSumVertices[rank], &part));
1802     PetscCall(MatGetRowIJ(A, PETSC_FALSE, PETSC_FALSE, PETSC_FALSE, &numRows, &xadj, &adjncy, &done));
1803     PetscCheck(done, PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Could not get adjacency information");
1804     PetscCall(PetscMalloc1(4, &options));
1805     options[0] = 1;
1806     options[1] = 0; /* Verbosity */
1807     if (viewer) options[1] = 1;
1808     options[2] = 0;                    /* Seed */
1809     options[3] = PARMETIS_PSR_COUPLED; /* Seed */
1810     wgtflag    = 2;
1811     numflag    = 0;
1812     if (useInitialGuess) {
1813       PetscCall(PetscViewerASCIIPrintf(viewer, "THIS DOES NOT WORK! I don't know why. Using current distribution of points as initial guess.\n"));
1814       for (i = 0; i < numRows; i++) part[i] = rank;
1815       if (viewer) PetscCall(PetscViewerASCIIPrintf(viewer, "Using current distribution of points as initial guess.\n"));
1816       PetscStackPushExternal("ParMETIS_V3_RefineKway");
1817       PetscCall(PetscLogEventBegin(DMPLEX_RebalPartition, 0, 0, 0, 0));
1818       ierr = ParMETIS_V3_RefineKway((PetscInt *)cumSumVertices, (idx_t *)xadj, (idx_t *)adjncy, vtxwgt, NULL, &wgtflag, &numflag, &ncon, &nparts, tpwgts, ubvec, options, &edgecut, part, &comm);
1819       PetscCall(PetscLogEventEnd(DMPLEX_RebalPartition, 0, 0, 0, 0));
1820       PetscStackPop;
1821       PetscCheck(ierr == METIS_OK, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ParMETIS_V3_RefineKway()");
1822     } else {
1823       PetscStackPushExternal("ParMETIS_V3_PartKway");
1824       PetscCall(PetscLogEventBegin(DMPLEX_RebalPartition, 0, 0, 0, 0));
1825       ierr = ParMETIS_V3_PartKway((PetscInt *)cumSumVertices, (idx_t *)xadj, (idx_t *)adjncy, vtxwgt, NULL, &wgtflag, &numflag, &ncon, &nparts, tpwgts, ubvec, options, &edgecut, part, &comm);
1826       PetscCall(PetscLogEventEnd(DMPLEX_RebalPartition, 0, 0, 0, 0));
1827       PetscStackPop;
1828       PetscCheck(ierr == METIS_OK, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ParMETIS_V3_PartKway()");
1829     }
1830     PetscCall(MatRestoreRowIJ(A, PETSC_FALSE, PETSC_FALSE, PETSC_FALSE, &numRows, &xadj, &adjncy, &done));
1831     PetscCall(PetscFree(options));
1832   } else {
1833     if (viewer) PetscCall(PetscViewerASCIIPrintf(viewer, "Using METIS to partition graph.\n"));
1834     Mat       As;
1835     PetscInt *partGlobal;
1836     PetscInt *numExclusivelyOwnedAll;
1837 
1838     PetscCall(PetscMalloc1(cumSumVertices[rank + 1] - cumSumVertices[rank], &part));
1839     PetscCall(MatGetSize(A, &numRows, NULL));
1840     PetscCall(PetscLogEventBegin(DMPLEX_RebalGatherGraph, dm, 0, 0, 0));
1841     PetscCall(MatMPIAdjToSeqRankZero(A, &As));
1842     PetscCall(PetscLogEventEnd(DMPLEX_RebalGatherGraph, dm, 0, 0, 0));
1843 
1844     PetscCall(PetscMalloc1(size, &numExclusivelyOwnedAll));
1845     numExclusivelyOwnedAll[rank] = numExclusivelyOwned;
1846     PetscCallMPI(MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, numExclusivelyOwnedAll, 1, MPIU_INT, comm));
1847 
1848     PetscCall(PetscMalloc1(numRows, &partGlobal));
1849     PetscCall(PetscLogEventBegin(DMPLEX_RebalPartition, 0, 0, 0, 0));
1850     if (rank == 0) {
1851       PetscInt *vtxwgt_g, numRows_g;
1852 
1853       PetscCall(MatGetRowIJ(As, PETSC_FALSE, PETSC_FALSE, PETSC_FALSE, &numRows_g, &xadj, &adjncy, &done));
1854       PetscCall(PetscMalloc1(2 * numRows_g, &vtxwgt_g));
1855       for (i = 0; i < size; i++) {
1856         vtxwgt_g[ncon * cumSumVertices[i]] = numExclusivelyOwnedAll[i];
1857         if (ncon > 1) vtxwgt_g[ncon * cumSumVertices[i] + 1] = 1;
1858         for (j = cumSumVertices[i] + 1; j < cumSumVertices[i + 1]; j++) {
1859           vtxwgt_g[ncon * j] = 1;
1860           if (ncon > 1) vtxwgt_g[2 * j + 1] = 0;
1861         }
1862       }
1863 
1864       PetscCall(PetscMalloc1(64, &options));
1865       ierr = METIS_SetDefaultOptions(options); /* initialize all defaults */
1866       PetscCheck(ierr == METIS_OK, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in METIS_SetDefaultOptions()");
1867       options[METIS_OPTION_CONTIG] = 1;
1868       PetscStackPushExternal("METIS_PartGraphKway");
1869       ierr = METIS_PartGraphKway(&numRows_g, &ncon, (idx_t *)xadj, (idx_t *)adjncy, vtxwgt_g, NULL, NULL, &nparts, tpwgts, ubvec, options, &edgecut, partGlobal);
1870       PetscStackPop;
1871       PetscCheck(ierr == METIS_OK, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in METIS_PartGraphKway()");
1872       PetscCall(PetscFree(options));
1873       PetscCall(PetscFree(vtxwgt_g));
1874       PetscCall(MatRestoreRowIJ(As, PETSC_FALSE, PETSC_FALSE, PETSC_FALSE, &numRows_g, &xadj, &adjncy, &done));
1875       PetscCall(MatDestroy(&As));
1876     }
1877     PetscCall(PetscBarrier((PetscObject)dm));
1878     PetscCall(PetscLogEventEnd(DMPLEX_RebalPartition, 0, 0, 0, 0));
1879     PetscCall(PetscFree(numExclusivelyOwnedAll));
1880 
1881     /* scatter the partitioning information to ranks */
1882     PetscCall(PetscLogEventBegin(DMPLEX_RebalScatterPart, 0, 0, 0, 0));
1883     PetscCall(PetscMalloc1(size, &counts));
1884     PetscCall(PetscMalloc1(size + 1, &mpiCumSumVertices));
1885     for (i = 0; i < size; i++) PetscCall(PetscMPIIntCast(cumSumVertices[i + 1] - cumSumVertices[i], &(counts[i])));
1886     for (i = 0; i <= size; i++) PetscCall(PetscMPIIntCast(cumSumVertices[i], &(mpiCumSumVertices[i])));
1887     PetscCallMPI(MPI_Scatterv(partGlobal, counts, mpiCumSumVertices, MPIU_INT, part, counts[rank], MPIU_INT, 0, comm));
1888     PetscCall(PetscFree(counts));
1889     PetscCall(PetscFree(mpiCumSumVertices));
1890     PetscCall(PetscFree(partGlobal));
1891     PetscCall(PetscLogEventEnd(DMPLEX_RebalScatterPart, 0, 0, 0, 0));
1892   }
1893   PetscCall(PetscFree(ubvec));
1894   PetscCall(PetscFree(tpwgts));
1895 
1896   /* Rename the result so that the vertex resembling the exclusively owned points stays on the same rank */
1897   PetscCall(PetscMalloc2(size, &firstVertices, size, &renumbering));
1898   firstVertices[rank] = part[0];
1899   PetscCallMPI(MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, firstVertices, 1, MPIU_INT, comm));
1900   for (i = 0; i < size; i++) renumbering[firstVertices[i]] = i;
1901   for (i = 0; i < cumSumVertices[rank + 1] - cumSumVertices[rank]; i++) part[i] = renumbering[part[i]];
1902   PetscCall(PetscFree2(firstVertices, renumbering));
1903 
1904   /* Check if the renumbering worked (this can fail when ParMETIS gives fewer partitions than there are processes) */
1905   failed = (PetscInt)(part[0] != rank);
1906   PetscCallMPI(MPI_Allreduce(&failed, &failedGlobal, 1, MPIU_INT, MPI_SUM, comm));
1907   if (failedGlobal > 0) {
1908     PetscCheck(failedGlobal <= 0, comm, PETSC_ERR_LIB, "Metis/Parmetis returned a bad partion");
1909     PetscCall(PetscFree(vtxwgt));
1910     PetscCall(PetscFree(toBalance));
1911     PetscCall(PetscFree(isLeaf));
1912     PetscCall(PetscFree(isNonExclusivelyOwned));
1913     PetscCall(PetscFree(isExclusivelyOwned));
1914     if (usematpartitioning) {
1915       PetscCall(ISRestoreIndices(ispart, (const PetscInt **)&part));
1916       PetscCall(ISDestroy(&ispart));
1917     } else PetscCall(PetscFree(part));
1918     if (viewer) {
1919       PetscCall(PetscViewerPopFormat(viewer));
1920       PetscCall(PetscViewerDestroy(&viewer));
1921     }
1922     PetscCall(PetscLogEventEnd(DMPLEX_RebalanceSharedPoints, dm, 0, 0, 0));
1923     PetscFunctionReturn(0);
1924   }
1925 
1926   /* Check how well we did distributing points*/
1927   if (viewer) {
1928     PetscCall(PetscViewerASCIIPrintf(viewer, "Number of owned entities of depth %" PetscInt_FMT ".\n", entityDepth));
1929     PetscCall(PetscViewerASCIIPrintf(viewer, "Initial      "));
1930     PetscCall(DMPlexViewDistribution(comm, cumSumVertices[rank + 1] - cumSumVertices[rank], ncon, vtxwgt, NULL, viewer));
1931     PetscCall(PetscViewerASCIIPrintf(viewer, "Rebalanced   "));
1932     PetscCall(DMPlexViewDistribution(comm, cumSumVertices[rank + 1] - cumSumVertices[rank], ncon, vtxwgt, part, viewer));
1933   }
1934 
1935   /* Check that every vertex is owned by a process that it is actually connected to. */
1936   PetscCall(MatGetRowIJ(A, PETSC_FALSE, PETSC_FALSE, PETSC_FALSE, &numRows, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &done));
1937   for (i = 1; i <= numNonExclusivelyOwned; i++) {
1938     PetscInt loc = 0;
1939     PetscCall(PetscFindInt(cumSumVertices[part[i]], xadj[i + 1] - xadj[i], &adjncy[xadj[i]], &loc));
1940     /* If not, then just set the owner to the original owner (hopefully a rare event, it means that a vertex has been isolated) */
1941     if (loc < 0) part[i] = rank;
1942   }
1943   PetscCall(MatRestoreRowIJ(A, PETSC_FALSE, PETSC_FALSE, PETSC_FALSE, &numRows, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &done));
1944   PetscCall(MatDestroy(&A));
1945 
1946   /* See how significant the influences of the previous fixing up step was.*/
1947   if (viewer) {
1948     PetscCall(PetscViewerASCIIPrintf(viewer, "After fix    "));
1949     PetscCall(DMPlexViewDistribution(comm, cumSumVertices[rank + 1] - cumSumVertices[rank], ncon, vtxwgt, part, viewer));
1950   }
1951   if (!usematpartitioning) PetscCall(PetscFree(vtxwgt));
1952   else PetscCall(MatPartitioningDestroy(&mp));
1953 
1954   PetscCall(PetscLayoutDestroy(&layout));
1955 
1956   PetscCall(PetscLogEventBegin(DMPLEX_RebalRewriteSF, dm, 0, 0, 0));
1957   /* Rewrite the SF to reflect the new ownership. */
1958   PetscCall(PetscMalloc1(numNonExclusivelyOwned, &pointsToRewrite));
1959   counter = 0;
1960   for (i = 0; i < pEnd - pStart; i++) {
1961     if (toBalance[i]) {
1962       if (isNonExclusivelyOwned[i]) {
1963         pointsToRewrite[counter] = i + pStart;
1964         counter++;
1965       }
1966     }
1967   }
1968   PetscCall(DMPlexRewriteSF(dm, numNonExclusivelyOwned, pointsToRewrite, part + 1, degrees));
1969   PetscCall(PetscFree(pointsToRewrite));
1970   PetscCall(PetscLogEventEnd(DMPLEX_RebalRewriteSF, dm, 0, 0, 0));
1971 
1972   PetscCall(PetscFree(toBalance));
1973   PetscCall(PetscFree(isLeaf));
1974   PetscCall(PetscFree(isNonExclusivelyOwned));
1975   PetscCall(PetscFree(isExclusivelyOwned));
1976   if (usematpartitioning) {
1977     PetscCall(ISRestoreIndices(ispart, (const PetscInt **)&part));
1978     PetscCall(ISDestroy(&ispart));
1979   } else PetscCall(PetscFree(part));
1980   if (viewer) {
1981     PetscCall(PetscViewerPopFormat(viewer));
1982     PetscCall(PetscViewerDestroy(&viewer));
1983   }
1984   if (success) *success = PETSC_TRUE;
1985   PetscCall(PetscLogEventEnd(DMPLEX_RebalanceSharedPoints, dm, 0, 0, 0));
1986   PetscFunctionReturn(0);
1987 #else
1988   SETERRQ(PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Mesh partitioning needs external package support.\nPlease reconfigure with --download-parmetis.");
1989 #endif
1990 }
1991