xref: /petsc/src/dm/impls/plex/plexpartition.c (revision 40badf4fbc550ac1f60bd080eaff6de6d55b946d)
1 #include <petsc/private/dmpleximpl.h>   /*I      "petscdmplex.h"   I*/
2 #include <petsc/private/partitionerimpl.h>
3 #include <petsc/private/hashseti.h>
4 
/* Option strings for DMPlexCSRAlgorithm, in the standard PETSc enum-array layout:
   the value names ("mat", "graph", "overlap"), then the enum type name, then the
   value prefix used for option parsing, then a terminating NULL */
const char * const DMPlexCSRAlgorithms[] = {"mat", "graph", "overlap", "DMPlexCSRAlgorithm", "DM_PLEX_CSR_",NULL};
6 
7 static inline PetscInt DMPlex_GlobalID(PetscInt point) { return point >= 0 ? point : -(point+1); }
8 
/* Build a cell-cell adjacency graph in distributed CSR form for a mesh partitioner.
   This variant first distributes a 1-level overlap of the mesh so that the adjacency
   of every owned cell can be computed purely locally on the overlapped DM.

   Input Parameters:
   + dm     - The mesh DM
   - height - Height of the strata used as graph vertices (0 means cells)

   Output Parameters:
   + numVertices     - Number of locally owned vertices (cells) in the graph
   . offsets         - CSR row offsets, length numVertices+1; caller frees with PetscFree()
   . adjacency       - CSR column indices in the global cell numbering; caller frees with PetscFree()
   - globalNumbering - Optional IS mapping local cells to global numbers; negative entries
                       (encoded as -(id+1)) mark cells owned by another process
*/
static PetscErrorCode DMPlexCreatePartitionerGraph_Overlap(DM dm, PetscInt height, PetscInt *numVertices, PetscInt **offsets, PetscInt **adjacency, IS *globalNumbering)
{
  DM              ovdm;
  PetscSF         sfPoint;
  IS              cellNumbering;
  const PetscInt *cellNum;
  PetscInt       *adj = NULL, *vOffsets = NULL, *vAdj = NULL;
  PetscBool       useCone, useClosure;
  PetscInt        dim, depth, overlap, cStart, cEnd, c, v;
  PetscMPIInt     rank, size;

  PetscFunctionBegin;
  CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject) dm), &rank));
  CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject) dm), &size));
  CHKERRQ(DMGetDimension(dm, &dim));
  CHKERRQ(DMPlexGetDepth(dm, &depth));
  if (dim != depth) {
    /* We do not handle the uninterpolated case here */
    CHKERRQ(DMPlexCreateNeighborCSR(dm, height, numVertices, offsets, adjacency));
    /* DMPlexCreateNeighborCSR does not make a numbering */
    if (globalNumbering) CHKERRQ(DMPlexCreateCellNumbering_Internal(dm, PETSC_TRUE, globalNumbering));
    /* Different behavior for empty graphs */
    if (!*numVertices) {
      CHKERRQ(PetscMalloc1(1, offsets));
      (*offsets)[0] = 0;
    }
    /* Broken in parallel */
    if (rank) PetscCheck(!*numVertices,PETSC_COMM_SELF, PETSC_ERR_SUP, "Parallel partitioning of uninterpolated meshes not supported");
    PetscFunctionReturn(0);
  }
  /* Always use FVM adjacency to create partitioner graph */
  CHKERRQ(DMGetBasicAdjacency(dm, &useCone, &useClosure));
  CHKERRQ(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE));
  /* Need overlap >= 1 */
  CHKERRQ(DMPlexGetOverlap(dm, &overlap));
  /* NOTE(review): size is always >= 1, so this condition reduces to overlap < 1;
     confirm whether 'size > 1' was intended to skip the serial case */
  if (size && overlap < 1) {
    CHKERRQ(DMPlexDistributeOverlap(dm, 1, NULL, &ovdm));
  } else {
    /* The mesh already carries enough overlap; reuse it (reference so the final DMDestroy is safe) */
    CHKERRQ(PetscObjectReference((PetscObject) dm));
    ovdm = dm;
  }
  CHKERRQ(DMGetPointSF(ovdm, &sfPoint));
  CHKERRQ(DMPlexGetHeightStratum(ovdm, height, &cStart, &cEnd));
  /* Global cell numbering on the overlapped mesh; unowned cells come back negative */
  CHKERRQ(DMPlexCreateNumbering_Plex(ovdm, cStart, cEnd, 0, NULL, sfPoint, &cellNumbering));
  if (globalNumbering) {
    CHKERRQ(PetscObjectReference((PetscObject) cellNumbering));
    *globalNumbering = cellNumbering;
  }
  CHKERRQ(ISGetIndices(cellNumbering, &cellNum));
  /* Determine sizes */
  for (*numVertices = 0, c = cStart; c < cEnd; ++c) {
    /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
    if (cellNum[c] < 0) continue;
    (*numVertices)++;
  }
  CHKERRQ(PetscCalloc1(*numVertices+1, &vOffsets));
  /* First pass: count adjacent cells per owned cell to build the CSR row offsets */
  for (c = cStart, v = 0; c < cEnd; ++c) {
    PetscInt adjSize = PETSC_DETERMINE, a, vsize = 0;

    if (cellNum[c] < 0) continue;
    CHKERRQ(DMPlexGetAdjacency(ovdm, c, &adjSize, &adj));
    for (a = 0; a < adjSize; ++a) {
      const PetscInt point = adj[a];
      /* Only count other points in the same stratum (cells), never the cell itself */
      if (point != c && cStart <= point && point < cEnd) ++vsize;
    }
    vOffsets[v+1] = vOffsets[v] + vsize;
    ++v;
  }
  /* Determine adjacency */
  CHKERRQ(PetscMalloc1(vOffsets[*numVertices], &vAdj));
  /* Second pass: fill in the global ids of the adjacent cells */
  for (c = cStart, v = 0; c < cEnd; ++c) {
    PetscInt adjSize = PETSC_DETERMINE, a, off = vOffsets[v];

    /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
    if (cellNum[c] < 0) continue;
    CHKERRQ(DMPlexGetAdjacency(ovdm, c, &adjSize, &adj));
    for (a = 0; a < adjSize; ++a) {
      const PetscInt point = adj[a];
      if (point != c && cStart <= point && point < cEnd) {
        vAdj[off++] = DMPlex_GlobalID(cellNum[point]);
      }
    }
    /* The fill cursor must land exactly on the next row's offset */
    PetscCheck(off == vOffsets[v+1],PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Offsets %D should be %D", off, vOffsets[v+1]);
    /* Sort adjacencies (not strictly necessary) */
    CHKERRQ(PetscSortInt(off-vOffsets[v], &vAdj[vOffsets[v]]));
    ++v;
  }
  CHKERRQ(PetscFree(adj));
  CHKERRQ(ISRestoreIndices(cellNumbering, &cellNum));
  CHKERRQ(ISDestroy(&cellNumbering));
  /* Restore the adjacency definition the caller had configured */
  CHKERRQ(DMSetBasicAdjacency(dm, useCone, useClosure));
  CHKERRQ(DMDestroy(&ovdm));
  if (offsets)   {*offsets = vOffsets;}
  else           CHKERRQ(PetscFree(vOffsets));
  if (adjacency) {*adjacency = vAdj;}
  else           CHKERRQ(PetscFree(vAdj));
  PetscFunctionReturn(0);
}
107 
/* Build a cell-cell adjacency graph in distributed CSR form for a mesh partitioner.
   This "native" variant keeps the original (non-overlapped) mesh and exchanges global
   cell ids over the point SF, so adjacencies through process-boundary faces are found
   without distributing an overlap.

   Parameters are as in DMPlexCreatePartitionerGraph(); offsets and adjacency are
   freed by the caller with PetscFree(), globalNumbering with ISDestroy(). */
static PetscErrorCode DMPlexCreatePartitionerGraph_Native(DM dm, PetscInt height, PetscInt *numVertices, PetscInt **offsets, PetscInt **adjacency, IS *globalNumbering)
{
  PetscInt       dim, depth, p, pStart, pEnd, a, adjSize, idx, size;
  PetscInt      *adj = NULL, *vOffsets = NULL, *graph = NULL;
  IS             cellNumbering;
  const PetscInt *cellNum;
  PetscBool      useCone, useClosure;
  PetscSection   section;
  PetscSegBuffer adjBuffer;
  PetscSF        sfPoint;
  PetscInt       *adjCells = NULL, *remoteCells = NULL;
  const PetscInt *local;
  PetscInt       nroots, nleaves, l;
  PetscMPIInt    rank;

  PetscFunctionBegin;
  CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject) dm), &rank));
  CHKERRQ(DMGetDimension(dm, &dim));
  CHKERRQ(DMPlexGetDepth(dm, &depth));
  if (dim != depth) {
    /* We do not handle the uninterpolated case here */
    CHKERRQ(DMPlexCreateNeighborCSR(dm, height, numVertices, offsets, adjacency));
    /* DMPlexCreateNeighborCSR does not make a numbering */
    if (globalNumbering) CHKERRQ(DMPlexCreateCellNumbering_Internal(dm, PETSC_TRUE, globalNumbering));
    /* Different behavior for empty graphs */
    if (!*numVertices) {
      CHKERRQ(PetscMalloc1(1, offsets));
      (*offsets)[0] = 0;
    }
    /* Broken in parallel */
    if (rank) PetscCheck(!*numVertices,PETSC_COMM_SELF, PETSC_ERR_SUP, "Parallel partitioning of uninterpolated meshes not supported");
    PetscFunctionReturn(0);
  }
  CHKERRQ(DMGetPointSF(dm, &sfPoint));
  CHKERRQ(DMPlexGetHeightStratum(dm, height, &pStart, &pEnd));
  /* Build adjacency graph via a section/segbuffer */
  CHKERRQ(PetscSectionCreate(PetscObjectComm((PetscObject) dm), &section));
  CHKERRQ(PetscSectionSetChart(section, pStart, pEnd));
  CHKERRQ(PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer));
  /* Always use FVM adjacency to create partitioner graph */
  CHKERRQ(DMGetBasicAdjacency(dm, &useCone, &useClosure));
  CHKERRQ(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE));
  CHKERRQ(DMPlexCreateNumbering_Plex(dm, pStart, pEnd, 0, NULL, sfPoint, &cellNumbering));
  if (globalNumbering) {
    CHKERRQ(PetscObjectReference((PetscObject)cellNumbering));
    *globalNumbering = cellNumbering;
  }
  CHKERRQ(ISGetIndices(cellNumbering, &cellNum));
  /* For all boundary faces (including faces adjacent to a ghost cell), record the local cell in adjCells
     Broadcast adjCells to remoteCells (to get cells from roots) and Reduce adjCells to remoteCells (to get cells from leaves)
   */
  CHKERRQ(PetscSFGetGraph(sfPoint, &nroots, &nleaves, &local, NULL));
  if (nroots >= 0) {
    PetscInt fStart, fEnd, f;

    CHKERRQ(PetscCalloc2(nroots, &adjCells, nroots, &remoteCells));
    CHKERRQ(DMPlexGetHeightStratum(dm, height+1, &fStart, &fEnd));
    /* -3 marks points that carry no cell contribution */
    for (l = 0; l < nroots; ++l) adjCells[l] = -3;
    for (f = fStart; f < fEnd; ++f) {
      const PetscInt *support;
      PetscInt        supportSize;

      CHKERRQ(DMPlexGetSupport(dm, f, &support));
      CHKERRQ(DMPlexGetSupportSize(dm, f, &supportSize));
      /* Boundary face: record its single cell */
      if (supportSize == 1) adjCells[f] = DMPlex_GlobalID(cellNum[support[0]]);
      else if (supportSize == 2) {
        /* Interior face: if one support cell is an SF leaf (owned elsewhere), record the other cell's global id on the face */
        CHKERRQ(PetscFindInt(support[0], nleaves, local, &p));
        if (p >= 0) adjCells[f] = DMPlex_GlobalID(cellNum[support[1]]);
        CHKERRQ(PetscFindInt(support[1], nleaves, local, &p));
        if (p >= 0) adjCells[f] = DMPlex_GlobalID(cellNum[support[0]]);
      }
      /* Handle non-conforming meshes */
      if (supportSize > 2) {
        PetscInt        numChildren, i;
        const PetscInt *children;

        CHKERRQ(DMPlexGetTreeChildren(dm, f, &numChildren, &children));
        for (i = 0; i < numChildren; ++i) {
          const PetscInt child = children[i];
          if (fStart <= child && child < fEnd) {
            CHKERRQ(DMPlexGetSupport(dm, child, &support));
            CHKERRQ(DMPlexGetSupportSize(dm, child, &supportSize));
            if (supportSize == 1) adjCells[child] = DMPlex_GlobalID(cellNum[support[0]]);
            else if (supportSize == 2) {
              CHKERRQ(PetscFindInt(support[0], nleaves, local, &p));
              if (p >= 0) adjCells[child] = DMPlex_GlobalID(cellNum[support[1]]);
              CHKERRQ(PetscFindInt(support[1], nleaves, local, &p));
              if (p >= 0) adjCells[child] = DMPlex_GlobalID(cellNum[support[0]]);
            }
          }
        }
      }
    }
    for (l = 0; l < nroots; ++l) remoteCells[l] = -1;
    /* Exchange in both directions so every shared face learns the cell on the far side */
    CHKERRQ(PetscSFBcastBegin(dm->sf, MPIU_INT, adjCells, remoteCells,MPI_REPLACE));
    CHKERRQ(PetscSFBcastEnd(dm->sf, MPIU_INT, adjCells, remoteCells,MPI_REPLACE));
    CHKERRQ(PetscSFReduceBegin(dm->sf, MPIU_INT, adjCells, remoteCells, MPI_MAX));
    CHKERRQ(PetscSFReduceEnd(dm->sf, MPIU_INT, adjCells, remoteCells, MPI_MAX));
  }
  /* Combine local and global adjacencies */
  for (*numVertices = 0, p = pStart; p < pEnd; p++) {
    /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
    /* NOTE(review): 'nroots > 0' here vs 'nroots >= 0' above -- an SF with zero roots is treated as serial; confirm intended */
    if (nroots > 0) {if (cellNum[p] < 0) continue;}
    /* Add remote cells */
    if (remoteCells) {
      const PetscInt gp = DMPlex_GlobalID(cellNum[p]);
      PetscInt       coneSize, numChildren, c, i;
      const PetscInt *cone, *children;

      CHKERRQ(DMPlexGetCone(dm, p, &cone));
      CHKERRQ(DMPlexGetConeSize(dm, p, &coneSize));
      for (c = 0; c < coneSize; ++c) {
        const PetscInt point = cone[c];
        if (remoteCells[point] >= 0 && remoteCells[point] != gp) {
          PetscInt *PETSC_RESTRICT pBuf;
          CHKERRQ(PetscSectionAddDof(section, p, 1));
          CHKERRQ(PetscSegBufferGetInts(adjBuffer, 1, &pBuf));
          *pBuf = remoteCells[point];
        }
        /* Handle non-conforming meshes */
        CHKERRQ(DMPlexGetTreeChildren(dm, point, &numChildren, &children));
        for (i = 0; i < numChildren; ++i) {
          const PetscInt child = children[i];
          if (remoteCells[child] >= 0 && remoteCells[child] != gp) {
            PetscInt *PETSC_RESTRICT pBuf;
            CHKERRQ(PetscSectionAddDof(section, p, 1));
            CHKERRQ(PetscSegBufferGetInts(adjBuffer, 1, &pBuf));
            *pBuf = remoteCells[child];
          }
        }
      }
    }
    /* Add local cells */
    adjSize = PETSC_DETERMINE;
    CHKERRQ(DMPlexGetAdjacency(dm, p, &adjSize, &adj));
    for (a = 0; a < adjSize; ++a) {
      const PetscInt point = adj[a];
      if (point != p && pStart <= point && point < pEnd) {
        PetscInt *PETSC_RESTRICT pBuf;
        CHKERRQ(PetscSectionAddDof(section, p, 1));
        CHKERRQ(PetscSegBufferGetInts(adjBuffer, 1, &pBuf));
        *pBuf = DMPlex_GlobalID(cellNum[point]);
      }
    }
    (*numVertices)++;
  }
  CHKERRQ(PetscFree(adj));
  CHKERRQ(PetscFree2(adjCells, remoteCells));
  /* Restore the adjacency definition the caller had configured */
  CHKERRQ(DMSetBasicAdjacency(dm, useCone, useClosure));

  /* Derive CSR graph from section/segbuffer */
  CHKERRQ(PetscSectionSetUp(section));
  CHKERRQ(PetscSectionGetStorageSize(section, &size));
  CHKERRQ(PetscMalloc1(*numVertices+1, &vOffsets));
  for (idx = 0, p = pStart; p < pEnd; p++) {
    if (nroots > 0) {if (cellNum[p] < 0) continue;}
    CHKERRQ(PetscSectionGetOffset(section, p, &(vOffsets[idx++])));
  }
  vOffsets[*numVertices] = size;
  CHKERRQ(PetscSegBufferExtractAlloc(adjBuffer, &graph));

  if (nroots >= 0) {
    /* Filter out duplicate edges using section/segbuffer */
    CHKERRQ(PetscSectionReset(section));
    CHKERRQ(PetscSectionSetChart(section, 0, *numVertices));
    for (p = 0; p < *numVertices; p++) {
      PetscInt start = vOffsets[p], end = vOffsets[p+1];
      PetscInt numEdges = end-start, *PETSC_RESTRICT edges;
      CHKERRQ(PetscSortRemoveDupsInt(&numEdges, &graph[start]));
      CHKERRQ(PetscSectionSetDof(section, p, numEdges));
      CHKERRQ(PetscSegBufferGetInts(adjBuffer, numEdges, &edges));
      CHKERRQ(PetscArraycpy(edges, &graph[start], numEdges));
    }
    /* Rebuild offsets/graph from the deduplicated buffer */
    CHKERRQ(PetscFree(vOffsets));
    CHKERRQ(PetscFree(graph));
    /* Derive CSR graph from section/segbuffer */
    CHKERRQ(PetscSectionSetUp(section));
    CHKERRQ(PetscSectionGetStorageSize(section, &size));
    CHKERRQ(PetscMalloc1(*numVertices+1, &vOffsets));
    for (idx = 0, p = 0; p < *numVertices; p++) {
      CHKERRQ(PetscSectionGetOffset(section, p, &(vOffsets[idx++])));
    }
    vOffsets[*numVertices] = size;
    CHKERRQ(PetscSegBufferExtractAlloc(adjBuffer, &graph));
  } else {
    /* Sort adjacencies (not strictly necessary) */
    for (p = 0; p < *numVertices; p++) {
      PetscInt start = vOffsets[p], end = vOffsets[p+1];
      CHKERRQ(PetscSortInt(end-start, &graph[start]));
    }
  }

  if (offsets) *offsets = vOffsets;
  if (adjacency) *adjacency = graph;

  /* Cleanup */
  CHKERRQ(PetscSegBufferDestroy(&adjBuffer));
  CHKERRQ(PetscSectionDestroy(&section));
  CHKERRQ(ISRestoreIndices(cellNumbering, &cellNum));
  CHKERRQ(ISDestroy(&cellNumbering));
  PetscFunctionReturn(0);
}
310 
/* Build a cell-cell adjacency graph in CSR form by assembling a facet-cell incidence
   matrix F (one row per facet, one column per cell) and forming F^T * F, whose
   off-diagonal nonzero pattern is exactly the cell connectivity through facets.

   Parameters are as in DMPlexCreatePartitionerGraph(). */
static PetscErrorCode DMPlexCreatePartitionerGraph_ViaMat(DM dm, PetscInt height, PetscInt *numVertices, PetscInt **offsets, PetscInt **adjacency, IS *globalNumbering)
{
  Mat            conn, CSR;
  IS             fis, cis, cis_own;
  PetscSF        sfPoint;
  const PetscInt *rows, *cols, *ii, *jj;
  PetscInt       *idxs,*idxs2;
  PetscInt       dim, depth, floc, cloc, i, M, N, c, lm, m, cStart, cEnd, fStart, fEnd;
  PetscMPIInt    rank;
  PetscBool      flg;

  PetscFunctionBegin;
  CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject) dm), &rank));
  CHKERRQ(DMGetDimension(dm, &dim));
  CHKERRQ(DMPlexGetDepth(dm, &depth));
  if (dim != depth) {
    /* We do not handle the uninterpolated case here */
    CHKERRQ(DMPlexCreateNeighborCSR(dm, height, numVertices, offsets, adjacency));
    /* DMPlexCreateNeighborCSR does not make a numbering */
    if (globalNumbering) CHKERRQ(DMPlexCreateCellNumbering_Internal(dm, PETSC_TRUE, globalNumbering));
    /* Different behavior for empty graphs */
    if (!*numVertices) {
      CHKERRQ(PetscMalloc1(1, offsets));
      (*offsets)[0] = 0;
    }
    /* Broken in parallel */
    if (rank) PetscCheck(!*numVertices,PETSC_COMM_SELF, PETSC_ERR_SUP, "Parallel partitioning of uninterpolated meshes not supported");
    PetscFunctionReturn(0);
  }
  /* Interpolated and parallel case */

  /* numbering */
  CHKERRQ(DMGetPointSF(dm, &sfPoint));
  CHKERRQ(DMPlexGetHeightStratum(dm, height, &cStart, &cEnd));
  CHKERRQ(DMPlexGetHeightStratum(dm, height+1, &fStart, &fEnd));
  CHKERRQ(DMPlexCreateNumbering_Plex(dm, cStart, cEnd, 0, &N, sfPoint, &cis));
  CHKERRQ(DMPlexCreateNumbering_Plex(dm, fStart, fEnd, 0, &M, sfPoint, &fis));
  if (globalNumbering) {
    CHKERRQ(ISDuplicate(cis, globalNumbering));
  }

  /* get positive global ids and local sizes for facets and cells */
  /* Unowned points come back encoded as -(id+1); decode them and count the owned ones */
  CHKERRQ(ISGetLocalSize(fis, &m));
  CHKERRQ(ISGetIndices(fis, &rows));
  CHKERRQ(PetscMalloc1(m, &idxs));
  for (i = 0, floc = 0; i < m; i++) {
    const PetscInt p = rows[i];

    if (p < 0) {
      idxs[i] = -(p+1);
    } else {
      idxs[i] = p;
      floc   += 1;
    }
  }
  CHKERRQ(ISRestoreIndices(fis, &rows));
  CHKERRQ(ISDestroy(&fis));
  CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF, m, idxs, PETSC_OWN_POINTER, &fis));

  CHKERRQ(ISGetLocalSize(cis, &m));
  CHKERRQ(ISGetIndices(cis, &cols));
  CHKERRQ(PetscMalloc1(m, &idxs));
  CHKERRQ(PetscMalloc1(m, &idxs2));
  /* idxs holds all decoded cell ids; idxs2 only the owned ones */
  for (i = 0, cloc = 0; i < m; i++) {
    const PetscInt p = cols[i];

    if (p < 0) {
      idxs[i] = -(p+1);
    } else {
      idxs[i]       = p;
      idxs2[cloc++] = p;
    }
  }
  CHKERRQ(ISRestoreIndices(cis, &cols));
  CHKERRQ(ISDestroy(&cis));
  CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)dm), m, idxs, PETSC_OWN_POINTER, &cis));
  CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)dm), cloc, idxs2, PETSC_OWN_POINTER, &cis_own));

  /* Create matrix to hold F-C connectivity (MatMatTranspose Mult not supported for MPIAIJ) */
  CHKERRQ(MatCreate(PetscObjectComm((PetscObject)dm), &conn));
  CHKERRQ(MatSetSizes(conn, floc, cloc, M, N));
  CHKERRQ(MatSetType(conn, MATMPIAIJ));
  CHKERRQ(DMPlexGetMaxSizes(dm, NULL, &lm));
  /* NOTE(review): summing the per-rank maxima presumably gives a safe (over-)estimate for preallocation -- confirm */
  CHKERRMPI(MPI_Allreduce(&lm, &m, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject) dm)));
  CHKERRQ(MatMPIAIJSetPreallocation(conn, m, NULL, m, NULL));

  /* Assemble matrix */
  CHKERRQ(ISGetIndices(fis, &rows));
  CHKERRQ(ISGetIndices(cis, &cols));
  for (c = cStart; c < cEnd; c++) {
    const PetscInt *cone;
    PetscInt        coneSize, row, col, f;

    col  = cols[c-cStart];
    CHKERRQ(DMPlexGetCone(dm, c, &cone));
    CHKERRQ(DMPlexGetConeSize(dm, c, &coneSize));
    /* Insert a 1 at (facet, cell) for every facet in the cell's cone */
    for (f = 0; f < coneSize; f++) {
      const PetscScalar v = 1.0;
      const PetscInt *children;
      PetscInt        numChildren, ch;

      row  = rows[cone[f]-fStart];
      CHKERRQ(MatSetValues(conn, 1, &row, 1, &col, &v, INSERT_VALUES));

      /* non-conforming meshes */
      CHKERRQ(DMPlexGetTreeChildren(dm, cone[f], &numChildren, &children));
      for (ch = 0; ch < numChildren; ch++) {
        const PetscInt child = children[ch];

        if (child < fStart || child >= fEnd) continue;
        row  = rows[child-fStart];
        CHKERRQ(MatSetValues(conn, 1, &row, 1, &col, &v, INSERT_VALUES));
      }
    }
  }
  CHKERRQ(ISRestoreIndices(fis, &rows));
  CHKERRQ(ISRestoreIndices(cis, &cols));
  CHKERRQ(ISDestroy(&fis));
  CHKERRQ(ISDestroy(&cis));
  CHKERRQ(MatAssemblyBegin(conn, MAT_FINAL_ASSEMBLY));
  CHKERRQ(MatAssemblyEnd(conn, MAT_FINAL_ASSEMBLY));

  /* Get parallel CSR by doing conn^T * conn */
  CHKERRQ(MatTransposeMatMult(conn, conn, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &CSR));
  CHKERRQ(MatDestroy(&conn));

  /* extract local part of the CSR */
  CHKERRQ(MatMPIAIJGetLocalMat(CSR, MAT_INITIAL_MATRIX, &conn));
  CHKERRQ(MatDestroy(&CSR));
  CHKERRQ(MatGetRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &m, &ii, &jj, &flg));
  PetscCheck(flg,PETSC_COMM_SELF, PETSC_ERR_PLIB, "No IJ format");

  /* get back requested output */
  if (numVertices) *numVertices = m;
  if (offsets) {
    CHKERRQ(PetscCalloc1(m+1, &idxs));
    /* NOTE(review): subtracting i removes one self-entry per row; assumes every row contains its diagonal -- confirm */
    for (i = 1; i < m+1; i++) idxs[i] = ii[i] - i; /* ParMetis does not like self-connectivity */
    *offsets = idxs;
  }
  if (adjacency) {
    CHKERRQ(PetscMalloc1(ii[m] - m, &idxs));
    CHKERRQ(ISGetIndices(cis_own, &rows));
    for (i = 0, c = 0; i < m; i++) {
      PetscInt j, g = rows[i];

      for (j = ii[i]; j < ii[i+1]; j++) {
        if (jj[j] == g) continue; /* again, self-connectivity */
        idxs[c++] = jj[j];
      }
    }
    PetscCheck(c == ii[m] - m,PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected %D != %D",c,ii[m]-m);
    CHKERRQ(ISRestoreIndices(cis_own, &rows));
    *adjacency = idxs;
  }

  /* cleanup */
  CHKERRQ(ISDestroy(&cis_own));
  CHKERRQ(MatRestoreRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &m, &ii, &jj, &flg));
  PetscCheck(flg,PETSC_COMM_SELF, PETSC_ERR_PLIB, "No IJ format");
  CHKERRQ(MatDestroy(&conn));
  PetscFunctionReturn(0);
}
473 
474 /*@C
475   DMPlexCreatePartitionerGraph - Create a CSR graph of point connections for the partitioner
476 
477   Input Parameters:
478 + dm      - The mesh DM dm
479 - height  - Height of the strata from which to construct the graph
480 
481   Output Parameters:
482 + numVertices     - Number of vertices in the graph
483 . offsets         - Point offsets in the graph
484 . adjacency       - Point connectivity in the graph
485 - globalNumbering - A map from the local cell numbering to the global numbering used in "adjacency".  Negative indicates that the cell is a duplicate from another process.
486 
  Notes:
  The user can control the definition of adjacency for the mesh using DMSetAdjacency(). They should choose the combination appropriate for the function
  representation on the mesh. If requested, globalNumbering needs to be destroyed by the caller; offsets and adjacency need to be freed with PetscFree().
489 
490   Options Database Keys:
491 . -dm_plex_csr_alg <mat,graph,overlap> - Choose the algorithm for computing the CSR graph
492 
493   Level: developer
494 
495 .seealso: PetscPartitionerGetType(), PetscPartitionerCreate(), DMSetAdjacency()
496 @*/
497 PetscErrorCode DMPlexCreatePartitionerGraph(DM dm, PetscInt height, PetscInt *numVertices, PetscInt **offsets, PetscInt **adjacency, IS *globalNumbering)
498 {
499   DMPlexCSRAlgorithm alg = DM_PLEX_CSR_GRAPH;
500 
501   PetscFunctionBegin;
502   CHKERRQ(PetscOptionsGetEnum(((PetscObject) dm)->options,((PetscObject) dm)->prefix, "-dm_plex_csr_alg", DMPlexCSRAlgorithms, (PetscEnum *) &alg, NULL));
503   switch (alg) {
504     case DM_PLEX_CSR_MAT:
505       CHKERRQ(DMPlexCreatePartitionerGraph_ViaMat(dm, height, numVertices, offsets, adjacency, globalNumbering));break;
506     case DM_PLEX_CSR_GRAPH:
507       CHKERRQ(DMPlexCreatePartitionerGraph_Native(dm, height, numVertices, offsets, adjacency, globalNumbering));break;
508     case DM_PLEX_CSR_OVERLAP:
509       CHKERRQ(DMPlexCreatePartitionerGraph_Overlap(dm, height, numVertices, offsets, adjacency, globalNumbering));break;
510   }
511   PetscFunctionReturn(0);
512 }
513 
514 /*@C
515   DMPlexCreateNeighborCSR - Create a mesh graph (cell-cell adjacency) in parallel CSR format.
516 
517   Collective on DM
518 
519   Input Parameters:
520 + dm - The DMPlex
521 - cellHeight - The height of mesh points to treat as cells (default should be 0)
522 
523   Output Parameters:
524 + numVertices - The number of local vertices in the graph, or cells in the mesh.
525 . offsets     - The offset to the adjacency list for each cell
526 - adjacency   - The adjacency list for all cells
527 
528   Note: This is suitable for input to a mesh partitioner like ParMetis.
529 
530   Level: advanced
531 
532 .seealso: DMPlexCreate()
533 @*/
534 PetscErrorCode DMPlexCreateNeighborCSR(DM dm, PetscInt cellHeight, PetscInt *numVertices, PetscInt **offsets, PetscInt **adjacency)
535 {
536   const PetscInt maxFaceCases = 30;
537   PetscInt       numFaceCases = 0;
538   PetscInt       numFaceVertices[30]; /* maxFaceCases, C89 sucks sucks sucks */
539   PetscInt      *off, *adj;
540   PetscInt      *neighborCells = NULL;
541   PetscInt       dim, cellDim, depth = 0, faceDepth, cStart, cEnd, c, numCells, cell;
542 
543   PetscFunctionBegin;
544   /* For parallel partitioning, I think you have to communicate supports */
545   CHKERRQ(DMGetDimension(dm, &dim));
546   cellDim = dim - cellHeight;
547   CHKERRQ(DMPlexGetDepth(dm, &depth));
548   CHKERRQ(DMPlexGetHeightStratum(dm, cellHeight, &cStart, &cEnd));
549   if (cEnd - cStart == 0) {
550     if (numVertices) *numVertices = 0;
551     if (offsets)   *offsets   = NULL;
552     if (adjacency) *adjacency = NULL;
553     PetscFunctionReturn(0);
554   }
555   numCells  = cEnd - cStart;
556   faceDepth = depth - cellHeight;
557   if (dim == depth) {
558     PetscInt f, fStart, fEnd;
559 
560     CHKERRQ(PetscCalloc1(numCells+1, &off));
561     /* Count neighboring cells */
562     CHKERRQ(DMPlexGetHeightStratum(dm, cellHeight+1, &fStart, &fEnd));
563     for (f = fStart; f < fEnd; ++f) {
564       const PetscInt *support;
565       PetscInt        supportSize;
566       CHKERRQ(DMPlexGetSupportSize(dm, f, &supportSize));
567       CHKERRQ(DMPlexGetSupport(dm, f, &support));
568       if (supportSize == 2) {
569         PetscInt numChildren;
570 
571         CHKERRQ(DMPlexGetTreeChildren(dm,f,&numChildren,NULL));
572         if (!numChildren) {
573           ++off[support[0]-cStart+1];
574           ++off[support[1]-cStart+1];
575         }
576       }
577     }
578     /* Prefix sum */
579     for (c = 1; c <= numCells; ++c) off[c] += off[c-1];
580     if (adjacency) {
581       PetscInt *tmp;
582 
583       CHKERRQ(PetscMalloc1(off[numCells], &adj));
584       CHKERRQ(PetscMalloc1(numCells+1, &tmp));
585       CHKERRQ(PetscArraycpy(tmp, off, numCells+1));
586       /* Get neighboring cells */
587       for (f = fStart; f < fEnd; ++f) {
588         const PetscInt *support;
589         PetscInt        supportSize;
590         CHKERRQ(DMPlexGetSupportSize(dm, f, &supportSize));
591         CHKERRQ(DMPlexGetSupport(dm, f, &support));
592         if (supportSize == 2) {
593           PetscInt numChildren;
594 
595           CHKERRQ(DMPlexGetTreeChildren(dm,f,&numChildren,NULL));
596           if (!numChildren) {
597             adj[tmp[support[0]-cStart]++] = support[1];
598             adj[tmp[support[1]-cStart]++] = support[0];
599           }
600         }
601       }
602       for (c = 0; c < cEnd-cStart; ++c) PetscAssert(tmp[c] == off[c+1],PETSC_COMM_SELF, PETSC_ERR_PLIB, "Offset %d != %d for cell %d", tmp[c], off[c], c+cStart);
603       CHKERRQ(PetscFree(tmp));
604     }
605     if (numVertices) *numVertices = numCells;
606     if (offsets)   *offsets   = off;
607     if (adjacency) *adjacency = adj;
608     PetscFunctionReturn(0);
609   }
610   /* Setup face recognition */
611   if (faceDepth == 1) {
612     PetscInt cornersSeen[30] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; /* Could use PetscBT */
613 
614     for (c = cStart; c < cEnd; ++c) {
615       PetscInt corners;
616 
617       CHKERRQ(DMPlexGetConeSize(dm, c, &corners));
618       if (!cornersSeen[corners]) {
619         PetscInt nFV;
620 
621         PetscCheck(numFaceCases < maxFaceCases,PetscObjectComm((PetscObject)dm), PETSC_ERR_PLIB, "Exceeded maximum number of face recognition cases");
622         cornersSeen[corners] = 1;
623 
624         CHKERRQ(DMPlexGetNumFaceVertices(dm, cellDim, corners, &nFV));
625 
626         numFaceVertices[numFaceCases++] = nFV;
627       }
628     }
629   }
630   CHKERRQ(PetscCalloc1(numCells+1, &off));
631   /* Count neighboring cells */
632   for (cell = cStart; cell < cEnd; ++cell) {
633     PetscInt numNeighbors = PETSC_DETERMINE, n;
634 
635     CHKERRQ(DMPlexGetAdjacency_Internal(dm, cell, PETSC_TRUE, PETSC_FALSE, PETSC_FALSE, &numNeighbors, &neighborCells));
636     /* Get meet with each cell, and check with recognizer (could optimize to check each pair only once) */
637     for (n = 0; n < numNeighbors; ++n) {
638       PetscInt        cellPair[2];
639       PetscBool       found    = faceDepth > 1 ? PETSC_TRUE : PETSC_FALSE;
640       PetscInt        meetSize = 0;
641       const PetscInt *meet    = NULL;
642 
643       cellPair[0] = cell; cellPair[1] = neighborCells[n];
644       if (cellPair[0] == cellPair[1]) continue;
645       if (!found) {
646         CHKERRQ(DMPlexGetMeet(dm, 2, cellPair, &meetSize, &meet));
647         if (meetSize) {
648           PetscInt f;
649 
650           for (f = 0; f < numFaceCases; ++f) {
651             if (numFaceVertices[f] == meetSize) {
652               found = PETSC_TRUE;
653               break;
654             }
655           }
656         }
657         CHKERRQ(DMPlexRestoreMeet(dm, 2, cellPair, &meetSize, &meet));
658       }
659       if (found) ++off[cell-cStart+1];
660     }
661   }
662   /* Prefix sum */
663   for (cell = 1; cell <= numCells; ++cell) off[cell] += off[cell-1];
664 
665   if (adjacency) {
666     CHKERRQ(PetscMalloc1(off[numCells], &adj));
667     /* Get neighboring cells */
668     for (cell = cStart; cell < cEnd; ++cell) {
669       PetscInt numNeighbors = PETSC_DETERMINE, n;
670       PetscInt cellOffset   = 0;
671 
672       CHKERRQ(DMPlexGetAdjacency_Internal(dm, cell, PETSC_TRUE, PETSC_FALSE, PETSC_FALSE, &numNeighbors, &neighborCells));
673       /* Get meet with each cell, and check with recognizer (could optimize to check each pair only once) */
674       for (n = 0; n < numNeighbors; ++n) {
675         PetscInt        cellPair[2];
676         PetscBool       found    = faceDepth > 1 ? PETSC_TRUE : PETSC_FALSE;
677         PetscInt        meetSize = 0;
678         const PetscInt *meet    = NULL;
679 
680         cellPair[0] = cell; cellPair[1] = neighborCells[n];
681         if (cellPair[0] == cellPair[1]) continue;
682         if (!found) {
683           CHKERRQ(DMPlexGetMeet(dm, 2, cellPair, &meetSize, &meet));
684           if (meetSize) {
685             PetscInt f;
686 
687             for (f = 0; f < numFaceCases; ++f) {
688               if (numFaceVertices[f] == meetSize) {
689                 found = PETSC_TRUE;
690                 break;
691               }
692             }
693           }
694           CHKERRQ(DMPlexRestoreMeet(dm, 2, cellPair, &meetSize, &meet));
695         }
696         if (found) {
697           adj[off[cell-cStart]+cellOffset] = neighborCells[n];
698           ++cellOffset;
699         }
700       }
701     }
702   }
703   CHKERRQ(PetscFree(neighborCells));
704   if (numVertices) *numVertices = numCells;
705   if (offsets)   *offsets   = off;
706   if (adjacency) *adjacency = adj;
707   PetscFunctionReturn(0);
708 }
709 
710 /*@
711   PetscPartitionerDMPlexPartition - Create a non-overlapping partition of the cells in the mesh
712 
713   Collective on PetscPartitioner
714 
715   Input Parameters:
716 + part    - The PetscPartitioner
717 . targetSection - The PetscSection describing the absolute weight of each partition (can be NULL)
718 - dm      - The mesh DM
719 
720   Output Parameters:
721 + partSection     - The PetscSection giving the division of points by partition
722 - partition       - The list of points by partition
723 
724   Notes:
725     If the DM has a local section associated, each point to be partitioned will be weighted by the total number of dofs identified
726     by the section in the transitive closure of the point.
727 
728   Level: developer
729 
.seealso: DMPlexDistribute(), PetscPartitionerCreate(), PetscSectionCreate(), PetscSectionSetChart(), PetscPartitionerPartition()
731 @*/
732 PetscErrorCode PetscPartitionerDMPlexPartition(PetscPartitioner part, DM dm, PetscSection targetSection, PetscSection partSection, IS *partition)
733 {
734   PetscMPIInt    size;
735   PetscBool      isplex;
736   PetscSection   vertSection = NULL;
737 
738   PetscFunctionBegin;
739   PetscValidHeaderSpecific(part, PETSCPARTITIONER_CLASSID, 1);
740   PetscValidHeaderSpecific(dm, DM_CLASSID, 2);
741   if (targetSection) PetscValidHeaderSpecific(targetSection, PETSC_SECTION_CLASSID, 3);
742   PetscValidHeaderSpecific(partSection, PETSC_SECTION_CLASSID, 4);
743   PetscValidPointer(partition, 5);
744   CHKERRQ(PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex));
745   PetscCheck(isplex,PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"Not for type %s",((PetscObject)dm)->type_name);
746   CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject) part), &size));
747   if (size == 1) {
748     PetscInt *points;
749     PetscInt  cStart, cEnd, c;
750 
751     CHKERRQ(DMPlexGetHeightStratum(dm, part->height, &cStart, &cEnd));
752     CHKERRQ(PetscSectionReset(partSection));
753     CHKERRQ(PetscSectionSetChart(partSection, 0, size));
754     CHKERRQ(PetscSectionSetDof(partSection, 0, cEnd-cStart));
755     CHKERRQ(PetscSectionSetUp(partSection));
756     CHKERRQ(PetscMalloc1(cEnd-cStart, &points));
757     for (c = cStart; c < cEnd; ++c) points[c] = c;
758     CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject) part), cEnd-cStart, points, PETSC_OWN_POINTER, partition));
759     PetscFunctionReturn(0);
760   }
761   if (part->height == 0) {
762     PetscInt numVertices = 0;
763     PetscInt *start     = NULL;
764     PetscInt *adjacency = NULL;
765     IS       globalNumbering;
766 
767     if (!part->noGraph || part->viewGraph) {
768       CHKERRQ(DMPlexCreatePartitionerGraph(dm, part->height, &numVertices, &start, &adjacency, &globalNumbering));
769     } else { /* only compute the number of owned local vertices */
770       const PetscInt *idxs;
771       PetscInt       p, pStart, pEnd;
772 
773       CHKERRQ(DMPlexGetHeightStratum(dm, part->height, &pStart, &pEnd));
774       CHKERRQ(DMPlexCreateNumbering_Plex(dm, pStart, pEnd, 0, NULL, dm->sf, &globalNumbering));
775       CHKERRQ(ISGetIndices(globalNumbering, &idxs));
776       for (p = 0; p < pEnd - pStart; p++) numVertices += idxs[p] < 0 ? 0 : 1;
777       CHKERRQ(ISRestoreIndices(globalNumbering, &idxs));
778     }
779     if (part->usevwgt) {
780       PetscSection   section = dm->localSection, clSection = NULL;
781       IS             clPoints = NULL;
782       const PetscInt *gid,*clIdx;
783       PetscInt       v, p, pStart, pEnd;
784 
785       /* dm->localSection encodes degrees of freedom per point, not per cell. We need to get the closure index to properly specify cell weights (aka dofs) */
786       /* We do this only if the local section has been set */
787       if (section) {
788         CHKERRQ(PetscSectionGetClosureIndex(section, (PetscObject)dm, &clSection, NULL));
789         if (!clSection) {
790           CHKERRQ(DMPlexCreateClosureIndex(dm,NULL));
791         }
792         CHKERRQ(PetscSectionGetClosureIndex(section, (PetscObject)dm, &clSection, &clPoints));
793         CHKERRQ(ISGetIndices(clPoints,&clIdx));
794       }
795       CHKERRQ(DMPlexGetHeightStratum(dm, part->height, &pStart, &pEnd));
796       CHKERRQ(PetscSectionCreate(PETSC_COMM_SELF, &vertSection));
797       CHKERRQ(PetscSectionSetChart(vertSection, 0, numVertices));
798       if (globalNumbering) {
799         CHKERRQ(ISGetIndices(globalNumbering,&gid));
800       } else gid = NULL;
801       for (p = pStart, v = 0; p < pEnd; ++p) {
802         PetscInt dof = 1;
803 
804         /* skip cells in the overlap */
805         if (gid && gid[p-pStart] < 0) continue;
806 
807         if (section) {
808           PetscInt cl, clSize, clOff;
809 
810           dof  = 0;
811           CHKERRQ(PetscSectionGetDof(clSection, p, &clSize));
812           CHKERRQ(PetscSectionGetOffset(clSection, p, &clOff));
813           for (cl = 0; cl < clSize; cl+=2) {
814             PetscInt clDof, clPoint = clIdx[clOff + cl]; /* odd indices are reserved for orientations */
815 
816             CHKERRQ(PetscSectionGetDof(section, clPoint, &clDof));
817             dof += clDof;
818           }
819         }
820         PetscCheck(dof,PETSC_COMM_SELF,PETSC_ERR_SUP,"Number of dofs for point %D in the local section should be positive",p);
821         CHKERRQ(PetscSectionSetDof(vertSection, v, dof));
822         v++;
823       }
824       if (globalNumbering) {
825         CHKERRQ(ISRestoreIndices(globalNumbering,&gid));
826       }
827       if (clPoints) {
828         CHKERRQ(ISRestoreIndices(clPoints,&clIdx));
829       }
830       CHKERRQ(PetscSectionSetUp(vertSection));
831     }
832     CHKERRQ(PetscPartitionerPartition(part, size, numVertices, start, adjacency, vertSection, targetSection, partSection, partition));
833     CHKERRQ(PetscFree(start));
834     CHKERRQ(PetscFree(adjacency));
835     if (globalNumbering) { /* partition is wrt global unique numbering: change this to be wrt local numbering */
836       const PetscInt *globalNum;
837       const PetscInt *partIdx;
838       PetscInt       *map, cStart, cEnd;
839       PetscInt       *adjusted, i, localSize, offset;
840       IS             newPartition;
841 
842       CHKERRQ(ISGetLocalSize(*partition,&localSize));
843       CHKERRQ(PetscMalloc1(localSize,&adjusted));
844       CHKERRQ(ISGetIndices(globalNumbering,&globalNum));
845       CHKERRQ(ISGetIndices(*partition,&partIdx));
846       CHKERRQ(PetscMalloc1(localSize,&map));
847       CHKERRQ(DMPlexGetHeightStratum(dm, part->height, &cStart, &cEnd));
848       for (i = cStart, offset = 0; i < cEnd; i++) {
849         if (globalNum[i - cStart] >= 0) map[offset++] = i;
850       }
851       for (i = 0; i < localSize; i++) {
852         adjusted[i] = map[partIdx[i]];
853       }
854       CHKERRQ(PetscFree(map));
855       CHKERRQ(ISRestoreIndices(*partition,&partIdx));
856       CHKERRQ(ISRestoreIndices(globalNumbering,&globalNum));
857       CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,localSize,adjusted,PETSC_OWN_POINTER,&newPartition));
858       CHKERRQ(ISDestroy(&globalNumbering));
859       CHKERRQ(ISDestroy(partition));
860       *partition = newPartition;
861     }
862   } else SETERRQ(PetscObjectComm((PetscObject) part), PETSC_ERR_ARG_OUTOFRANGE, "Invalid height %D for points to partition", part->height);
863   CHKERRQ(PetscSectionDestroy(&vertSection));
864   PetscFunctionReturn(0);
865 }
866 
867 /*@
868   DMPlexGetPartitioner - Get the mesh partitioner
869 
870   Not collective
871 
872   Input Parameter:
873 . dm - The DM
874 
875   Output Parameter:
876 . part - The PetscPartitioner
877 
878   Level: developer
879 
880   Note: This gets a borrowed reference, so the user should not destroy this PetscPartitioner.
881 
882 .seealso DMPlexDistribute(), DMPlexSetPartitioner(), PetscPartitionerDMPlexPartition(), PetscPartitionerCreate()
883 @*/
884 PetscErrorCode DMPlexGetPartitioner(DM dm, PetscPartitioner *part)
885 {
886   DM_Plex       *mesh = (DM_Plex *) dm->data;
887 
888   PetscFunctionBegin;
889   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
890   PetscValidPointer(part, 2);
891   *part = mesh->partitioner;
892   PetscFunctionReturn(0);
893 }
894 
895 /*@
896   DMPlexSetPartitioner - Set the mesh partitioner
897 
898   logically collective on DM
899 
900   Input Parameters:
901 + dm - The DM
902 - part - The partitioner
903 
904   Level: developer
905 
906   Note: Any existing PetscPartitioner will be destroyed.
907 
908 .seealso DMPlexDistribute(), DMPlexGetPartitioner(), PetscPartitionerCreate()
909 @*/
910 PetscErrorCode DMPlexSetPartitioner(DM dm, PetscPartitioner part)
911 {
912   DM_Plex       *mesh = (DM_Plex *) dm->data;
913 
914   PetscFunctionBegin;
915   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
916   PetscValidHeaderSpecific(part, PETSCPARTITIONER_CLASSID, 2);
917   CHKERRQ(PetscObjectReference((PetscObject)part));
918   CHKERRQ(PetscPartitionerDestroy(&mesh->partitioner));
919   mesh->partitioner = part;
920   PetscFunctionReturn(0);
921 }
922 
923 static PetscErrorCode DMPlexAddClosure_Private(DM dm, PetscHSetI ht, PetscInt point)
924 {
925   const PetscInt *cone;
926   PetscInt       coneSize, c;
927   PetscBool      missing;
928 
929   PetscFunctionBeginHot;
930   CHKERRQ(PetscHSetIQueryAdd(ht, point, &missing));
931   if (missing) {
932     CHKERRQ(DMPlexGetCone(dm, point, &cone));
933     CHKERRQ(DMPlexGetConeSize(dm, point, &coneSize));
934     for (c = 0; c < coneSize; c++) {
935       CHKERRQ(DMPlexAddClosure_Private(dm, ht, cone[c]));
936     }
937   }
938   PetscFunctionReturn(0);
939 }
940 
941 PETSC_UNUSED static PetscErrorCode DMPlexAddClosure_Tree(DM dm, PetscHSetI ht, PetscInt point, PetscBool up, PetscBool down)
942 {
943   PetscFunctionBegin;
944   if (up) {
945     PetscInt parent;
946 
947     CHKERRQ(DMPlexGetTreeParent(dm,point,&parent,NULL));
948     if (parent != point) {
949       PetscInt closureSize, *closure = NULL, i;
950 
951       CHKERRQ(DMPlexGetTransitiveClosure(dm,parent,PETSC_TRUE,&closureSize,&closure));
952       for (i = 0; i < closureSize; i++) {
953         PetscInt cpoint = closure[2*i];
954 
955         CHKERRQ(PetscHSetIAdd(ht, cpoint));
956         CHKERRQ(DMPlexAddClosure_Tree(dm,ht,cpoint,PETSC_TRUE,PETSC_FALSE));
957       }
958       CHKERRQ(DMPlexRestoreTransitiveClosure(dm,parent,PETSC_TRUE,&closureSize,&closure));
959     }
960   }
961   if (down) {
962     PetscInt numChildren;
963     const PetscInt *children;
964 
965     CHKERRQ(DMPlexGetTreeChildren(dm,point,&numChildren,&children));
966     if (numChildren) {
967       PetscInt i;
968 
969       for (i = 0; i < numChildren; i++) {
970         PetscInt cpoint = children[i];
971 
972         CHKERRQ(PetscHSetIAdd(ht, cpoint));
973         CHKERRQ(DMPlexAddClosure_Tree(dm,ht,cpoint,PETSC_FALSE,PETSC_TRUE));
974       }
975     }
976   }
977   PetscFunctionReturn(0);
978 }
979 
980 static PetscErrorCode DMPlexAddClosureTree_Up_Private(DM dm, PetscHSetI ht, PetscInt point)
981 {
982   PetscInt       parent;
983 
984   PetscFunctionBeginHot;
985   CHKERRQ(DMPlexGetTreeParent(dm, point, &parent,NULL));
986   if (point != parent) {
987     const PetscInt *cone;
988     PetscInt       coneSize, c;
989 
990     CHKERRQ(DMPlexAddClosureTree_Up_Private(dm, ht, parent));
991     CHKERRQ(DMPlexAddClosure_Private(dm, ht, parent));
992     CHKERRQ(DMPlexGetCone(dm, parent, &cone));
993     CHKERRQ(DMPlexGetConeSize(dm, parent, &coneSize));
994     for (c = 0; c < coneSize; c++) {
995       const PetscInt cp = cone[c];
996 
997       CHKERRQ(DMPlexAddClosureTree_Up_Private(dm, ht, cp));
998     }
999   }
1000   PetscFunctionReturn(0);
1001 }
1002 
1003 static PetscErrorCode DMPlexAddClosureTree_Down_Private(DM dm, PetscHSetI ht, PetscInt point)
1004 {
1005   PetscInt       i, numChildren;
1006   const PetscInt *children;
1007 
1008   PetscFunctionBeginHot;
1009   CHKERRQ(DMPlexGetTreeChildren(dm, point, &numChildren, &children));
1010   for (i = 0; i < numChildren; i++) {
1011     CHKERRQ(PetscHSetIAdd(ht, children[i]));
1012   }
1013   PetscFunctionReturn(0);
1014 }
1015 
1016 static PetscErrorCode DMPlexAddClosureTree_Private(DM dm, PetscHSetI ht, PetscInt point)
1017 {
1018   const PetscInt *cone;
1019   PetscInt       coneSize, c;
1020 
1021   PetscFunctionBeginHot;
1022   CHKERRQ(PetscHSetIAdd(ht, point));
1023   CHKERRQ(DMPlexAddClosureTree_Up_Private(dm, ht, point));
1024   CHKERRQ(DMPlexAddClosureTree_Down_Private(dm, ht, point));
1025   CHKERRQ(DMPlexGetCone(dm, point, &cone));
1026   CHKERRQ(DMPlexGetConeSize(dm, point, &coneSize));
1027   for (c = 0; c < coneSize; c++) {
1028     CHKERRQ(DMPlexAddClosureTree_Private(dm, ht, cone[c]));
1029   }
1030   PetscFunctionReturn(0);
1031 }
1032 
/* Compute the union of the transitive closures of the given points, returned as a
   sorted IS of unique points. When the mesh has a refinement tree (non-conforming
   mesh), tree parents/children are included via the *Tree* helpers. The caller owns
   the returned IS. */
PetscErrorCode DMPlexClosurePoints_Private(DM dm, PetscInt numPoints, const PetscInt points[], IS *closureIS)
{
  DM_Plex         *mesh = (DM_Plex *)dm->data;
  /* A tree exists iff a parent or child section has been set on the Plex */
  const PetscBool hasTree = (mesh->parentSection || mesh->childSection) ? PETSC_TRUE : PETSC_FALSE;
  PetscInt        nelems, *elems, off = 0, p;
  PetscHSetI      ht = NULL;

  PetscFunctionBegin;
  CHKERRQ(PetscHSetICreate(&ht));
  /* Pre-size assuming ~16 closure points per input point to limit rehashing */
  CHKERRQ(PetscHSetIResize(ht, numPoints*16));
  if (!hasTree) {
    for (p = 0; p < numPoints; ++p) {
      CHKERRQ(DMPlexAddClosure_Private(dm, ht, points[p]));
    }
  } else {
#if 1
    for (p = 0; p < numPoints; ++p) {
      CHKERRQ(DMPlexAddClosureTree_Private(dm, ht, points[p]));
    }
#else
    /* Alternative (disabled) implementation using the generic transitive closure
       plus DMPlexAddClosure_Tree, kept for reference */
    PetscInt  *closure = NULL, closureSize, c;
    for (p = 0; p < numPoints; ++p) {
      CHKERRQ(DMPlexGetTransitiveClosure(dm, points[p], PETSC_TRUE, &closureSize, &closure));
      for (c = 0; c < closureSize*2; c += 2) {
        CHKERRQ(PetscHSetIAdd(ht, closure[c]));
        if (hasTree) CHKERRQ(DMPlexAddClosure_Tree(dm, ht, closure[c], PETSC_TRUE, PETSC_TRUE));
      }
    }
    if (closure) CHKERRQ(DMPlexRestoreTransitiveClosure(dm, 0, PETSC_TRUE, NULL, &closure));
#endif
  }
  /* Dump the set into a sorted array and hand ownership to the IS */
  CHKERRQ(PetscHSetIGetSize(ht, &nelems));
  CHKERRQ(PetscMalloc1(nelems, &elems));
  CHKERRQ(PetscHSetIGetElems(ht, &off, elems));
  CHKERRQ(PetscHSetIDestroy(&ht));
  CHKERRQ(PetscSortInt(nelems, elems));
  CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF, nelems, elems, PETSC_OWN_POINTER, closureIS));
  PetscFunctionReturn(0);
}
1072 
1073 /*@
1074   DMPlexPartitionLabelClosure - Add the closure of all points to the partition label
1075 
1076   Input Parameters:
1077 + dm     - The DM
1078 - label  - DMLabel assigning ranks to remote roots
1079 
1080   Level: developer
1081 
1082 .seealso: DMPlexPartitionLabelCreateSF(), DMPlexDistribute(), DMPlexCreateOverlap()
1083 @*/
1084 PetscErrorCode DMPlexPartitionLabelClosure(DM dm, DMLabel label)
1085 {
1086   IS              rankIS,   pointIS, closureIS;
1087   const PetscInt *ranks,   *points;
1088   PetscInt        numRanks, numPoints, r;
1089 
1090   PetscFunctionBegin;
1091   CHKERRQ(DMLabelGetValueIS(label, &rankIS));
1092   CHKERRQ(ISGetLocalSize(rankIS, &numRanks));
1093   CHKERRQ(ISGetIndices(rankIS, &ranks));
1094   for (r = 0; r < numRanks; ++r) {
1095     const PetscInt rank = ranks[r];
1096     CHKERRQ(DMLabelGetStratumIS(label, rank, &pointIS));
1097     CHKERRQ(ISGetLocalSize(pointIS, &numPoints));
1098     CHKERRQ(ISGetIndices(pointIS, &points));
1099     CHKERRQ(DMPlexClosurePoints_Private(dm, numPoints, points, &closureIS));
1100     CHKERRQ(ISRestoreIndices(pointIS, &points));
1101     CHKERRQ(ISDestroy(&pointIS));
1102     CHKERRQ(DMLabelSetStratumIS(label, rank, closureIS));
1103     CHKERRQ(ISDestroy(&closureIS));
1104   }
1105   CHKERRQ(ISRestoreIndices(rankIS, &ranks));
1106   CHKERRQ(ISDestroy(&rankIS));
1107   PetscFunctionReturn(0);
1108 }
1109 
1110 /*@
1111   DMPlexPartitionLabelAdjacency - Add one level of adjacent points to the partition label
1112 
1113   Input Parameters:
1114 + dm     - The DM
1115 - label  - DMLabel assigning ranks to remote roots
1116 
1117   Level: developer
1118 
1119 .seealso: DMPlexPartitionLabelCreateSF(), DMPlexDistribute(), DMPlexCreateOverlap()
1120 @*/
1121 PetscErrorCode DMPlexPartitionLabelAdjacency(DM dm, DMLabel label)
1122 {
1123   IS              rankIS,   pointIS;
1124   const PetscInt *ranks,   *points;
1125   PetscInt        numRanks, numPoints, r, p, a, adjSize;
1126   PetscInt       *adj = NULL;
1127 
1128   PetscFunctionBegin;
1129   CHKERRQ(DMLabelGetValueIS(label, &rankIS));
1130   CHKERRQ(ISGetLocalSize(rankIS, &numRanks));
1131   CHKERRQ(ISGetIndices(rankIS, &ranks));
1132   for (r = 0; r < numRanks; ++r) {
1133     const PetscInt rank = ranks[r];
1134 
1135     CHKERRQ(DMLabelGetStratumIS(label, rank, &pointIS));
1136     CHKERRQ(ISGetLocalSize(pointIS, &numPoints));
1137     CHKERRQ(ISGetIndices(pointIS, &points));
1138     for (p = 0; p < numPoints; ++p) {
1139       adjSize = PETSC_DETERMINE;
1140       CHKERRQ(DMPlexGetAdjacency(dm, points[p], &adjSize, &adj));
1141       for (a = 0; a < adjSize; ++a) CHKERRQ(DMLabelSetValue(label, adj[a], rank));
1142     }
1143     CHKERRQ(ISRestoreIndices(pointIS, &points));
1144     CHKERRQ(ISDestroy(&pointIS));
1145   }
1146   CHKERRQ(ISRestoreIndices(rankIS, &ranks));
1147   CHKERRQ(ISDestroy(&rankIS));
1148   CHKERRQ(PetscFree(adj));
1149   PetscFunctionReturn(0);
1150 }
1151 
1152 /*@
1153   DMPlexPartitionLabelPropagate - Propagate points in a partition label over the point SF
1154 
1155   Input Parameters:
1156 + dm     - The DM
1157 - label  - DMLabel assigning ranks to remote roots
1158 
1159   Level: developer
1160 
1161   Note: This is required when generating multi-level overlaps to capture
1162   overlap points from non-neighbouring partitions.
1163 
1164 .seealso: DMPlexPartitionLabelCreateSF(), DMPlexDistribute(), DMPlexCreateOverlap()
1165 @*/
1166 PetscErrorCode DMPlexPartitionLabelPropagate(DM dm, DMLabel label)
1167 {
1168   MPI_Comm        comm;
1169   PetscMPIInt     rank;
1170   PetscSF         sfPoint;
1171   DMLabel         lblRoots, lblLeaves;
1172   IS              rankIS, pointIS;
1173   const PetscInt *ranks;
1174   PetscInt        numRanks, r;
1175 
1176   PetscFunctionBegin;
1177   CHKERRQ(PetscObjectGetComm((PetscObject) dm, &comm));
1178   CHKERRMPI(MPI_Comm_rank(comm, &rank));
1179   CHKERRQ(DMGetPointSF(dm, &sfPoint));
1180   /* Pull point contributions from remote leaves into local roots */
1181   CHKERRQ(DMLabelGather(label, sfPoint, &lblLeaves));
1182   CHKERRQ(DMLabelGetValueIS(lblLeaves, &rankIS));
1183   CHKERRQ(ISGetLocalSize(rankIS, &numRanks));
1184   CHKERRQ(ISGetIndices(rankIS, &ranks));
1185   for (r = 0; r < numRanks; ++r) {
1186     const PetscInt remoteRank = ranks[r];
1187     if (remoteRank == rank) continue;
1188     CHKERRQ(DMLabelGetStratumIS(lblLeaves, remoteRank, &pointIS));
1189     CHKERRQ(DMLabelInsertIS(label, pointIS, remoteRank));
1190     CHKERRQ(ISDestroy(&pointIS));
1191   }
1192   CHKERRQ(ISRestoreIndices(rankIS, &ranks));
1193   CHKERRQ(ISDestroy(&rankIS));
1194   CHKERRQ(DMLabelDestroy(&lblLeaves));
1195   /* Push point contributions from roots into remote leaves */
1196   CHKERRQ(DMLabelDistribute(label, sfPoint, &lblRoots));
1197   CHKERRQ(DMLabelGetValueIS(lblRoots, &rankIS));
1198   CHKERRQ(ISGetLocalSize(rankIS, &numRanks));
1199   CHKERRQ(ISGetIndices(rankIS, &ranks));
1200   for (r = 0; r < numRanks; ++r) {
1201     const PetscInt remoteRank = ranks[r];
1202     if (remoteRank == rank) continue;
1203     CHKERRQ(DMLabelGetStratumIS(lblRoots, remoteRank, &pointIS));
1204     CHKERRQ(DMLabelInsertIS(label, pointIS, remoteRank));
1205     CHKERRQ(ISDestroy(&pointIS));
1206   }
1207   CHKERRQ(ISRestoreIndices(rankIS, &ranks));
1208   CHKERRQ(ISDestroy(&rankIS));
1209   CHKERRQ(DMLabelDestroy(&lblRoots));
1210   PetscFunctionReturn(0);
1211 }
1212 
/*@
  DMPlexPartitionLabelInvert - Create a partition label of remote roots from a local root label

  Input Parameters:
+ dm        - The DM
. rootLabel - DMLabel assigning ranks to local roots
- processSF - A star forest mapping into the local index on each remote rank

  Output Parameter:
. leafLabel - DMLabel assigning ranks to remote roots

  Note: The rootLabel defines a send pattern by mapping local points to remote target ranks. The
  resulting leafLabel is a receiver mapping of remote roots to their parent rank.

  Level: developer

.seealso: DMPlexPartitionLabelCreateSF(), DMPlexDistribute(), DMPlexCreateOverlap()
@*/
PetscErrorCode DMPlexPartitionLabelInvert(DM dm, DMLabel rootLabel, PetscSF processSF, DMLabel leafLabel)
{
  MPI_Comm           comm;
  PetscMPIInt        rank, size, r;
  PetscInt           p, n, numNeighbors, numPoints, dof, off, rootSize, l, nleaves, leafSize;
  PetscSF            sfPoint;
  PetscSection       rootSection;
  PetscSFNode       *rootPoints, *leafPoints;
  const PetscSFNode *remote;
  const PetscInt    *local, *neighbors;
  IS                 valueIS;
  PetscBool          mpiOverflow = PETSC_FALSE;

  PetscFunctionBegin;
  CHKERRQ(PetscLogEventBegin(DMPLEX_PartLabelInvert,dm,0,0,0));
  CHKERRQ(PetscObjectGetComm((PetscObject) dm, &comm));
  CHKERRMPI(MPI_Comm_rank(comm, &rank));
  CHKERRMPI(MPI_Comm_size(comm, &size));
  CHKERRQ(DMGetPointSF(dm, &sfPoint));

  /* Convert to (point, rank) and use actual owners */
  /* rootSection counts, per destination rank, the points this process sends */
  CHKERRQ(PetscSectionCreate(comm, &rootSection));
  CHKERRQ(PetscSectionSetChart(rootSection, 0, size));
  CHKERRQ(DMLabelGetValueIS(rootLabel, &valueIS));
  CHKERRQ(ISGetLocalSize(valueIS, &numNeighbors));
  CHKERRQ(ISGetIndices(valueIS, &neighbors));
  for (n = 0; n < numNeighbors; ++n) {
    CHKERRQ(DMLabelGetStratumSize(rootLabel, neighbors[n], &numPoints));
    CHKERRQ(PetscSectionAddDof(rootSection, neighbors[n], numPoints));
  }
  CHKERRQ(PetscSectionSetUp(rootSection));
  CHKERRQ(PetscSectionGetStorageSize(rootSection, &rootSize));
  CHKERRQ(PetscMalloc1(rootSize, &rootPoints));
  CHKERRQ(PetscSFGetGraph(sfPoint, NULL, &nleaves, &local, &remote));
  for (n = 0; n < numNeighbors; ++n) {
    IS              pointIS;
    const PetscInt *points;

    CHKERRQ(PetscSectionGetOffset(rootSection, neighbors[n], &off));
    CHKERRQ(DMLabelGetStratumIS(rootLabel, neighbors[n], &pointIS));
    CHKERRQ(ISGetLocalSize(pointIS, &numPoints));
    CHKERRQ(ISGetIndices(pointIS, &points));
    for (p = 0; p < numPoints; ++p) {
      /* If the point is an SF leaf, replace it by its actual (remote) owner;
         otherwise this process owns it */
      if (local) CHKERRQ(PetscFindInt(points[p], nleaves, local, &l));
      else       {l = -1;}
      if (l >= 0) {rootPoints[off+p] = remote[l];}
      else        {rootPoints[off+p].index = points[p]; rootPoints[off+p].rank = rank;}
    }
    CHKERRQ(ISRestoreIndices(pointIS, &points));
    CHKERRQ(ISDestroy(&pointIS));
  }

  /* Try to communicate overlap using All-to-All */
  if (!processSF) {
    PetscInt64  counter = 0;
    PetscBool   locOverflow = PETSC_FALSE;
    PetscMPIInt *scounts, *sdispls, *rcounts, *rdispls;

    CHKERRQ(PetscCalloc4(size, &scounts, size, &sdispls, size, &rcounts, size, &rdispls));
    for (n = 0; n < numNeighbors; ++n) {
      CHKERRQ(PetscSectionGetDof(rootSection, neighbors[n], &dof));
      CHKERRQ(PetscSectionGetOffset(rootSection, neighbors[n], &off));
#if defined(PETSC_USE_64BIT_INDICES)
      /* Alltoallv takes int counts/displacements; fall back to the SF path on overflow */
      if (dof > PETSC_MPI_INT_MAX) {locOverflow = PETSC_TRUE; break;}
      if (off > PETSC_MPI_INT_MAX) {locOverflow = PETSC_TRUE; break;}
#endif
      scounts[neighbors[n]] = (PetscMPIInt) dof;
      sdispls[neighbors[n]] = (PetscMPIInt) off;
    }
    CHKERRMPI(MPI_Alltoall(scounts, 1, MPI_INT, rcounts, 1, MPI_INT, comm));
    for (r = 0; r < size; ++r) { rdispls[r] = (int)counter; counter += rcounts[r]; }
    if (counter > PETSC_MPI_INT_MAX) locOverflow = PETSC_TRUE;
    /* Overflow decision must be collective: every rank takes the same branch */
    CHKERRMPI(MPI_Allreduce(&locOverflow, &mpiOverflow, 1, MPIU_BOOL, MPI_LOR, comm));
    if (!mpiOverflow) {
      CHKERRQ(PetscInfo(dm,"Using Alltoallv for mesh distribution\n"));
      leafSize = (PetscInt) counter;
      CHKERRQ(PetscMalloc1(leafSize, &leafPoints));
      CHKERRMPI(MPI_Alltoallv(rootPoints, scounts, sdispls, MPIU_2INT, leafPoints, rcounts, rdispls, MPIU_2INT, comm));
    }
    CHKERRQ(PetscFree4(scounts, sdispls, rcounts, rdispls));
  }

  /* Communicate overlap using process star forest */
  if (processSF || mpiOverflow) {
    PetscSF      procSF;
    PetscSection leafSection;

    if (processSF) {
      CHKERRQ(PetscInfo(dm,"Using processSF for mesh distribution\n"));
      CHKERRQ(PetscObjectReference((PetscObject)processSF));
      procSF = processSF;
    } else {
      CHKERRQ(PetscInfo(dm,"Using processSF for mesh distribution (MPI overflow)\n"));
      CHKERRQ(PetscSFCreate(comm,&procSF));
      CHKERRQ(PetscSFSetGraphWithPattern(procSF,NULL,PETSCSF_PATTERN_ALLTOALL));
    }

    CHKERRQ(PetscSectionCreate(PetscObjectComm((PetscObject)dm), &leafSection));
    CHKERRQ(DMPlexDistributeData(dm, procSF, rootSection, MPIU_2INT, rootPoints, leafSection, (void**) &leafPoints));
    CHKERRQ(PetscSectionGetStorageSize(leafSection, &leafSize));
    CHKERRQ(PetscSectionDestroy(&leafSection));
    CHKERRQ(PetscSFDestroy(&procSF));
  }

  /* Record each received (point, rank) pair in the output label */
  for (p = 0; p < leafSize; p++) {
    CHKERRQ(DMLabelSetValue(leafLabel, leafPoints[p].index, leafPoints[p].rank));
  }

  CHKERRQ(ISRestoreIndices(valueIS, &neighbors));
  CHKERRQ(ISDestroy(&valueIS));
  CHKERRQ(PetscSectionDestroy(&rootSection));
  CHKERRQ(PetscFree(rootPoints));
  CHKERRQ(PetscFree(leafPoints));
  CHKERRQ(PetscLogEventEnd(DMPLEX_PartLabelInvert,dm,0,0,0));
  PetscFunctionReturn(0);
}
1347 
/*@
  DMPlexPartitionLabelCreateSF - Create a star forest from a label that assigns ranks to points

  Input Parameters:
+ dm    - The DM
- label - DMLabel assigning ranks to remote roots

  Output Parameter:
. sf    - The star forest communication context encapsulating the defined mapping

  Note: The incoming label is a receiver mapping of remote points to their parent rank.

  Level: developer

.seealso: DMPlexDistribute(), DMPlexCreateOverlap()
@*/
PetscErrorCode DMPlexPartitionLabelCreateSF(DM dm, DMLabel label, PetscSF *sf)
{
  PetscMPIInt     rank;
  PetscInt        n, numRemote, p, numPoints, pStart, pEnd, idx = 0, nNeighbors;
  PetscSFNode    *remotePoints;
  IS              remoteRootIS, neighborsIS;
  const PetscInt *remoteRoots, *neighbors;

  PetscFunctionBegin;
  CHKERRQ(PetscLogEventBegin(DMPLEX_PartLabelCreateSF,dm,0,0,0));
  CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject) dm), &rank));

  CHKERRQ(DMLabelGetValueIS(label, &neighborsIS));
#if 0
  /* Disabled: sort the neighbor ranks before building the SF */
  {
    IS is;
    CHKERRQ(ISDuplicate(neighborsIS, &is));
    CHKERRQ(ISSort(is));
    CHKERRQ(ISDestroy(&neighborsIS));
    neighborsIS = is;
  }
#endif
  /* Total leaf count: points in every stratum, including our own rank's */
  CHKERRQ(ISGetLocalSize(neighborsIS, &nNeighbors));
  CHKERRQ(ISGetIndices(neighborsIS, &neighbors));
  for (numRemote = 0, n = 0; n < nNeighbors; ++n) {
    CHKERRQ(DMLabelGetStratumSize(label, neighbors[n], &numPoints));
    numRemote += numPoints;
  }
  CHKERRQ(PetscMalloc1(numRemote, &remotePoints));
  /* Put owned points first */
  CHKERRQ(DMLabelGetStratumSize(label, rank, &numPoints));
  if (numPoints > 0) {
    CHKERRQ(DMLabelGetStratumIS(label, rank, &remoteRootIS));
    CHKERRQ(ISGetIndices(remoteRootIS, &remoteRoots));
    for (p = 0; p < numPoints; p++) {
      remotePoints[idx].index = remoteRoots[p];
      remotePoints[idx].rank = rank;
      idx++;
    }
    CHKERRQ(ISRestoreIndices(remoteRootIS, &remoteRoots));
    CHKERRQ(ISDestroy(&remoteRootIS));
  }
  /* Now add remote points */
  for (n = 0; n < nNeighbors; ++n) {
    const PetscInt nn = neighbors[n];

    CHKERRQ(DMLabelGetStratumSize(label, nn, &numPoints));
    /* Our own stratum was already handled above */
    if (nn == rank || numPoints <= 0) continue;
    CHKERRQ(DMLabelGetStratumIS(label, nn, &remoteRootIS));
    CHKERRQ(ISGetIndices(remoteRootIS, &remoteRoots));
    for (p = 0; p < numPoints; p++) {
      remotePoints[idx].index = remoteRoots[p];
      remotePoints[idx].rank = nn;
      idx++;
    }
    CHKERRQ(ISRestoreIndices(remoteRootIS, &remoteRoots));
    CHKERRQ(ISDestroy(&remoteRootIS));
  }
  CHKERRQ(PetscSFCreate(PetscObjectComm((PetscObject) dm), sf));
  CHKERRQ(DMPlexGetChart(dm, &pStart, &pEnd));
  /* NULL ilocal: leaves are the first numRemote local points; SF takes ownership of remotePoints */
  CHKERRQ(PetscSFSetGraph(*sf, pEnd-pStart, numRemote, NULL, PETSC_OWN_POINTER, remotePoints, PETSC_OWN_POINTER));
  CHKERRQ(PetscSFSetUp(*sf));
  CHKERRQ(ISDestroy(&neighborsIS));
  CHKERRQ(PetscLogEventEnd(DMPLEX_PartLabelCreateSF,dm,0,0,0));
  PetscFunctionReturn(0);
}
1430 
1431 #if defined(PETSC_HAVE_PARMETIS)
1432 #include <parmetis.h>
1433 #endif
1434 
1435 /* The two functions below are used by DMPlexRebalanceSharedPoints which errors
1436  * when PETSc is built without ParMETIS. To avoid -Wunused-function, we take
1437  * them out in that case. */
1438 #if defined(PETSC_HAVE_PARMETIS)
1439 /*@C
1440 
1441   DMPlexRewriteSF - Rewrites the ownership of the SF of a DM (in place).
1442 
1443   Input parameters:
1444 + dm                - The DMPlex object.
1445 . n                 - The number of points.
1446 . pointsToRewrite   - The points in the SF whose ownership will change.
1447 . targetOwners      - New owner for each element in pointsToRewrite.
1448 - degrees           - Degrees of the points in the SF as obtained by PetscSFComputeDegreeBegin/PetscSFComputeDegreeEnd.
1449 
1450   Level: developer
1451 
1452 @*/
1453 static PetscErrorCode DMPlexRewriteSF(DM dm, PetscInt n, PetscInt *pointsToRewrite, PetscInt *targetOwners, const PetscInt *degrees)
1454 {
1455   PetscInt      pStart, pEnd, i, j, counter, leafCounter, sumDegrees, nroots, nleafs;
1456   PetscInt     *cumSumDegrees, *newOwners, *newNumbers, *rankOnLeafs, *locationsOfLeafs, *remoteLocalPointOfLeafs, *points, *leafsNew;
1457   PetscSFNode  *leafLocationsNew;
1458   const         PetscSFNode *iremote;
1459   const         PetscInt *ilocal;
1460   PetscBool    *isLeaf;
1461   PetscSF       sf;
1462   MPI_Comm      comm;
1463   PetscMPIInt   rank, size;
1464 
1465   PetscFunctionBegin;
1466   CHKERRQ(PetscObjectGetComm((PetscObject) dm, &comm));
1467   CHKERRMPI(MPI_Comm_rank(comm, &rank));
1468   CHKERRMPI(MPI_Comm_size(comm, &size));
1469   CHKERRQ(DMPlexGetChart(dm, &pStart, &pEnd));
1470 
1471   CHKERRQ(DMGetPointSF(dm, &sf));
1472   CHKERRQ(PetscSFGetGraph(sf, &nroots, &nleafs, &ilocal, &iremote));
1473   CHKERRQ(PetscMalloc1(pEnd-pStart, &isLeaf));
1474   for (i=0; i<pEnd-pStart; i++) {
1475     isLeaf[i] = PETSC_FALSE;
1476   }
1477   for (i=0; i<nleafs; i++) {
1478     isLeaf[ilocal[i]-pStart] = PETSC_TRUE;
1479   }
1480 
1481   CHKERRQ(PetscMalloc1(pEnd-pStart+1, &cumSumDegrees));
1482   cumSumDegrees[0] = 0;
1483   for (i=1; i<=pEnd-pStart; i++) {
1484     cumSumDegrees[i] = cumSumDegrees[i-1] + degrees[i-1];
1485   }
1486   sumDegrees = cumSumDegrees[pEnd-pStart];
1487   /* get the location of my leafs (we have sumDegrees many leafs pointing at our roots) */
1488 
1489   CHKERRQ(PetscMalloc1(sumDegrees, &locationsOfLeafs));
1490   CHKERRQ(PetscMalloc1(pEnd-pStart, &rankOnLeafs));
1491   for (i=0; i<pEnd-pStart; i++) {
1492     rankOnLeafs[i] = rank;
1493   }
1494   CHKERRQ(PetscSFGatherBegin(sf, MPIU_INT, rankOnLeafs, locationsOfLeafs));
1495   CHKERRQ(PetscSFGatherEnd(sf, MPIU_INT, rankOnLeafs, locationsOfLeafs));
1496   CHKERRQ(PetscFree(rankOnLeafs));
1497 
1498   /* get the remote local points of my leaves */
1499   CHKERRQ(PetscMalloc1(sumDegrees, &remoteLocalPointOfLeafs));
1500   CHKERRQ(PetscMalloc1(pEnd-pStart, &points));
1501   for (i=0; i<pEnd-pStart; i++) {
1502     points[i] = pStart+i;
1503   }
1504   CHKERRQ(PetscSFGatherBegin(sf, MPIU_INT, points, remoteLocalPointOfLeafs));
1505   CHKERRQ(PetscSFGatherEnd(sf, MPIU_INT, points, remoteLocalPointOfLeafs));
1506   CHKERRQ(PetscFree(points));
1507   /* Figure out the new owners of the vertices that are up for grabs and their numbers on the new owners */
1508   CHKERRQ(PetscMalloc1(pEnd-pStart, &newOwners));
1509   CHKERRQ(PetscMalloc1(pEnd-pStart, &newNumbers));
1510   for (i=0; i<pEnd-pStart; i++) {
1511     newOwners[i] = -1;
1512     newNumbers[i] = -1;
1513   }
1514   {
1515     PetscInt oldNumber, newNumber, oldOwner, newOwner;
1516     for (i=0; i<n; i++) {
1517       oldNumber = pointsToRewrite[i];
1518       newNumber = -1;
1519       oldOwner = rank;
1520       newOwner = targetOwners[i];
1521       if (oldOwner == newOwner) {
1522         newNumber = oldNumber;
1523       } else {
1524         for (j=0; j<degrees[oldNumber]; j++) {
1525           if (locationsOfLeafs[cumSumDegrees[oldNumber]+j] == newOwner) {
1526             newNumber = remoteLocalPointOfLeafs[cumSumDegrees[oldNumber]+j];
1527             break;
1528           }
1529         }
1530       }
1531       PetscCheck(newNumber != -1,PetscObjectComm((PetscObject) dm), PETSC_ERR_SUP, "Couldn't find the new owner of vertex.");
1532 
1533       newOwners[oldNumber] = newOwner;
1534       newNumbers[oldNumber] = newNumber;
1535     }
1536   }
1537   CHKERRQ(PetscFree(cumSumDegrees));
1538   CHKERRQ(PetscFree(locationsOfLeafs));
1539   CHKERRQ(PetscFree(remoteLocalPointOfLeafs));
1540 
1541   CHKERRQ(PetscSFBcastBegin(sf, MPIU_INT, newOwners, newOwners,MPI_REPLACE));
1542   CHKERRQ(PetscSFBcastEnd(sf, MPIU_INT, newOwners, newOwners,MPI_REPLACE));
1543   CHKERRQ(PetscSFBcastBegin(sf, MPIU_INT, newNumbers, newNumbers,MPI_REPLACE));
1544   CHKERRQ(PetscSFBcastEnd(sf, MPIU_INT, newNumbers, newNumbers,MPI_REPLACE));
1545 
1546   /* Now count how many leafs we have on each processor. */
1547   leafCounter=0;
1548   for (i=0; i<pEnd-pStart; i++) {
1549     if (newOwners[i] >= 0) {
1550       if (newOwners[i] != rank) {
1551         leafCounter++;
1552       }
1553     } else {
1554       if (isLeaf[i]) {
1555         leafCounter++;
1556       }
1557     }
1558   }
1559 
1560   /* Now set up the new sf by creating the leaf arrays */
1561   CHKERRQ(PetscMalloc1(leafCounter, &leafsNew));
1562   CHKERRQ(PetscMalloc1(leafCounter, &leafLocationsNew));
1563 
1564   leafCounter = 0;
1565   counter = 0;
1566   for (i=0; i<pEnd-pStart; i++) {
1567     if (newOwners[i] >= 0) {
1568       if (newOwners[i] != rank) {
1569         leafsNew[leafCounter] = i;
1570         leafLocationsNew[leafCounter].rank = newOwners[i];
1571         leafLocationsNew[leafCounter].index = newNumbers[i];
1572         leafCounter++;
1573       }
1574     } else {
1575       if (isLeaf[i]) {
1576         leafsNew[leafCounter] = i;
1577         leafLocationsNew[leafCounter].rank = iremote[counter].rank;
1578         leafLocationsNew[leafCounter].index = iremote[counter].index;
1579         leafCounter++;
1580       }
1581     }
1582     if (isLeaf[i]) {
1583       counter++;
1584     }
1585   }
1586 
1587   CHKERRQ(PetscSFSetGraph(sf, nroots, leafCounter, leafsNew, PETSC_OWN_POINTER, leafLocationsNew, PETSC_OWN_POINTER));
1588   CHKERRQ(PetscFree(newOwners));
1589   CHKERRQ(PetscFree(newNumbers));
1590   CHKERRQ(PetscFree(isLeaf));
1591   PetscFunctionReturn(0);
1592 }
1593 
1594 static PetscErrorCode DMPlexViewDistribution(MPI_Comm comm, PetscInt n, PetscInt skip, PetscInt *vtxwgt, PetscInt *part, PetscViewer viewer)
1595 {
1596   PetscInt    *distribution, min, max, sum;
1597   PetscMPIInt rank, size;
1598 
1599   PetscFunctionBegin;
1600   CHKERRMPI(MPI_Comm_size(comm, &size));
1601   CHKERRMPI(MPI_Comm_rank(comm, &rank));
1602   CHKERRQ(PetscCalloc1(size, &distribution));
1603   for (PetscInt i=0; i<n; i++) {
1604     if (part) distribution[part[i]] += vtxwgt[skip*i];
1605     else distribution[rank] += vtxwgt[skip*i];
1606   }
1607   CHKERRMPI(MPI_Allreduce(MPI_IN_PLACE, distribution, size, MPIU_INT, MPI_SUM, comm));
1608   min = distribution[0];
1609   max = distribution[0];
1610   sum = distribution[0];
1611   for (PetscInt i=1; i<size; i++) {
1612     if (distribution[i]<min) min=distribution[i];
1613     if (distribution[i]>max) max=distribution[i];
1614     sum += distribution[i];
1615   }
1616   CHKERRQ(PetscViewerASCIIPrintf(viewer, "Min: %D, Avg: %D, Max: %D, Balance: %f\n", min, sum/size, max, (max*1.*size)/sum));
1617   CHKERRQ(PetscFree(distribution));
1618   PetscFunctionReturn(0);
1619 }
1620 
1621 #endif
1622 
/*@
  DMPlexRebalanceSharedPoints - Redistribute points in the plex that are shared in order to achieve better balancing. This routine updates the PointSF of the DM in place.

  Input Parameters:
+ dm               - The DMPlex object.
. entityDepth      - depth of the entity to balance (0 -> balance vertices).
. useInitialGuess  - whether to use the current distribution as initial guess (only used by ParMETIS).
- parallel         - whether to use ParMETIS and do the partition in parallel or whether to gather the graph onto a single process and use METIS.

  Output Parameter:
. success          - whether the graph partitioning was successful or not. If not, try useInitialGuess = PETSC_TRUE and parallel = PETSC_TRUE.

  Level: intermediate

@*/
1638 
1639 PetscErrorCode DMPlexRebalanceSharedPoints(DM dm, PetscInt entityDepth, PetscBool useInitialGuess, PetscBool parallel, PetscBool *success)
1640 {
1641 #if defined(PETSC_HAVE_PARMETIS)
1642   PetscSF     sf;
1643   PetscInt    ierr, i, j, idx, jdx;
1644   PetscInt    eBegin, eEnd, nroots, nleafs, pStart, pEnd;
1645   const       PetscInt *degrees, *ilocal;
1646   const       PetscSFNode *iremote;
1647   PetscBool   *toBalance, *isLeaf, *isExclusivelyOwned, *isNonExclusivelyOwned;
1648   PetscInt    numExclusivelyOwned, numNonExclusivelyOwned;
1649   PetscMPIInt rank, size;
1650   PetscInt    *globalNumbersOfLocalOwnedVertices, *leafGlobalNumbers;
1651   const       PetscInt *cumSumVertices;
1652   PetscInt    offset, counter;
1653   PetscInt    lenadjncy;
1654   PetscInt    *xadj, *adjncy, *vtxwgt;
1655   PetscInt    lenxadj;
1656   PetscInt    *adjwgt = NULL;
1657   PetscInt    *part, *options;
1658   PetscInt    nparts, wgtflag, numflag, ncon, edgecut;
1659   real_t      *ubvec;
1660   PetscInt    *firstVertices, *renumbering;
1661   PetscInt    failed, failedGlobal;
1662   MPI_Comm    comm;
1663   Mat         A, Apre;
1664   const char *prefix = NULL;
1665   PetscViewer       viewer;
1666   PetscViewerFormat format;
1667   PetscLayout layout;
1668 
1669   PetscFunctionBegin;
1670   if (success) *success = PETSC_FALSE;
1671   CHKERRQ(PetscObjectGetComm((PetscObject) dm, &comm));
1672   CHKERRMPI(MPI_Comm_rank(comm, &rank));
1673   CHKERRMPI(MPI_Comm_size(comm, &size));
1674   if (size==1) PetscFunctionReturn(0);
1675 
1676   CHKERRQ(PetscLogEventBegin(DMPLEX_RebalanceSharedPoints, dm, 0, 0, 0));
1677 
1678   CHKERRQ(PetscOptionsGetViewer(comm,((PetscObject)dm)->options, prefix,"-dm_rebalance_partition_view",&viewer,&format,NULL));
1679   if (viewer) {
1680     CHKERRQ(PetscViewerPushFormat(viewer,format));
1681   }
1682 
1683   /* Figure out all points in the plex that we are interested in balancing. */
1684   CHKERRQ(DMPlexGetDepthStratum(dm, entityDepth, &eBegin, &eEnd));
1685   CHKERRQ(DMPlexGetChart(dm, &pStart, &pEnd));
1686   CHKERRQ(PetscMalloc1(pEnd-pStart, &toBalance));
1687 
1688   for (i=0; i<pEnd-pStart; i++) {
1689     toBalance[i] = (PetscBool)(i-pStart>=eBegin && i-pStart<eEnd);
1690   }
1691 
1692   /* There are three types of points:
1693    * exclusivelyOwned: points that are owned by this process and only seen by this process
1694    * nonExclusivelyOwned: points that are owned by this process but seen by at least another process
1695    * leaf: a point that is seen by this process but owned by a different process
1696    */
1697   CHKERRQ(DMGetPointSF(dm, &sf));
1698   CHKERRQ(PetscSFGetGraph(sf, &nroots, &nleafs, &ilocal, &iremote));
1699   CHKERRQ(PetscMalloc1(pEnd-pStart, &isLeaf));
1700   CHKERRQ(PetscMalloc1(pEnd-pStart, &isNonExclusivelyOwned));
1701   CHKERRQ(PetscMalloc1(pEnd-pStart, &isExclusivelyOwned));
1702   for (i=0; i<pEnd-pStart; i++) {
1703     isNonExclusivelyOwned[i] = PETSC_FALSE;
1704     isExclusivelyOwned[i] = PETSC_FALSE;
1705     isLeaf[i] = PETSC_FALSE;
1706   }
1707 
1708   /* start by marking all the leafs */
1709   for (i=0; i<nleafs; i++) {
1710     isLeaf[ilocal[i]-pStart] = PETSC_TRUE;
1711   }
1712 
1713   /* for an owned point, we can figure out whether another processor sees it or
1714    * not by calculating its degree */
1715   CHKERRQ(PetscSFComputeDegreeBegin(sf, &degrees));
1716   CHKERRQ(PetscSFComputeDegreeEnd(sf, &degrees));
1717 
1718   numExclusivelyOwned = 0;
1719   numNonExclusivelyOwned = 0;
1720   for (i=0; i<pEnd-pStart; i++) {
1721     if (toBalance[i]) {
1722       if (degrees[i] > 0) {
1723         isNonExclusivelyOwned[i] = PETSC_TRUE;
1724         numNonExclusivelyOwned += 1;
1725       } else {
1726         if (!isLeaf[i]) {
1727           isExclusivelyOwned[i] = PETSC_TRUE;
1728           numExclusivelyOwned += 1;
1729         }
1730       }
1731     }
1732   }
1733 
1734   /* We are going to build a graph with one vertex per core representing the
1735    * exclusively owned points and then one vertex per nonExclusively owned
1736    * point. */
1737 
1738   CHKERRQ(PetscLayoutCreate(comm, &layout));
1739   CHKERRQ(PetscLayoutSetLocalSize(layout, 1 + numNonExclusivelyOwned));
1740   CHKERRQ(PetscLayoutSetUp(layout));
1741   CHKERRQ(PetscLayoutGetRanges(layout, &cumSumVertices));
1742 
1743   CHKERRQ(PetscMalloc1(pEnd-pStart, &globalNumbersOfLocalOwnedVertices));
1744   for (i=0; i<pEnd-pStart; i++) {globalNumbersOfLocalOwnedVertices[i] = pStart - 1;}
1745   offset = cumSumVertices[rank];
1746   counter = 0;
1747   for (i=0; i<pEnd-pStart; i++) {
1748     if (toBalance[i]) {
1749       if (degrees[i] > 0) {
1750         globalNumbersOfLocalOwnedVertices[i] = counter + 1 + offset;
1751         counter++;
1752       }
1753     }
1754   }
1755 
1756   /* send the global numbers of vertices I own to the leafs so that they know to connect to it */
1757   CHKERRQ(PetscMalloc1(pEnd-pStart, &leafGlobalNumbers));
1758   CHKERRQ(PetscSFBcastBegin(sf, MPIU_INT, globalNumbersOfLocalOwnedVertices, leafGlobalNumbers,MPI_REPLACE));
1759   CHKERRQ(PetscSFBcastEnd(sf, MPIU_INT, globalNumbersOfLocalOwnedVertices, leafGlobalNumbers,MPI_REPLACE));
1760 
1761   /* Now start building the data structure for ParMETIS */
1762 
1763   CHKERRQ(MatCreate(comm, &Apre));
1764   CHKERRQ(MatSetType(Apre, MATPREALLOCATOR));
1765   CHKERRQ(MatSetSizes(Apre, 1+numNonExclusivelyOwned, 1+numNonExclusivelyOwned, cumSumVertices[size], cumSumVertices[size]));
1766   CHKERRQ(MatSetUp(Apre));
1767 
1768   for (i=0; i<pEnd-pStart; i++) {
1769     if (toBalance[i]) {
1770       idx = cumSumVertices[rank];
1771       if (isNonExclusivelyOwned[i]) jdx = globalNumbersOfLocalOwnedVertices[i];
1772       else if (isLeaf[i]) jdx = leafGlobalNumbers[i];
1773       else continue;
1774       CHKERRQ(MatSetValue(Apre, idx, jdx, 1, INSERT_VALUES));
1775       CHKERRQ(MatSetValue(Apre, jdx, idx, 1, INSERT_VALUES));
1776     }
1777   }
1778 
1779   CHKERRQ(MatAssemblyBegin(Apre, MAT_FINAL_ASSEMBLY));
1780   CHKERRQ(MatAssemblyEnd(Apre, MAT_FINAL_ASSEMBLY));
1781 
1782   CHKERRQ(MatCreate(comm, &A));
1783   CHKERRQ(MatSetType(A, MATMPIAIJ));
1784   CHKERRQ(MatSetSizes(A, 1+numNonExclusivelyOwned, 1+numNonExclusivelyOwned, cumSumVertices[size], cumSumVertices[size]));
1785   CHKERRQ(MatPreallocatorPreallocate(Apre, PETSC_FALSE, A));
1786   CHKERRQ(MatDestroy(&Apre));
1787 
1788   for (i=0; i<pEnd-pStart; i++) {
1789     if (toBalance[i]) {
1790       idx = cumSumVertices[rank];
1791       if (isNonExclusivelyOwned[i]) jdx = globalNumbersOfLocalOwnedVertices[i];
1792       else if (isLeaf[i]) jdx = leafGlobalNumbers[i];
1793       else continue;
1794       CHKERRQ(MatSetValue(A, idx, jdx, 1, INSERT_VALUES));
1795       CHKERRQ(MatSetValue(A, jdx, idx, 1, INSERT_VALUES));
1796     }
1797   }
1798 
1799   CHKERRQ(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1800   CHKERRQ(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1801   CHKERRQ(PetscFree(leafGlobalNumbers));
1802   CHKERRQ(PetscFree(globalNumbersOfLocalOwnedVertices));
1803 
1804   nparts = size;
1805   wgtflag = 2;
1806   numflag = 0;
1807   ncon = 2;
1808   real_t *tpwgts;
1809   CHKERRQ(PetscMalloc1(ncon * nparts, &tpwgts));
1810   for (i=0; i<ncon*nparts; i++) {
1811     tpwgts[i] = 1./(nparts);
1812   }
1813 
1814   CHKERRQ(PetscMalloc1(ncon, &ubvec));
1815   ubvec[0] = 1.01;
1816   ubvec[1] = 1.01;
1817   lenadjncy = 0;
1818   for (i=0; i<1+numNonExclusivelyOwned; i++) {
1819     PetscInt temp=0;
1820     CHKERRQ(MatGetRow(A, cumSumVertices[rank] + i, &temp, NULL, NULL));
1821     lenadjncy += temp;
1822     CHKERRQ(MatRestoreRow(A, cumSumVertices[rank] + i, &temp, NULL, NULL));
1823   }
1824   CHKERRQ(PetscMalloc1(lenadjncy, &adjncy));
1825   lenxadj = 2 + numNonExclusivelyOwned;
1826   CHKERRQ(PetscMalloc1(lenxadj, &xadj));
1827   xadj[0] = 0;
1828   counter = 0;
1829   for (i=0; i<1+numNonExclusivelyOwned; i++) {
1830     PetscInt        temp=0;
1831     const PetscInt *cols;
1832     CHKERRQ(MatGetRow(A, cumSumVertices[rank] + i, &temp, &cols, NULL));
1833     CHKERRQ(PetscArraycpy(&adjncy[counter], cols, temp));
1834     counter += temp;
1835     xadj[i+1] = counter;
1836     CHKERRQ(MatRestoreRow(A, cumSumVertices[rank] + i, &temp, &cols, NULL));
1837   }
1838 
1839   CHKERRQ(PetscMalloc1(cumSumVertices[rank+1]-cumSumVertices[rank], &part));
1840   CHKERRQ(PetscMalloc1(ncon*(1 + numNonExclusivelyOwned), &vtxwgt));
1841   vtxwgt[0] = numExclusivelyOwned;
1842   if (ncon>1) vtxwgt[1] = 1;
1843   for (i=0; i<numNonExclusivelyOwned; i++) {
1844     vtxwgt[ncon*(i+1)] = 1;
1845     if (ncon>1) vtxwgt[ncon*(i+1)+1] = 0;
1846   }
1847 
1848   if (viewer) {
1849     CHKERRQ(PetscViewerASCIIPrintf(viewer, "Attempt rebalancing of shared points of depth %D on interface of mesh distribution.\n", entityDepth));
1850     CHKERRQ(PetscViewerASCIIPrintf(viewer, "Size of generated auxiliary graph: %D\n", cumSumVertices[size]));
1851   }
1852   if (parallel) {
1853     CHKERRQ(PetscMalloc1(4, &options));
1854     options[0] = 1;
1855     options[1] = 0; /* Verbosity */
1856     options[2] = 0; /* Seed */
1857     options[3] = PARMETIS_PSR_COUPLED; /* Seed */
1858     if (viewer) CHKERRQ(PetscViewerASCIIPrintf(viewer, "Using ParMETIS to partition graph.\n"));
1859     if (useInitialGuess) {
1860       if (viewer) CHKERRQ(PetscViewerASCIIPrintf(viewer, "Using current distribution of points as initial guess.\n"));
1861       PetscStackPush("ParMETIS_V3_RefineKway");
1862       ierr = ParMETIS_V3_RefineKway((PetscInt*)cumSumVertices, xadj, adjncy, vtxwgt, adjwgt, &wgtflag, &numflag, &ncon, &nparts, tpwgts, ubvec, options, &edgecut, part, &comm);
1863       PetscCheck(ierr == METIS_OK,PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ParMETIS_V3_RefineKway()");
1864       PetscStackPop;
1865     } else {
1866       PetscStackPush("ParMETIS_V3_PartKway");
1867       ierr = ParMETIS_V3_PartKway((PetscInt*)cumSumVertices, xadj, adjncy, vtxwgt, adjwgt, &wgtflag, &numflag, &ncon, &nparts, tpwgts, ubvec, options, &edgecut, part, &comm);
1868       PetscStackPop;
1869       PetscCheck(ierr == METIS_OK,PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ParMETIS_V3_PartKway()");
1870     }
1871     CHKERRQ(PetscFree(options));
1872   } else {
1873     if (viewer) CHKERRQ(PetscViewerASCIIPrintf(viewer, "Using METIS to partition graph.\n"));
1874     Mat As;
1875     PetscInt numRows;
1876     PetscInt *partGlobal;
1877     CHKERRQ(MatCreateRedundantMatrix(A, size, MPI_COMM_NULL, MAT_INITIAL_MATRIX, &As));
1878 
1879     PetscInt *numExclusivelyOwnedAll;
1880     CHKERRQ(PetscMalloc1(size, &numExclusivelyOwnedAll));
1881     numExclusivelyOwnedAll[rank] = numExclusivelyOwned;
1882     CHKERRMPI(MPI_Allgather(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,numExclusivelyOwnedAll,1,MPIU_INT,comm));
1883 
1884     CHKERRQ(MatGetSize(As, &numRows, NULL));
1885     CHKERRQ(PetscMalloc1(numRows, &partGlobal));
1886     if (rank == 0) {
1887       PetscInt *adjncy_g, *xadj_g, *vtxwgt_g;
1888       lenadjncy = 0;
1889 
1890       for (i=0; i<numRows; i++) {
1891         PetscInt temp=0;
1892         CHKERRQ(MatGetRow(As, i, &temp, NULL, NULL));
1893         lenadjncy += temp;
1894         CHKERRQ(MatRestoreRow(As, i, &temp, NULL, NULL));
1895       }
1896       CHKERRQ(PetscMalloc1(lenadjncy, &adjncy_g));
1897       lenxadj = 1 + numRows;
1898       CHKERRQ(PetscMalloc1(lenxadj, &xadj_g));
1899       xadj_g[0] = 0;
1900       counter = 0;
1901       for (i=0; i<numRows; i++) {
1902         PetscInt        temp=0;
1903         const PetscInt *cols;
1904         CHKERRQ(MatGetRow(As, i, &temp, &cols, NULL));
1905         CHKERRQ(PetscArraycpy(&adjncy_g[counter], cols, temp));
1906         counter += temp;
1907         xadj_g[i+1] = counter;
1908         CHKERRQ(MatRestoreRow(As, i, &temp, &cols, NULL));
1909       }
1910       CHKERRQ(PetscMalloc1(2*numRows, &vtxwgt_g));
1911       for (i=0; i<size; i++) {
1912         vtxwgt_g[ncon*cumSumVertices[i]] = numExclusivelyOwnedAll[i];
1913         if (ncon>1) vtxwgt_g[ncon*cumSumVertices[i]+1] = 1;
1914         for (j=cumSumVertices[i]+1; j<cumSumVertices[i+1]; j++) {
1915           vtxwgt_g[ncon*j] = 1;
1916           if (ncon>1) vtxwgt_g[2*j+1] = 0;
1917         }
1918       }
1919       CHKERRQ(PetscMalloc1(64, &options));
1920       ierr = METIS_SetDefaultOptions(options); /* initialize all defaults */
1921       PetscCheck(ierr == METIS_OK,PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in METIS_SetDefaultOptions()");
1922       options[METIS_OPTION_CONTIG] = 1;
1923       PetscStackPush("METIS_PartGraphKway");
1924       ierr = METIS_PartGraphKway(&numRows, &ncon, xadj_g, adjncy_g, vtxwgt_g, NULL, NULL, &nparts, tpwgts, ubvec, options, &edgecut, partGlobal);
1925       PetscStackPop;
1926       PetscCheck(ierr == METIS_OK,PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in METIS_PartGraphKway()");
1927       CHKERRQ(PetscFree(options));
1928       CHKERRQ(PetscFree(xadj_g));
1929       CHKERRQ(PetscFree(adjncy_g));
1930       CHKERRQ(PetscFree(vtxwgt_g));
1931     }
1932     CHKERRQ(PetscFree(numExclusivelyOwnedAll));
1933 
1934     /* Now scatter the parts array. */
1935     {
1936       PetscMPIInt *counts, *mpiCumSumVertices;
1937       CHKERRQ(PetscMalloc1(size, &counts));
1938       CHKERRQ(PetscMalloc1(size+1, &mpiCumSumVertices));
1939       for (i=0; i<size; i++) {
1940         CHKERRQ(PetscMPIIntCast(cumSumVertices[i+1] - cumSumVertices[i], &(counts[i])));
1941       }
1942       for (i=0; i<=size; i++) {
1943         CHKERRQ(PetscMPIIntCast(cumSumVertices[i], &(mpiCumSumVertices[i])));
1944       }
1945       CHKERRMPI(MPI_Scatterv(partGlobal, counts, mpiCumSumVertices, MPIU_INT, part, counts[rank], MPIU_INT, 0, comm));
1946       CHKERRQ(PetscFree(counts));
1947       CHKERRQ(PetscFree(mpiCumSumVertices));
1948     }
1949 
1950     CHKERRQ(PetscFree(partGlobal));
1951     CHKERRQ(MatDestroy(&As));
1952   }
1953 
1954   CHKERRQ(MatDestroy(&A));
1955   CHKERRQ(PetscFree(ubvec));
1956   CHKERRQ(PetscFree(tpwgts));
1957 
1958   /* Now rename the result so that the vertex resembling the exclusively owned points stays on the same rank */
1959 
1960   CHKERRQ(PetscMalloc1(size, &firstVertices));
1961   CHKERRQ(PetscMalloc1(size, &renumbering));
1962   firstVertices[rank] = part[0];
1963   CHKERRMPI(MPI_Allgather(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,firstVertices,1,MPIU_INT,comm));
1964   for (i=0; i<size; i++) {
1965     renumbering[firstVertices[i]] = i;
1966   }
1967   for (i=0; i<cumSumVertices[rank+1]-cumSumVertices[rank]; i++) {
1968     part[i] = renumbering[part[i]];
1969   }
1970   /* Check if the renumbering worked (this can fail when ParMETIS gives fewer partitions than there are processes) */
1971   failed = (PetscInt)(part[0] != rank);
1972   CHKERRMPI(MPI_Allreduce(&failed, &failedGlobal, 1, MPIU_INT, MPI_SUM, comm));
1973 
1974   CHKERRQ(PetscFree(firstVertices));
1975   CHKERRQ(PetscFree(renumbering));
1976 
1977   if (failedGlobal > 0) {
1978     CHKERRQ(PetscLayoutDestroy(&layout));
1979     CHKERRQ(PetscFree(xadj));
1980     CHKERRQ(PetscFree(adjncy));
1981     CHKERRQ(PetscFree(vtxwgt));
1982     CHKERRQ(PetscFree(toBalance));
1983     CHKERRQ(PetscFree(isLeaf));
1984     CHKERRQ(PetscFree(isNonExclusivelyOwned));
1985     CHKERRQ(PetscFree(isExclusivelyOwned));
1986     CHKERRQ(PetscFree(part));
1987     if (viewer) {
1988       CHKERRQ(PetscViewerPopFormat(viewer));
1989       CHKERRQ(PetscViewerDestroy(&viewer));
1990     }
1991     CHKERRQ(PetscLogEventEnd(DMPLEX_RebalanceSharedPoints, dm, 0, 0, 0));
1992     PetscFunctionReturn(0);
1993   }
1994 
1995   /*Let's check how well we did distributing points*/
1996   if (viewer) {
1997     CHKERRQ(PetscViewerASCIIPrintf(viewer, "Comparing number of owned entities of depth %D on each process before rebalancing, after rebalancing, and after consistency checks.\n", entityDepth));
1998     CHKERRQ(PetscViewerASCIIPrintf(viewer, "Initial.     "));
1999     CHKERRQ(DMPlexViewDistribution(comm, cumSumVertices[rank+1]-cumSumVertices[rank], ncon, vtxwgt, NULL, viewer));
2000     CHKERRQ(PetscViewerASCIIPrintf(viewer, "Rebalanced.  "));
2001     CHKERRQ(DMPlexViewDistribution(comm, cumSumVertices[rank+1]-cumSumVertices[rank], ncon, vtxwgt, part, viewer));
2002   }
2003 
2004   /* Now check that every vertex is owned by a process that it is actually connected to. */
2005   for (i=1; i<=numNonExclusivelyOwned; i++) {
2006     PetscInt loc = 0;
2007     CHKERRQ(PetscFindInt(cumSumVertices[part[i]], xadj[i+1]-xadj[i], &adjncy[xadj[i]], &loc));
2008     /* If not, then just set the owner to the original owner (hopefully a rare event, it means that a vertex has been isolated) */
2009     if (loc<0) {
2010       part[i] = rank;
2011     }
2012   }
2013 
2014   /* Let's see how significant the influences of the previous fixing up step was.*/
2015   if (viewer) {
2016     CHKERRQ(PetscViewerASCIIPrintf(viewer, "After.       "));
2017     CHKERRQ(DMPlexViewDistribution(comm, cumSumVertices[rank+1]-cumSumVertices[rank], ncon, vtxwgt, part, viewer));
2018   }
2019 
2020   CHKERRQ(PetscLayoutDestroy(&layout));
2021   CHKERRQ(PetscFree(xadj));
2022   CHKERRQ(PetscFree(adjncy));
2023   CHKERRQ(PetscFree(vtxwgt));
2024 
2025   /* Almost done, now rewrite the SF to reflect the new ownership. */
2026   {
2027     PetscInt *pointsToRewrite;
2028     CHKERRQ(PetscMalloc1(numNonExclusivelyOwned, &pointsToRewrite));
2029     counter = 0;
2030     for (i=0; i<pEnd-pStart; i++) {
2031       if (toBalance[i]) {
2032         if (isNonExclusivelyOwned[i]) {
2033           pointsToRewrite[counter] = i + pStart;
2034           counter++;
2035         }
2036       }
2037     }
2038     CHKERRQ(DMPlexRewriteSF(dm, numNonExclusivelyOwned, pointsToRewrite, part+1, degrees));
2039     CHKERRQ(PetscFree(pointsToRewrite));
2040   }
2041 
2042   CHKERRQ(PetscFree(toBalance));
2043   CHKERRQ(PetscFree(isLeaf));
2044   CHKERRQ(PetscFree(isNonExclusivelyOwned));
2045   CHKERRQ(PetscFree(isExclusivelyOwned));
2046   CHKERRQ(PetscFree(part));
2047   if (viewer) {
2048     CHKERRQ(PetscViewerPopFormat(viewer));
2049     CHKERRQ(PetscViewerDestroy(&viewer));
2050   }
2051   if (success) *success = PETSC_TRUE;
2052   CHKERRQ(PetscLogEventEnd(DMPLEX_RebalanceSharedPoints, dm, 0, 0, 0));
2053   PetscFunctionReturn(0);
2054 #else
2055   SETERRQ(PetscObjectComm((PetscObject) dm), PETSC_ERR_SUP, "Mesh partitioning needs external package support.\nPlease reconfigure with --download-parmetis.");
2056 #endif
2057 }
2058