1 #include <petsc/private/dmpleximpl.h> /*I "petscdmplex.h" I*/ 2 #include <petsc/private/partitionerimpl.h> 3 #include <petsc/private/hashseti.h> 4 5 const char *const DMPlexCSRAlgorithms[] = {"mat", "graph", "overlap", "DMPlexCSRAlgorithm", "DM_PLEX_CSR_", NULL}; 6 7 static inline PetscInt DMPlex_GlobalID(PetscInt point) 8 { 9 return point >= 0 ? point : -(point + 1); 10 } 11 12 static PetscErrorCode DMPlexCreatePartitionerGraph_Overlap(DM dm, PetscInt height, PetscInt *numVertices, PetscInt **offsets, PetscInt **adjacency, IS *globalNumbering) 13 { 14 DM ovdm; 15 PetscSF sfPoint; 16 IS cellNumbering; 17 const PetscInt *cellNum; 18 PetscInt *adj = NULL, *vOffsets = NULL, *vAdj = NULL; 19 PetscBool useCone, useClosure; 20 PetscInt dim, depth, overlap, cStart, cEnd, c, v; 21 PetscMPIInt rank, size; 22 23 PetscFunctionBegin; 24 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank)); 25 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)dm), &size)); 26 PetscCall(DMGetDimension(dm, &dim)); 27 PetscCall(DMPlexGetDepth(dm, &depth)); 28 if (dim != depth) { 29 /* We do not handle the uninterpolated case here */ 30 PetscCall(DMPlexCreateNeighborCSR(dm, height, numVertices, offsets, adjacency)); 31 /* DMPlexCreateNeighborCSR does not make a numbering */ 32 if (globalNumbering) PetscCall(DMPlexCreateCellNumbering_Internal(dm, PETSC_TRUE, globalNumbering)); 33 /* Different behavior for empty graphs */ 34 if (!*numVertices) { 35 PetscCall(PetscMalloc1(1, offsets)); 36 (*offsets)[0] = 0; 37 } 38 /* Broken in parallel */ 39 if (rank) PetscCheck(!*numVertices, PETSC_COMM_SELF, PETSC_ERR_SUP, "Parallel partitioning of uninterpolated meshes not supported"); 40 PetscFunctionReturn(0); 41 } 42 /* Always use FVM adjacency to create partitioner graph */ 43 PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure)); 44 PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE)); 45 /* Need overlap >= 1 */ 46 PetscCall(DMPlexGetOverlap(dm, &overlap)); 47 if (size && 
overlap < 1) { 48 PetscCall(DMPlexDistributeOverlap(dm, 1, NULL, &ovdm)); 49 } else { 50 PetscCall(PetscObjectReference((PetscObject)dm)); 51 ovdm = dm; 52 } 53 PetscCall(DMGetPointSF(ovdm, &sfPoint)); 54 PetscCall(DMPlexGetHeightStratum(ovdm, height, &cStart, &cEnd)); 55 PetscCall(DMPlexCreateNumbering_Plex(ovdm, cStart, cEnd, 0, NULL, sfPoint, &cellNumbering)); 56 if (globalNumbering) { 57 PetscCall(PetscObjectReference((PetscObject)cellNumbering)); 58 *globalNumbering = cellNumbering; 59 } 60 PetscCall(ISGetIndices(cellNumbering, &cellNum)); 61 /* Determine sizes */ 62 for (*numVertices = 0, c = cStart; c < cEnd; ++c) { 63 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 64 if (cellNum[c - cStart] < 0) continue; 65 (*numVertices)++; 66 } 67 PetscCall(PetscCalloc1(*numVertices + 1, &vOffsets)); 68 for (c = cStart, v = 0; c < cEnd; ++c) { 69 PetscInt adjSize = PETSC_DETERMINE, a, vsize = 0; 70 71 if (cellNum[c - cStart] < 0) continue; 72 PetscCall(DMPlexGetAdjacency(ovdm, c, &adjSize, &adj)); 73 for (a = 0; a < adjSize; ++a) { 74 const PetscInt point = adj[a]; 75 if (point != c && cStart <= point && point < cEnd) ++vsize; 76 } 77 vOffsets[v + 1] = vOffsets[v] + vsize; 78 ++v; 79 } 80 /* Determine adjacency */ 81 PetscCall(PetscMalloc1(vOffsets[*numVertices], &vAdj)); 82 for (c = cStart, v = 0; c < cEnd; ++c) { 83 PetscInt adjSize = PETSC_DETERMINE, a, off = vOffsets[v]; 84 85 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 86 if (cellNum[c - cStart] < 0) continue; 87 PetscCall(DMPlexGetAdjacency(ovdm, c, &adjSize, &adj)); 88 for (a = 0; a < adjSize; ++a) { 89 const PetscInt point = adj[a]; 90 if (point != c && cStart <= point && point < cEnd) vAdj[off++] = DMPlex_GlobalID(cellNum[point - cStart]); 91 } 92 PetscCheck(off == vOffsets[v + 1], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Offsets %" PetscInt_FMT " should be %" PetscInt_FMT, off, vOffsets[v + 1]); 93 /* Sort adjacencies (not strictly necessary) */ 94 
PetscCall(PetscSortInt(off - vOffsets[v], &vAdj[vOffsets[v]])); 95 ++v; 96 } 97 PetscCall(PetscFree(adj)); 98 PetscCall(ISRestoreIndices(cellNumbering, &cellNum)); 99 PetscCall(ISDestroy(&cellNumbering)); 100 PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure)); 101 PetscCall(DMDestroy(&ovdm)); 102 if (offsets) { 103 *offsets = vOffsets; 104 } else PetscCall(PetscFree(vOffsets)); 105 if (adjacency) { 106 *adjacency = vAdj; 107 } else PetscCall(PetscFree(vAdj)); 108 PetscFunctionReturn(0); 109 } 110 111 static PetscErrorCode DMPlexCreatePartitionerGraph_Native(DM dm, PetscInt height, PetscInt *numVertices, PetscInt **offsets, PetscInt **adjacency, IS *globalNumbering) 112 { 113 PetscInt dim, depth, p, pStart, pEnd, a, adjSize, idx, size; 114 PetscInt *adj = NULL, *vOffsets = NULL, *graph = NULL; 115 IS cellNumbering; 116 const PetscInt *cellNum; 117 PetscBool useCone, useClosure; 118 PetscSection section; 119 PetscSegBuffer adjBuffer; 120 PetscSF sfPoint; 121 PetscInt *adjCells = NULL, *remoteCells = NULL; 122 const PetscInt *local; 123 PetscInt nroots, nleaves, l; 124 PetscMPIInt rank; 125 126 PetscFunctionBegin; 127 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank)); 128 PetscCall(DMGetDimension(dm, &dim)); 129 PetscCall(DMPlexGetDepth(dm, &depth)); 130 if (dim != depth) { 131 /* We do not handle the uninterpolated case here */ 132 PetscCall(DMPlexCreateNeighborCSR(dm, height, numVertices, offsets, adjacency)); 133 /* DMPlexCreateNeighborCSR does not make a numbering */ 134 if (globalNumbering) PetscCall(DMPlexCreateCellNumbering_Internal(dm, PETSC_TRUE, globalNumbering)); 135 /* Different behavior for empty graphs */ 136 if (!*numVertices) { 137 PetscCall(PetscMalloc1(1, offsets)); 138 (*offsets)[0] = 0; 139 } 140 /* Broken in parallel */ 141 if (rank) PetscCheck(!*numVertices, PETSC_COMM_SELF, PETSC_ERR_SUP, "Parallel partitioning of uninterpolated meshes not supported"); 142 PetscFunctionReturn(0); 143 } 144 PetscCall(DMGetPointSF(dm, 
&sfPoint)); 145 PetscCall(DMPlexGetHeightStratum(dm, height, &pStart, &pEnd)); 146 /* Build adjacency graph via a section/segbuffer */ 147 PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject)dm), §ion)); 148 PetscCall(PetscSectionSetChart(section, pStart, pEnd)); 149 PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 1000, &adjBuffer)); 150 /* Always use FVM adjacency to create partitioner graph */ 151 PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure)); 152 PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE)); 153 PetscCall(DMPlexCreateNumbering_Plex(dm, pStart, pEnd, 0, NULL, sfPoint, &cellNumbering)); 154 if (globalNumbering) { 155 PetscCall(PetscObjectReference((PetscObject)cellNumbering)); 156 *globalNumbering = cellNumbering; 157 } 158 PetscCall(ISGetIndices(cellNumbering, &cellNum)); 159 /* For all boundary faces (including faces adjacent to a ghost cell), record the local cell in adjCells 160 Broadcast adjCells to remoteCells (to get cells from roots) and Reduce adjCells to remoteCells (to get cells from leaves) 161 */ 162 PetscCall(PetscSFGetGraph(sfPoint, &nroots, &nleaves, &local, NULL)); 163 if (nroots >= 0) { 164 PetscInt fStart, fEnd, f; 165 166 PetscCall(PetscCalloc2(nroots, &adjCells, nroots, &remoteCells)); 167 PetscCall(DMPlexGetHeightStratum(dm, height + 1, &fStart, &fEnd)); 168 for (l = 0; l < nroots; ++l) adjCells[l] = -3; 169 for (f = fStart; f < fEnd; ++f) { 170 const PetscInt *support; 171 PetscInt supportSize; 172 173 PetscCall(DMPlexGetSupport(dm, f, &support)); 174 PetscCall(DMPlexGetSupportSize(dm, f, &supportSize)); 175 if (supportSize == 1) adjCells[f] = DMPlex_GlobalID(cellNum[support[0] - pStart]); 176 else if (supportSize == 2) { 177 PetscCall(PetscFindInt(support[0], nleaves, local, &p)); 178 if (p >= 0) adjCells[f] = DMPlex_GlobalID(cellNum[support[1] - pStart]); 179 PetscCall(PetscFindInt(support[1], nleaves, local, &p)); 180 if (p >= 0) adjCells[f] = DMPlex_GlobalID(cellNum[support[0] - pStart]); 181 } 182 /* 
Handle non-conforming meshes */ 183 if (supportSize > 2) { 184 PetscInt numChildren, i; 185 const PetscInt *children; 186 187 PetscCall(DMPlexGetTreeChildren(dm, f, &numChildren, &children)); 188 for (i = 0; i < numChildren; ++i) { 189 const PetscInt child = children[i]; 190 if (fStart <= child && child < fEnd) { 191 PetscCall(DMPlexGetSupport(dm, child, &support)); 192 PetscCall(DMPlexGetSupportSize(dm, child, &supportSize)); 193 if (supportSize == 1) adjCells[child] = DMPlex_GlobalID(cellNum[support[0] - pStart]); 194 else if (supportSize == 2) { 195 PetscCall(PetscFindInt(support[0], nleaves, local, &p)); 196 if (p >= 0) adjCells[child] = DMPlex_GlobalID(cellNum[support[1] - pStart]); 197 PetscCall(PetscFindInt(support[1], nleaves, local, &p)); 198 if (p >= 0) adjCells[child] = DMPlex_GlobalID(cellNum[support[0] - pStart]); 199 } 200 } 201 } 202 } 203 } 204 for (l = 0; l < nroots; ++l) remoteCells[l] = -1; 205 PetscCall(PetscSFBcastBegin(dm->sf, MPIU_INT, adjCells, remoteCells, MPI_REPLACE)); 206 PetscCall(PetscSFBcastEnd(dm->sf, MPIU_INT, adjCells, remoteCells, MPI_REPLACE)); 207 PetscCall(PetscSFReduceBegin(dm->sf, MPIU_INT, adjCells, remoteCells, MPI_MAX)); 208 PetscCall(PetscSFReduceEnd(dm->sf, MPIU_INT, adjCells, remoteCells, MPI_MAX)); 209 } 210 /* Combine local and global adjacencies */ 211 for (*numVertices = 0, p = pStart; p < pEnd; p++) { 212 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 213 if (nroots > 0) { 214 if (cellNum[p - pStart] < 0) continue; 215 } 216 /* Add remote cells */ 217 if (remoteCells) { 218 const PetscInt gp = DMPlex_GlobalID(cellNum[p - pStart]); 219 PetscInt coneSize, numChildren, c, i; 220 const PetscInt *cone, *children; 221 222 PetscCall(DMPlexGetCone(dm, p, &cone)); 223 PetscCall(DMPlexGetConeSize(dm, p, &coneSize)); 224 for (c = 0; c < coneSize; ++c) { 225 const PetscInt point = cone[c]; 226 if (remoteCells[point] >= 0 && remoteCells[point] != gp) { 227 PetscInt *PETSC_RESTRICT pBuf; 228 
PetscCall(PetscSectionAddDof(section, p, 1)); 229 PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf)); 230 *pBuf = remoteCells[point]; 231 } 232 /* Handle non-conforming meshes */ 233 PetscCall(DMPlexGetTreeChildren(dm, point, &numChildren, &children)); 234 for (i = 0; i < numChildren; ++i) { 235 const PetscInt child = children[i]; 236 if (remoteCells[child] >= 0 && remoteCells[child] != gp) { 237 PetscInt *PETSC_RESTRICT pBuf; 238 PetscCall(PetscSectionAddDof(section, p, 1)); 239 PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf)); 240 *pBuf = remoteCells[child]; 241 } 242 } 243 } 244 } 245 /* Add local cells */ 246 adjSize = PETSC_DETERMINE; 247 PetscCall(DMPlexGetAdjacency(dm, p, &adjSize, &adj)); 248 for (a = 0; a < adjSize; ++a) { 249 const PetscInt point = adj[a]; 250 if (point != p && pStart <= point && point < pEnd) { 251 PetscInt *PETSC_RESTRICT pBuf; 252 PetscCall(PetscSectionAddDof(section, p, 1)); 253 PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf)); 254 *pBuf = DMPlex_GlobalID(cellNum[point - pStart]); 255 } 256 } 257 (*numVertices)++; 258 } 259 PetscCall(PetscFree(adj)); 260 PetscCall(PetscFree2(adjCells, remoteCells)); 261 PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure)); 262 263 /* Derive CSR graph from section/segbuffer */ 264 PetscCall(PetscSectionSetUp(section)); 265 PetscCall(PetscSectionGetStorageSize(section, &size)); 266 PetscCall(PetscMalloc1(*numVertices + 1, &vOffsets)); 267 for (idx = 0, p = pStart; p < pEnd; p++) { 268 if (nroots > 0) { 269 if (cellNum[p - pStart] < 0) continue; 270 } 271 PetscCall(PetscSectionGetOffset(section, p, &(vOffsets[idx++]))); 272 } 273 vOffsets[*numVertices] = size; 274 PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &graph)); 275 276 if (nroots >= 0) { 277 /* Filter out duplicate edges using section/segbuffer */ 278 PetscCall(PetscSectionReset(section)); 279 PetscCall(PetscSectionSetChart(section, 0, *numVertices)); 280 for (p = 0; p < *numVertices; p++) { 281 PetscInt start = vOffsets[p], end 
= vOffsets[p + 1]; 282 PetscInt numEdges = end - start, *PETSC_RESTRICT edges; 283 PetscCall(PetscSortRemoveDupsInt(&numEdges, &graph[start])); 284 PetscCall(PetscSectionSetDof(section, p, numEdges)); 285 PetscCall(PetscSegBufferGetInts(adjBuffer, numEdges, &edges)); 286 PetscCall(PetscArraycpy(edges, &graph[start], numEdges)); 287 } 288 PetscCall(PetscFree(vOffsets)); 289 PetscCall(PetscFree(graph)); 290 /* Derive CSR graph from section/segbuffer */ 291 PetscCall(PetscSectionSetUp(section)); 292 PetscCall(PetscSectionGetStorageSize(section, &size)); 293 PetscCall(PetscMalloc1(*numVertices + 1, &vOffsets)); 294 for (idx = 0, p = 0; p < *numVertices; p++) PetscCall(PetscSectionGetOffset(section, p, &(vOffsets[idx++]))); 295 vOffsets[*numVertices] = size; 296 PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &graph)); 297 } else { 298 /* Sort adjacencies (not strictly necessary) */ 299 for (p = 0; p < *numVertices; p++) { 300 PetscInt start = vOffsets[p], end = vOffsets[p + 1]; 301 PetscCall(PetscSortInt(end - start, &graph[start])); 302 } 303 } 304 305 if (offsets) *offsets = vOffsets; 306 if (adjacency) *adjacency = graph; 307 308 /* Cleanup */ 309 PetscCall(PetscSegBufferDestroy(&adjBuffer)); 310 PetscCall(PetscSectionDestroy(§ion)); 311 PetscCall(ISRestoreIndices(cellNumbering, &cellNum)); 312 PetscCall(ISDestroy(&cellNumbering)); 313 PetscFunctionReturn(0); 314 } 315 316 static PetscErrorCode DMPlexCreatePartitionerGraph_ViaMat(DM dm, PetscInt height, PetscInt *numVertices, PetscInt **offsets, PetscInt **adjacency, IS *globalNumbering) 317 { 318 Mat conn, CSR; 319 IS fis, cis, cis_own; 320 PetscSF sfPoint; 321 const PetscInt *rows, *cols, *ii, *jj; 322 PetscInt *idxs, *idxs2; 323 PetscInt dim, depth, floc, cloc, i, M, N, c, lm, m, cStart, cEnd, fStart, fEnd; 324 PetscMPIInt rank; 325 PetscBool flg; 326 327 PetscFunctionBegin; 328 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank)); 329 PetscCall(DMGetDimension(dm, &dim)); 330 
PetscCall(DMPlexGetDepth(dm, &depth)); 331 if (dim != depth) { 332 /* We do not handle the uninterpolated case here */ 333 PetscCall(DMPlexCreateNeighborCSR(dm, height, numVertices, offsets, adjacency)); 334 /* DMPlexCreateNeighborCSR does not make a numbering */ 335 if (globalNumbering) PetscCall(DMPlexCreateCellNumbering_Internal(dm, PETSC_TRUE, globalNumbering)); 336 /* Different behavior for empty graphs */ 337 if (!*numVertices) { 338 PetscCall(PetscMalloc1(1, offsets)); 339 (*offsets)[0] = 0; 340 } 341 /* Broken in parallel */ 342 if (rank) PetscCheck(!*numVertices, PETSC_COMM_SELF, PETSC_ERR_SUP, "Parallel partitioning of uninterpolated meshes not supported"); 343 PetscFunctionReturn(0); 344 } 345 /* Interpolated and parallel case */ 346 347 /* numbering */ 348 PetscCall(DMGetPointSF(dm, &sfPoint)); 349 PetscCall(DMPlexGetHeightStratum(dm, height, &cStart, &cEnd)); 350 PetscCall(DMPlexGetHeightStratum(dm, height + 1, &fStart, &fEnd)); 351 PetscCall(DMPlexCreateNumbering_Plex(dm, cStart, cEnd, 0, &N, sfPoint, &cis)); 352 PetscCall(DMPlexCreateNumbering_Plex(dm, fStart, fEnd, 0, &M, sfPoint, &fis)); 353 if (globalNumbering) PetscCall(ISDuplicate(cis, globalNumbering)); 354 355 /* get positive global ids and local sizes for facets and cells */ 356 PetscCall(ISGetLocalSize(fis, &m)); 357 PetscCall(ISGetIndices(fis, &rows)); 358 PetscCall(PetscMalloc1(m, &idxs)); 359 for (i = 0, floc = 0; i < m; i++) { 360 const PetscInt p = rows[i]; 361 362 if (p < 0) { 363 idxs[i] = -(p + 1); 364 } else { 365 idxs[i] = p; 366 floc += 1; 367 } 368 } 369 PetscCall(ISRestoreIndices(fis, &rows)); 370 PetscCall(ISDestroy(&fis)); 371 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idxs, PETSC_OWN_POINTER, &fis)); 372 373 PetscCall(ISGetLocalSize(cis, &m)); 374 PetscCall(ISGetIndices(cis, &cols)); 375 PetscCall(PetscMalloc1(m, &idxs)); 376 PetscCall(PetscMalloc1(m, &idxs2)); 377 for (i = 0, cloc = 0; i < m; i++) { 378 const PetscInt p = cols[i]; 379 380 if (p < 0) { 381 idxs[i] = -(p + 
1); 382 } else { 383 idxs[i] = p; 384 idxs2[cloc++] = p; 385 } 386 } 387 PetscCall(ISRestoreIndices(cis, &cols)); 388 PetscCall(ISDestroy(&cis)); 389 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)dm), m, idxs, PETSC_OWN_POINTER, &cis)); 390 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)dm), cloc, idxs2, PETSC_OWN_POINTER, &cis_own)); 391 392 /* Create matrix to hold F-C connectivity (MatMatTranspose Mult not supported for MPIAIJ) */ 393 PetscCall(MatCreate(PetscObjectComm((PetscObject)dm), &conn)); 394 PetscCall(MatSetSizes(conn, floc, cloc, M, N)); 395 PetscCall(MatSetType(conn, MATMPIAIJ)); 396 PetscCall(DMPlexGetMaxSizes(dm, NULL, &lm)); 397 PetscCallMPI(MPI_Allreduce(&lm, &m, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)dm))); 398 PetscCall(MatMPIAIJSetPreallocation(conn, m, NULL, m, NULL)); 399 400 /* Assemble matrix */ 401 PetscCall(ISGetIndices(fis, &rows)); 402 PetscCall(ISGetIndices(cis, &cols)); 403 for (c = cStart; c < cEnd; c++) { 404 const PetscInt *cone; 405 PetscInt coneSize, row, col, f; 406 407 col = cols[c - cStart]; 408 PetscCall(DMPlexGetCone(dm, c, &cone)); 409 PetscCall(DMPlexGetConeSize(dm, c, &coneSize)); 410 for (f = 0; f < coneSize; f++) { 411 const PetscScalar v = 1.0; 412 const PetscInt *children; 413 PetscInt numChildren, ch; 414 415 row = rows[cone[f] - fStart]; 416 PetscCall(MatSetValues(conn, 1, &row, 1, &col, &v, INSERT_VALUES)); 417 418 /* non-conforming meshes */ 419 PetscCall(DMPlexGetTreeChildren(dm, cone[f], &numChildren, &children)); 420 for (ch = 0; ch < numChildren; ch++) { 421 const PetscInt child = children[ch]; 422 423 if (child < fStart || child >= fEnd) continue; 424 row = rows[child - fStart]; 425 PetscCall(MatSetValues(conn, 1, &row, 1, &col, &v, INSERT_VALUES)); 426 } 427 } 428 } 429 PetscCall(ISRestoreIndices(fis, &rows)); 430 PetscCall(ISRestoreIndices(cis, &cols)); 431 PetscCall(ISDestroy(&fis)); 432 PetscCall(ISDestroy(&cis)); 433 PetscCall(MatAssemblyBegin(conn, MAT_FINAL_ASSEMBLY)); 434 
PetscCall(MatAssemblyEnd(conn, MAT_FINAL_ASSEMBLY)); 435 436 /* Get parallel CSR by doing conn^T * conn */ 437 PetscCall(MatTransposeMatMult(conn, conn, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &CSR)); 438 PetscCall(MatDestroy(&conn)); 439 440 /* extract local part of the CSR */ 441 PetscCall(MatMPIAIJGetLocalMat(CSR, MAT_INITIAL_MATRIX, &conn)); 442 PetscCall(MatDestroy(&CSR)); 443 PetscCall(MatGetRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &m, &ii, &jj, &flg)); 444 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "No IJ format"); 445 446 /* get back requested output */ 447 if (numVertices) *numVertices = m; 448 if (offsets) { 449 PetscCall(PetscCalloc1(m + 1, &idxs)); 450 for (i = 1; i < m + 1; i++) idxs[i] = ii[i] - i; /* ParMetis does not like self-connectivity */ 451 *offsets = idxs; 452 } 453 if (adjacency) { 454 PetscCall(PetscMalloc1(ii[m] - m, &idxs)); 455 PetscCall(ISGetIndices(cis_own, &rows)); 456 for (i = 0, c = 0; i < m; i++) { 457 PetscInt j, g = rows[i]; 458 459 for (j = ii[i]; j < ii[i + 1]; j++) { 460 if (jj[j] == g) continue; /* again, self-connectivity */ 461 idxs[c++] = jj[j]; 462 } 463 } 464 PetscCheck(c == ii[m] - m, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected %" PetscInt_FMT " != %" PetscInt_FMT, c, ii[m] - m); 465 PetscCall(ISRestoreIndices(cis_own, &rows)); 466 *adjacency = idxs; 467 } 468 469 /* cleanup */ 470 PetscCall(ISDestroy(&cis_own)); 471 PetscCall(MatRestoreRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &m, &ii, &jj, &flg)); 472 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "No IJ format"); 473 PetscCall(MatDestroy(&conn)); 474 PetscFunctionReturn(0); 475 } 476 477 /*@C 478 DMPlexCreatePartitionerGraph - Create a CSR graph of point connections for the partitioner 479 480 Input Parameters: 481 + dm - The mesh DM dm 482 - height - Height of the strata from which to construct the graph 483 484 Output Parameters: 485 + numVertices - Number of vertices in the graph 486 . offsets - Point offsets in the graph 487 . 
adjacency - Point connectivity in the graph 488 - globalNumbering - A map from the local cell numbering to the global numbering used in "adjacency". Negative indicates that the cell is a duplicate from another process. 489 490 The user can control the definition of adjacency for the mesh using DMSetAdjacency(). They should choose the combination appropriate for the function 491 representation on the mesh. If requested, globalNumbering needs to be destroyed by the caller; offsets and adjacency need to be freed with PetscFree(). 492 493 Options Database Keys: 494 . -dm_plex_csr_alg <mat,graph,overlap> - Choose the algorithm for computing the CSR graph 495 496 Level: developer 497 498 .seealso: `PetscPartitionerGetType()`, `PetscPartitionerCreate()`, `DMSetAdjacency()` 499 @*/ 500 PetscErrorCode DMPlexCreatePartitionerGraph(DM dm, PetscInt height, PetscInt *numVertices, PetscInt **offsets, PetscInt **adjacency, IS *globalNumbering) 501 { 502 DMPlexCSRAlgorithm alg = DM_PLEX_CSR_GRAPH; 503 504 PetscFunctionBegin; 505 PetscCall(PetscOptionsGetEnum(((PetscObject)dm)->options, ((PetscObject)dm)->prefix, "-dm_plex_csr_alg", DMPlexCSRAlgorithms, (PetscEnum *)&alg, NULL)); 506 switch (alg) { 507 case DM_PLEX_CSR_MAT: 508 PetscCall(DMPlexCreatePartitionerGraph_ViaMat(dm, height, numVertices, offsets, adjacency, globalNumbering)); 509 break; 510 case DM_PLEX_CSR_GRAPH: 511 PetscCall(DMPlexCreatePartitionerGraph_Native(dm, height, numVertices, offsets, adjacency, globalNumbering)); 512 break; 513 case DM_PLEX_CSR_OVERLAP: 514 PetscCall(DMPlexCreatePartitionerGraph_Overlap(dm, height, numVertices, offsets, adjacency, globalNumbering)); 515 break; 516 } 517 PetscFunctionReturn(0); 518 } 519 520 /*@C 521 DMPlexCreateNeighborCSR - Create a mesh graph (cell-cell adjacency) in parallel CSR format. 
522 523 Collective on DM 524 525 Input Parameters: 526 + dm - The DMPlex 527 - cellHeight - The height of mesh points to treat as cells (default should be 0) 528 529 Output Parameters: 530 + numVertices - The number of local vertices in the graph, or cells in the mesh. 531 . offsets - The offset to the adjacency list for each cell 532 - adjacency - The adjacency list for all cells 533 534 Note: This is suitable for input to a mesh partitioner like ParMetis. 535 536 Level: advanced 537 538 .seealso: `DMPlexCreate()` 539 @*/ 540 PetscErrorCode DMPlexCreateNeighborCSR(DM dm, PetscInt cellHeight, PetscInt *numVertices, PetscInt **offsets, PetscInt **adjacency) 541 { 542 const PetscInt maxFaceCases = 30; 543 PetscInt numFaceCases = 0; 544 PetscInt numFaceVertices[30]; /* maxFaceCases, C89 sucks sucks sucks */ 545 PetscInt *off, *adj; 546 PetscInt *neighborCells = NULL; 547 PetscInt dim, cellDim, depth = 0, faceDepth, cStart, cEnd, c, numCells, cell; 548 549 PetscFunctionBegin; 550 /* For parallel partitioning, I think you have to communicate supports */ 551 PetscCall(DMGetDimension(dm, &dim)); 552 cellDim = dim - cellHeight; 553 PetscCall(DMPlexGetDepth(dm, &depth)); 554 PetscCall(DMPlexGetHeightStratum(dm, cellHeight, &cStart, &cEnd)); 555 if (cEnd - cStart == 0) { 556 if (numVertices) *numVertices = 0; 557 if (offsets) *offsets = NULL; 558 if (adjacency) *adjacency = NULL; 559 PetscFunctionReturn(0); 560 } 561 numCells = cEnd - cStart; 562 faceDepth = depth - cellHeight; 563 if (dim == depth) { 564 PetscInt f, fStart, fEnd; 565 566 PetscCall(PetscCalloc1(numCells + 1, &off)); 567 /* Count neighboring cells */ 568 PetscCall(DMPlexGetHeightStratum(dm, cellHeight + 1, &fStart, &fEnd)); 569 for (f = fStart; f < fEnd; ++f) { 570 const PetscInt *support; 571 PetscInt supportSize; 572 PetscCall(DMPlexGetSupportSize(dm, f, &supportSize)); 573 PetscCall(DMPlexGetSupport(dm, f, &support)); 574 if (supportSize == 2) { 575 PetscInt numChildren; 576 577 
PetscCall(DMPlexGetTreeChildren(dm, f, &numChildren, NULL)); 578 if (!numChildren) { 579 ++off[support[0] - cStart + 1]; 580 ++off[support[1] - cStart + 1]; 581 } 582 } 583 } 584 /* Prefix sum */ 585 for (c = 1; c <= numCells; ++c) off[c] += off[c - 1]; 586 if (adjacency) { 587 PetscInt *tmp; 588 589 PetscCall(PetscMalloc1(off[numCells], &adj)); 590 PetscCall(PetscMalloc1(numCells + 1, &tmp)); 591 PetscCall(PetscArraycpy(tmp, off, numCells + 1)); 592 /* Get neighboring cells */ 593 for (f = fStart; f < fEnd; ++f) { 594 const PetscInt *support; 595 PetscInt supportSize; 596 PetscCall(DMPlexGetSupportSize(dm, f, &supportSize)); 597 PetscCall(DMPlexGetSupport(dm, f, &support)); 598 if (supportSize == 2) { 599 PetscInt numChildren; 600 601 PetscCall(DMPlexGetTreeChildren(dm, f, &numChildren, NULL)); 602 if (!numChildren) { 603 adj[tmp[support[0] - cStart]++] = support[1]; 604 adj[tmp[support[1] - cStart]++] = support[0]; 605 } 606 } 607 } 608 for (c = 0; c < cEnd - cStart; ++c) PetscAssert(tmp[c] == off[c + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Offset %" PetscInt_FMT " != %" PetscInt_FMT " for cell %" PetscInt_FMT, tmp[c], off[c], c + cStart); 609 PetscCall(PetscFree(tmp)); 610 } 611 if (numVertices) *numVertices = numCells; 612 if (offsets) *offsets = off; 613 if (adjacency) *adjacency = adj; 614 PetscFunctionReturn(0); 615 } 616 /* Setup face recognition */ 617 if (faceDepth == 1) { 618 PetscInt cornersSeen[30] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* Could use PetscBT */ 619 620 for (c = cStart; c < cEnd; ++c) { 621 PetscInt corners; 622 623 PetscCall(DMPlexGetConeSize(dm, c, &corners)); 624 if (!cornersSeen[corners]) { 625 PetscInt nFV; 626 627 PetscCheck(numFaceCases < maxFaceCases, PetscObjectComm((PetscObject)dm), PETSC_ERR_PLIB, "Exceeded maximum number of face recognition cases"); 628 cornersSeen[corners] = 1; 629 630 PetscCall(DMPlexGetNumFaceVertices(dm, cellDim, corners, &nFV)); 631 632 
numFaceVertices[numFaceCases++] = nFV; 633 } 634 } 635 } 636 PetscCall(PetscCalloc1(numCells + 1, &off)); 637 /* Count neighboring cells */ 638 for (cell = cStart; cell < cEnd; ++cell) { 639 PetscInt numNeighbors = PETSC_DETERMINE, n; 640 641 PetscCall(DMPlexGetAdjacency_Internal(dm, cell, PETSC_TRUE, PETSC_FALSE, PETSC_FALSE, &numNeighbors, &neighborCells)); 642 /* Get meet with each cell, and check with recognizer (could optimize to check each pair only once) */ 643 for (n = 0; n < numNeighbors; ++n) { 644 PetscInt cellPair[2]; 645 PetscBool found = faceDepth > 1 ? PETSC_TRUE : PETSC_FALSE; 646 PetscInt meetSize = 0; 647 const PetscInt *meet = NULL; 648 649 cellPair[0] = cell; 650 cellPair[1] = neighborCells[n]; 651 if (cellPair[0] == cellPair[1]) continue; 652 if (!found) { 653 PetscCall(DMPlexGetMeet(dm, 2, cellPair, &meetSize, &meet)); 654 if (meetSize) { 655 PetscInt f; 656 657 for (f = 0; f < numFaceCases; ++f) { 658 if (numFaceVertices[f] == meetSize) { 659 found = PETSC_TRUE; 660 break; 661 } 662 } 663 } 664 PetscCall(DMPlexRestoreMeet(dm, 2, cellPair, &meetSize, &meet)); 665 } 666 if (found) ++off[cell - cStart + 1]; 667 } 668 } 669 /* Prefix sum */ 670 for (cell = 1; cell <= numCells; ++cell) off[cell] += off[cell - 1]; 671 672 if (adjacency) { 673 PetscCall(PetscMalloc1(off[numCells], &adj)); 674 /* Get neighboring cells */ 675 for (cell = cStart; cell < cEnd; ++cell) { 676 PetscInt numNeighbors = PETSC_DETERMINE, n; 677 PetscInt cellOffset = 0; 678 679 PetscCall(DMPlexGetAdjacency_Internal(dm, cell, PETSC_TRUE, PETSC_FALSE, PETSC_FALSE, &numNeighbors, &neighborCells)); 680 /* Get meet with each cell, and check with recognizer (could optimize to check each pair only once) */ 681 for (n = 0; n < numNeighbors; ++n) { 682 PetscInt cellPair[2]; 683 PetscBool found = faceDepth > 1 ? 
PETSC_TRUE : PETSC_FALSE; 684 PetscInt meetSize = 0; 685 const PetscInt *meet = NULL; 686 687 cellPair[0] = cell; 688 cellPair[1] = neighborCells[n]; 689 if (cellPair[0] == cellPair[1]) continue; 690 if (!found) { 691 PetscCall(DMPlexGetMeet(dm, 2, cellPair, &meetSize, &meet)); 692 if (meetSize) { 693 PetscInt f; 694 695 for (f = 0; f < numFaceCases; ++f) { 696 if (numFaceVertices[f] == meetSize) { 697 found = PETSC_TRUE; 698 break; 699 } 700 } 701 } 702 PetscCall(DMPlexRestoreMeet(dm, 2, cellPair, &meetSize, &meet)); 703 } 704 if (found) { 705 adj[off[cell - cStart] + cellOffset] = neighborCells[n]; 706 ++cellOffset; 707 } 708 } 709 } 710 } 711 PetscCall(PetscFree(neighborCells)); 712 if (numVertices) *numVertices = numCells; 713 if (offsets) *offsets = off; 714 if (adjacency) *adjacency = adj; 715 PetscFunctionReturn(0); 716 } 717 718 /*@ 719 PetscPartitionerDMPlexPartition - Create a non-overlapping partition of the cells in the mesh 720 721 Collective on PetscPartitioner 722 723 Input Parameters: 724 + part - The PetscPartitioner 725 . targetSection - The PetscSection describing the absolute weight of each partition (can be NULL) 726 - dm - The mesh DM 727 728 Output Parameters: 729 + partSection - The PetscSection giving the division of points by partition 730 - partition - The list of points by partition 731 732 Notes: 733 If the DM has a local section associated, each point to be partitioned will be weighted by the total number of dofs identified 734 by the section in the transitive closure of the point. 
  Level: developer

.seealso `DMPlexDistribute()`, `PetscPartitionerCreate()`, `PetscSectionCreate()`, `PetscSectionSetChart()`, `PetscPartitionerPartition()`
@*/
PetscErrorCode PetscPartitionerDMPlexPartition(PetscPartitioner part, DM dm, PetscSection targetSection, PetscSection partSection, IS *partition)
{
  PetscMPIInt  size;
  PetscBool    isplex;
  PetscSection vertSection = NULL; /* optional vertex-weight section, built only when part->usevwgt */

  PetscFunctionBegin;
  PetscValidHeaderSpecific(part, PETSCPARTITIONER_CLASSID, 1);
  PetscValidHeaderSpecific(dm, DM_CLASSID, 2);
  if (targetSection) PetscValidHeaderSpecific(targetSection, PETSC_SECTION_CLASSID, 3);
  PetscValidHeaderSpecific(partSection, PETSC_SECTION_CLASSID, 4);
  PetscValidPointer(partition, 5);
  PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMPLEX, &isplex));
  PetscCheck(isplex, PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)dm)->type_name);
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)part), &size));
  if (size == 1) {
    /* Serial shortcut: the single rank owns every point, so emit the identity partition
       without building the adjacency graph at all */
    PetscInt *points;
    PetscInt  cStart, cEnd, c;

    PetscCall(DMPlexGetHeightStratum(dm, part->height, &cStart, &cEnd));
    PetscCall(PetscSectionReset(partSection));
    PetscCall(PetscSectionSetChart(partSection, 0, size));
    PetscCall(PetscSectionSetDof(partSection, 0, cEnd - cStart));
    PetscCall(PetscSectionSetUp(partSection));
    PetscCall(PetscMalloc1(cEnd - cStart, &points));
    /* NOTE(review): the array has cEnd - cStart entries but is indexed by c, not c - cStart;
       this is only in-bounds when cStart == 0 (true for height-0 cells) — confirm for height > 0 */
    for (c = cStart; c < cEnd; ++c) points[c] = c;
    PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)part), cEnd - cStart, points, PETSC_OWN_POINTER, partition));
    PetscFunctionReturn(0);
  }
  if (part->height == 0) {
    PetscInt  numVertices = 0;
    PetscInt *start       = NULL;
    PetscInt *adjacency   = NULL;
    IS        globalNumbering;

    if (!part->noGraph || part->viewGraph) {
      /* Build the distributed CSR dual graph the partitioner will consume */
      PetscCall(DMPlexCreatePartitionerGraph(dm, part->height, &numVertices, &start, &adjacency, &globalNumbering));
    } else { /* only compute the number of owned local vertices */
      const PetscInt *idxs;
      PetscInt        p, pStart, pEnd;

      /* Negative entries in the numbering mark unowned (ghost/overlap) points */
      PetscCall(DMPlexGetHeightStratum(dm, part->height, &pStart, &pEnd));
      PetscCall(DMPlexCreateNumbering_Plex(dm, pStart, pEnd, 0, NULL, dm->sf, &globalNumbering));
      PetscCall(ISGetIndices(globalNumbering, &idxs));
      for (p = 0; p < pEnd - pStart; p++) numVertices += idxs[p] < 0 ? 0 : 1;
      PetscCall(ISRestoreIndices(globalNumbering, &idxs));
    }
    if (part->usevwgt) {
      /* Vertex weight = total number of dofs in the cell closure (from the local section) */
      PetscSection    section = dm->localSection, clSection = NULL;
      IS              clPoints = NULL;
      const PetscInt *gid, *clIdx;
      PetscInt        v, p, pStart, pEnd;

      /* dm->localSection encodes degrees of freedom per point, not per cell. We need to get the closure index to properly specify cell weights (aka dofs) */
      /* We do this only if the local section has been set */
      if (section) {
        PetscCall(PetscSectionGetClosureIndex(section, (PetscObject)dm, &clSection, NULL));
        if (!clSection) PetscCall(DMPlexCreateClosureIndex(dm, NULL));
        PetscCall(PetscSectionGetClosureIndex(section, (PetscObject)dm, &clSection, &clPoints));
        PetscCall(ISGetIndices(clPoints, &clIdx));
      }
      PetscCall(DMPlexGetHeightStratum(dm, part->height, &pStart, &pEnd));
      PetscCall(PetscSectionCreate(PETSC_COMM_SELF, &vertSection));
      PetscCall(PetscSectionSetChart(vertSection, 0, numVertices));
      if (globalNumbering) {
        PetscCall(ISGetIndices(globalNumbering, &gid));
      } else gid = NULL;
      for (p = pStart, v = 0; p < pEnd; ++p) {
        PetscInt dof = 1; /* unit weight when no section is available */

        /* skip cells in the overlap */
        if (gid && gid[p - pStart] < 0) continue;

        if (section) {
          PetscInt cl, clSize, clOff;

          dof = 0;
          PetscCall(PetscSectionGetDof(clSection, p, &clSize));
          PetscCall(PetscSectionGetOffset(clSection, p, &clOff));
          for (cl = 0; cl < clSize; cl += 2) {
            PetscInt clDof, clPoint = clIdx[clOff + cl]; /* odd indices are reserved for orientations */

            PetscCall(PetscSectionGetDof(section, clPoint, &clDof));
            dof += clDof;
          }
        }
        PetscCheck(dof, PETSC_COMM_SELF, PETSC_ERR_SUP, "Number of dofs for point %" PetscInt_FMT " in the local section should be positive", p);
        PetscCall(PetscSectionSetDof(vertSection, v, dof));
        v++;
      }
      if (globalNumbering) PetscCall(ISRestoreIndices(globalNumbering, &gid));
      if (clPoints) PetscCall(ISRestoreIndices(clPoints, &clIdx));
      PetscCall(PetscSectionSetUp(vertSection));
    }
    PetscCall(PetscPartitionerPartition(part, size, numVertices, start, adjacency, vertSection, targetSection, partSection, partition));
    PetscCall(PetscFree(start));
    PetscCall(PetscFree(adjacency));
    if (globalNumbering) { /* partition is wrt global unique numbering: change this to be wrt local numbering */
      const PetscInt *globalNum;
      const PetscInt *partIdx;
      PetscInt       *map, cStart, cEnd;
      PetscInt       *adjusted, i, localSize, offset;
      IS              newPartition;

      PetscCall(ISGetLocalSize(*partition, &localSize));
      PetscCall(PetscMalloc1(localSize, &adjusted));
      PetscCall(ISGetIndices(globalNumbering, &globalNum));
      PetscCall(ISGetIndices(*partition, &partIdx));
      PetscCall(PetscMalloc1(localSize, &map));
      PetscCall(DMPlexGetHeightStratum(dm, part->height, &cStart, &cEnd));
      /* map[k] = local point of the k-th owned cell (owned cells have nonnegative global numbers) */
      for (i = cStart, offset = 0; i < cEnd; i++) {
        if (globalNum[i - cStart] >= 0) map[offset++] = i;
      }
      for (i = 0; i < localSize; i++) adjusted[i] = map[partIdx[i]];
      PetscCall(PetscFree(map));
      PetscCall(ISRestoreIndices(*partition, &partIdx));
      PetscCall(ISRestoreIndices(globalNumbering, &globalNum));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, localSize, adjusted, PETSC_OWN_POINTER, &newPartition));
      PetscCall(ISDestroy(&globalNumbering));
      PetscCall(ISDestroy(partition));
      *partition = newPartition;
    }
  } else SETERRQ(PetscObjectComm((PetscObject)part), PETSC_ERR_ARG_OUTOFRANGE, "Invalid height %" PetscInt_FMT " for points to partition", part->height);
  PetscCall(PetscSectionDestroy(&vertSection));
  PetscFunctionReturn(0);
}

/*@
  DMPlexGetPartitioner - Get the mesh partitioner

  Not collective

  Input Parameter:
. dm - The DM

  Output Parameter:
. part - The PetscPartitioner

  Level: developer

  Note: This gets a borrowed reference, so the user should not destroy this PetscPartitioner.

.seealso `DMPlexDistribute()`, `DMPlexSetPartitioner()`, `PetscPartitionerDMPlexPartition()`, `PetscPartitionerCreate()`
@*/
PetscErrorCode DMPlexGetPartitioner(DM dm, PetscPartitioner *part)
{
  DM_Plex *mesh = (DM_Plex *)dm->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
  PetscValidPointer(part, 2);
  *part = mesh->partitioner;
  PetscFunctionReturn(0);
}

/*@
  DMPlexSetPartitioner - Set the mesh partitioner

  logically collective on DM

  Input Parameters:
+ dm - The DM
- part - The partitioner

  Level: developer

  Note: Any existing PetscPartitioner will be destroyed.

.seealso `DMPlexDistribute()`, `DMPlexGetPartitioner()`, `PetscPartitionerCreate()`
@*/
PetscErrorCode DMPlexSetPartitioner(DM dm, PetscPartitioner part)
{
  DM_Plex *mesh = (DM_Plex *)dm->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
  PetscValidHeaderSpecific(part, PETSCPARTITIONER_CLASSID, 2);
  /* Take a reference before destroying the old one so dm == part->... aliasing is safe */
  PetscCall(PetscObjectReference((PetscObject)part));
  PetscCall(PetscPartitionerDestroy(&mesh->partitioner));
  mesh->partitioner = part;
  PetscFunctionReturn(0);
}

/* Recursively insert point and its transitive cone (downward closure) into the hash set ht.
   The hash-set query doubles as the visited check, so shared faces/edges are walked once. */
static PetscErrorCode DMPlexAddClosure_Private(DM dm, PetscHSetI ht, PetscInt point)
{
  const PetscInt *cone;
  PetscInt        coneSize, c;
  PetscBool       missing;

  PetscFunctionBeginHot;
  PetscCall(PetscHSetIQueryAdd(ht, point, &missing));
  if (missing) {
    PetscCall(DMPlexGetCone(dm, point, &cone));
    PetscCall(DMPlexGetConeSize(dm, point, &coneSize));
    for (c = 0; c < coneSize; c++) PetscCall(DMPlexAddClosure_Private(dm, ht, cone[c]));
  }
  PetscFunctionReturn(0);
}

/* Older tree-aware closure accumulator: walks parents (up) and children (down) of point,
   adding every visited point to ht. Kept for reference; the current code path uses the
   DMPlexAddClosureTree_* helpers below (hence PETSC_UNUSED). */
PETSC_UNUSED static PetscErrorCode DMPlexAddClosure_Tree(DM dm, PetscHSetI ht, PetscInt point, PetscBool up, PetscBool down)
{
  PetscFunctionBegin;
  if (up) {
    PetscInt parent;

    PetscCall(DMPlexGetTreeParent(dm, point, &parent, NULL));
    if (parent != point) { /* a point is its own parent when it has none */
      PetscInt closureSize, *closure = NULL, i;

      PetscCall(DMPlexGetTransitiveClosure(dm, parent, PETSC_TRUE, &closureSize, &closure));
      for (i = 0; i < closureSize; i++) {
        PetscInt cpoint = closure[2 * i]; /* even slots are points, odd slots orientations */

        PetscCall(PetscHSetIAdd(ht, cpoint));
        PetscCall(DMPlexAddClosure_Tree(dm, ht, cpoint, PETSC_TRUE, PETSC_FALSE));
      }
      PetscCall(DMPlexRestoreTransitiveClosure(dm, parent, PETSC_TRUE, &closureSize, &closure));
    }
  }
  if (down) {
    PetscInt        numChildren;
    const PetscInt *children;

    PetscCall(DMPlexGetTreeChildren(dm, point, &numChildren, &children));
    if (numChildren) {
      PetscInt i;

      for (i = 0; i < numChildren; i++) {
        PetscInt cpoint = children[i];

        PetscCall(PetscHSetIAdd(ht, cpoint));
        PetscCall(DMPlexAddClosure_Tree(dm, ht, cpoint, PETSC_FALSE, PETSC_TRUE));
      }
    }
  }
  PetscFunctionReturn(0);
}

/* Walk up the refinement tree from point, adding each ancestor, its full downward closure,
   and recursing up from every cone point of the ancestor. */
static PetscErrorCode DMPlexAddClosureTree_Up_Private(DM dm, PetscHSetI ht, PetscInt point)
{
  PetscInt parent;

  PetscFunctionBeginHot;
  PetscCall(DMPlexGetTreeParent(dm, point, &parent, NULL));
  if (point != parent) {
    const PetscInt *cone;
    PetscInt        coneSize, c;

    PetscCall(DMPlexAddClosureTree_Up_Private(dm, ht, parent));
    PetscCall(DMPlexAddClosure_Private(dm, ht, parent));
    PetscCall(DMPlexGetCone(dm, parent, &cone));
    PetscCall(DMPlexGetConeSize(dm, parent, &coneSize));
    for (c = 0; c < coneSize; c++) {
      const PetscInt cp = cone[c];

      PetscCall(DMPlexAddClosureTree_Up_Private(dm, ht, cp));
    }
  }
  PetscFunctionReturn(0);
}

/* Add the immediate tree children of point to ht (no recursion into grandchildren here). */
static PetscErrorCode DMPlexAddClosureTree_Down_Private(DM dm, PetscHSetI ht, PetscInt point)
{
  PetscInt        i, numChildren;
  const PetscInt *children;

  PetscFunctionBeginHot;
  PetscCall(DMPlexGetTreeChildren(dm, point, &numChildren, &children));
  for (i = 0; i < numChildren; i++) PetscCall(PetscHSetIAdd(ht, children[i]));
  PetscFunctionReturn(0);
}

/* Tree-aware closure: add point itself, its tree ancestors (up), its tree children (down),
   and recurse through its cone so the whole downward closure is treated the same way. */
static PetscErrorCode DMPlexAddClosureTree_Private(DM dm, PetscHSetI ht, PetscInt point)
{
  const PetscInt *cone;
  PetscInt        coneSize, c;

  PetscFunctionBeginHot;
  PetscCall(PetscHSetIAdd(ht, point));
  PetscCall(DMPlexAddClosureTree_Up_Private(dm, ht, point));
  PetscCall(DMPlexAddClosureTree_Down_Private(dm, ht, point));
  PetscCall(DMPlexGetCone(dm, point, &cone));
  PetscCall(DMPlexGetConeSize(dm, point, &coneSize));
  for (c = 0; c < coneSize; c++) PetscCall(DMPlexAddClosureTree_Private(dm, ht, cone[c]));
  PetscFunctionReturn(0);
}

PetscErrorCode
DMPlexClosurePoints_Private(DM dm, PetscInt numPoints, const PetscInt points[], IS *closureIS)
{
  DM_Plex        *mesh    = (DM_Plex *)dm->data;
  /* Presence of a parent or child section means the mesh carries a refinement tree */
  const PetscBool hasTree = (mesh->parentSection || mesh->childSection) ? PETSC_TRUE : PETSC_FALSE;
  PetscInt        nelems, *elems, off = 0, p;
  PetscHSetI      ht = NULL;

  PetscFunctionBegin;
  /* Accumulate the union of the transitive closures of points[] in a hash set,
     then return it as a sorted IS (the set removes duplicates across closures) */
  PetscCall(PetscHSetICreate(&ht));
  PetscCall(PetscHSetIResize(ht, numPoints * 16)); /* heuristic presize to limit rehashing */
  if (!hasTree) {
    for (p = 0; p < numPoints; ++p) PetscCall(DMPlexAddClosure_Private(dm, ht, points[p]));
  } else {
#if 1
    for (p = 0; p < numPoints; ++p) PetscCall(DMPlexAddClosureTree_Private(dm, ht, points[p]));
#else
    PetscInt *closure = NULL, closureSize, c;
    for (p = 0; p < numPoints; ++p) {
      PetscCall(DMPlexGetTransitiveClosure(dm, points[p], PETSC_TRUE, &closureSize, &closure));
      for (c = 0; c < closureSize * 2; c += 2) {
        PetscCall(PetscHSetIAdd(ht, closure[c]));
        if (hasTree) PetscCall(DMPlexAddClosure_Tree(dm, ht, closure[c], PETSC_TRUE, PETSC_TRUE));
      }
    }
    if (closure) PetscCall(DMPlexRestoreTransitiveClosure(dm, 0, PETSC_TRUE, NULL, &closure));
#endif
  }
  PetscCall(PetscHSetIGetSize(ht, &nelems));
  PetscCall(PetscMalloc1(nelems, &elems));
  PetscCall(PetscHSetIGetElems(ht, &off, elems));
  PetscCall(PetscHSetIDestroy(&ht));
  PetscCall(PetscSortInt(nelems, elems));
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nelems, elems, PETSC_OWN_POINTER, closureIS));
  PetscFunctionReturn(0);
}

/*@
  DMPlexPartitionLabelClosure - Add the closure of all points to the partition label

  Input Parameters:
+ dm - The DM
- label - DMLabel assigning ranks to remote roots

  Level: developer

.seealso: `DMPlexPartitionLabelCreateSF()`, `DMPlexDistribute()`, `DMPlexCreateOverlap()`
@*/
PetscErrorCode DMPlexPartitionLabelClosure(DM dm, DMLabel label)
{
  IS              rankIS, pointIS, closureIS;
  const PetscInt *ranks, *points;
  PetscInt        numRanks, numPoints, r;

  PetscFunctionBegin;
  /* For every rank stratum in the label, replace the stratum by the closure of its points */
  PetscCall(DMLabelGetValueIS(label, &rankIS));
  PetscCall(ISGetLocalSize(rankIS, &numRanks));
  PetscCall(ISGetIndices(rankIS, &ranks));
  for (r = 0; r < numRanks; ++r) {
    const PetscInt rank = ranks[r];
    PetscCall(DMLabelGetStratumIS(label, rank, &pointIS));
    PetscCall(ISGetLocalSize(pointIS, &numPoints));
    PetscCall(ISGetIndices(pointIS, &points));
    PetscCall(DMPlexClosurePoints_Private(dm, numPoints, points, &closureIS));
    PetscCall(ISRestoreIndices(pointIS, &points));
    PetscCall(ISDestroy(&pointIS));
    PetscCall(DMLabelSetStratumIS(label, rank, closureIS));
    PetscCall(ISDestroy(&closureIS));
  }
  PetscCall(ISRestoreIndices(rankIS, &ranks));
  PetscCall(ISDestroy(&rankIS));
  PetscFunctionReturn(0);
}

/*@
  DMPlexPartitionLabelAdjacency - Add one level of adjacent points to the partition label

  Input Parameters:
+ dm - The DM
- label - DMLabel assigning ranks to remote roots

  Level: developer

.seealso: `DMPlexPartitionLabelCreateSF()`, `DMPlexDistribute()`, `DMPlexCreateOverlap()`
@*/
PetscErrorCode DMPlexPartitionLabelAdjacency(DM dm, DMLabel label)
{
  IS              rankIS, pointIS;
  const PetscInt *ranks, *points;
  PetscInt        numRanks, numPoints, r, p, a, adjSize;
  PetscInt       *adj = NULL; /* work array reused (and grown) by DMPlexGetAdjacency */

  PetscFunctionBegin;
  PetscCall(DMLabelGetValueIS(label, &rankIS));
  PetscCall(ISGetLocalSize(rankIS, &numRanks));
  PetscCall(ISGetIndices(rankIS, &ranks));
  for (r = 0; r < numRanks; ++r) {
    const PetscInt rank = ranks[r];

    PetscCall(DMLabelGetStratumIS(label, rank, &pointIS));
    PetscCall(ISGetLocalSize(pointIS, &numPoints));
    PetscCall(ISGetIndices(pointIS, &points));
    for (p = 0; p < numPoints; ++p) {
      adjSize = PETSC_DETERMINE; /* let DMPlexGetAdjacency size the output */
      PetscCall(DMPlexGetAdjacency(dm, points[p], &adjSize, &adj));
      for (a = 0; a < adjSize; ++a) PetscCall(DMLabelSetValue(label, adj[a], rank));
    }
    PetscCall(ISRestoreIndices(pointIS, &points));
    PetscCall(ISDestroy(&pointIS));
  }
  PetscCall(ISRestoreIndices(rankIS, &ranks));
  PetscCall(ISDestroy(&rankIS));
  PetscCall(PetscFree(adj));
  PetscFunctionReturn(0);
}

/*@
  DMPlexPartitionLabelPropagate - Propagate points in a partition label over the point SF

  Input Parameters:
+ dm - The DM
- label - DMLabel assigning ranks to remote roots

  Level: developer

  Note: This is required when generating multi-level overlaps to capture
  overlap points from non-neighbouring partitions.

.seealso: `DMPlexPartitionLabelCreateSF()`, `DMPlexDistribute()`, `DMPlexCreateOverlap()`
@*/
PetscErrorCode DMPlexPartitionLabelPropagate(DM dm, DMLabel label)
{
  MPI_Comm        comm;
  PetscMPIInt     rank;
  PetscSF         sfPoint;
  DMLabel         lblRoots, lblLeaves;
  IS              rankIS, pointIS;
  const PetscInt *ranks;
  PetscInt        numRanks, r;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(DMGetPointSF(dm, &sfPoint));
  /* Pull point contributions from remote leaves into local roots */
  PetscCall(DMLabelGather(label, sfPoint, &lblLeaves));
  PetscCall(DMLabelGetValueIS(lblLeaves, &rankIS));
  PetscCall(ISGetLocalSize(rankIS, &numRanks));
  PetscCall(ISGetIndices(rankIS, &ranks));
  for (r = 0; r < numRanks; ++r) {
    const PetscInt remoteRank = ranks[r];
    if (remoteRank == rank) continue; /* our own stratum is already local */
    PetscCall(DMLabelGetStratumIS(lblLeaves, remoteRank, &pointIS));
    PetscCall(DMLabelInsertIS(label, pointIS, remoteRank));
    PetscCall(ISDestroy(&pointIS));
  }
  PetscCall(ISRestoreIndices(rankIS, &ranks));
  PetscCall(ISDestroy(&rankIS));
  PetscCall(DMLabelDestroy(&lblLeaves));
  /* Push point contributions from roots into remote leaves */
  PetscCall(DMLabelDistribute(label, sfPoint, &lblRoots));
  PetscCall(DMLabelGetValueIS(lblRoots, &rankIS));
  PetscCall(ISGetLocalSize(rankIS, &numRanks));
  PetscCall(ISGetIndices(rankIS, &ranks));
  for (r = 0; r < numRanks; ++r) {
    const PetscInt remoteRank = ranks[r];
    if (remoteRank == rank) continue; /* skip our own stratum, as in the gather phase */
    PetscCall(DMLabelGetStratumIS(lblRoots, remoteRank, &pointIS));
    PetscCall(DMLabelInsertIS(label, pointIS, remoteRank));
    PetscCall(ISDestroy(&pointIS));
  }
  PetscCall(ISRestoreIndices(rankIS, &ranks));
  PetscCall(ISDestroy(&rankIS));
  PetscCall(DMLabelDestroy(&lblRoots));
  PetscFunctionReturn(0);
}

/*@
  DMPlexPartitionLabelInvert - Create a partition label of remote roots from a local root label

  Input Parameters:
+ dm - The DM
. rootLabel - DMLabel assigning ranks to local roots
- processSF - A star forest mapping into the local index on each remote rank

  Output Parameter:
. leafLabel - DMLabel assigning ranks to remote roots

  Note: The rootLabel defines a send pattern by mapping local points to remote target ranks. The
  resulting leafLabel is a receiver mapping of remote roots to their parent rank.

  Level: developer

.seealso: `DMPlexPartitionLabelCreateSF()`, `DMPlexDistribute()`, `DMPlexCreateOverlap()`
@*/
PetscErrorCode DMPlexPartitionLabelInvert(DM dm, DMLabel rootLabel, PetscSF processSF, DMLabel leafLabel)
{
  MPI_Comm           comm;
  PetscMPIInt        rank, size, r;
  PetscInt           p, n, numNeighbors, numPoints, dof, off, rootSize, l, nleaves, leafSize;
  PetscSF            sfPoint;
  PetscSection       rootSection;
  PetscSFNode       *rootPoints, *leafPoints;
  const PetscSFNode *remote;
  const PetscInt    *local, *neighbors;
  IS                 valueIS;
  /* true when PetscInt counts/offsets exceed what MPI_Alltoallv's int arguments can hold */
  PetscBool          mpiOverflow = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(DMPLEX_PartLabelInvert, dm, 0, 0, 0));
  PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(DMGetPointSF(dm, &sfPoint));

  /* Convert to (point, rank) and use actual owners */
  PetscCall(PetscSectionCreate(comm, &rootSection));
  PetscCall(PetscSectionSetChart(rootSection, 0, size));
  PetscCall(DMLabelGetValueIS(rootLabel, &valueIS));
  PetscCall(ISGetLocalSize(valueIS, &numNeighbors));
  PetscCall(ISGetIndices(valueIS, &neighbors));
  for (n = 0; n < numNeighbors; ++n) {
    PetscCall(DMLabelGetStratumSize(rootLabel, neighbors[n], &numPoints));
    PetscCall(PetscSectionAddDof(rootSection, neighbors[n], numPoints));
  }
  PetscCall(PetscSectionSetUp(rootSection));
  PetscCall(PetscSectionGetStorageSize(rootSection, &rootSize));
  PetscCall(PetscMalloc1(rootSize, &rootPoints));
  PetscCall(PetscSFGetGraph(sfPoint, NULL, &nleaves, &local, &remote));
  for (n = 0; n < numNeighbors; ++n) {
    IS              pointIS;
    const PetscInt *points;

    PetscCall(PetscSectionGetOffset(rootSection, neighbors[n], &off));
    PetscCall(DMLabelGetStratumIS(rootLabel, neighbors[n], &pointIS));
    PetscCall(ISGetLocalSize(pointIS, &numPoints));
    PetscCall(ISGetIndices(pointIS, &points));
    for (p = 0; p < numPoints; ++p) {
      /* If the point is a leaf in the point SF, its true owner is the remote root;
         otherwise we own it ourselves */
      if (local) PetscCall(PetscFindInt(points[p], nleaves, local, &l));
      else l = -1;
      if (l >= 0) {
        rootPoints[off + p] = remote[l];
      } else {
        rootPoints[off + p].index = points[p];
        rootPoints[off + p].rank  = rank;
      }
    }
    PetscCall(ISRestoreIndices(pointIS, &points));
    PetscCall(ISDestroy(&pointIS));
  }

  /* Try to communicate overlap using All-to-All */
  if (!processSF) {
    PetscInt64   counter     = 0;
    PetscBool    locOverflow = PETSC_FALSE;
    PetscMPIInt *scounts, *sdispls, *rcounts, *rdispls;

    PetscCall(PetscCalloc4(size, &scounts, size, &sdispls, size, &rcounts, size, &rdispls));
    for (n = 0; n < numNeighbors; ++n) {
      PetscCall(PetscSectionGetDof(rootSection, neighbors[n], &dof));
      PetscCall(PetscSectionGetOffset(rootSection, neighbors[n], &off));
#if defined(PETSC_USE_64BIT_INDICES)
      if (dof > PETSC_MPI_INT_MAX) {
        locOverflow = PETSC_TRUE;
        break;
      }
      if (off > PETSC_MPI_INT_MAX) {
        locOverflow = PETSC_TRUE;
        break;
      }
#endif
      scounts[neighbors[n]] = (PetscMPIInt)dof;
      sdispls[neighbors[n]] = (PetscMPIInt)off;
    }
    PetscCallMPI(MPI_Alltoall(scounts, 1, MPI_INT, rcounts, 1, MPI_INT, comm));
    for (r = 0; r < size; ++r) {
      rdispls[r] = (int)counter;
      counter += rcounts[r];
    }
    if (counter > PETSC_MPI_INT_MAX) locOverflow = PETSC_TRUE;
    /* Everyone must agree on the fallback, or the collective calls below would mismatch */
    PetscCallMPI(MPI_Allreduce(&locOverflow, &mpiOverflow, 1, MPIU_BOOL, MPI_LOR, comm));
    if (!mpiOverflow) {
      PetscCall(PetscInfo(dm, "Using Alltoallv for mesh distribution\n"));
      leafSize = (PetscInt)counter;
      PetscCall(PetscMalloc1(leafSize, &leafPoints));
      PetscCallMPI(MPI_Alltoallv(rootPoints, scounts, sdispls, MPIU_2INT, leafPoints, rcounts, rdispls, MPIU_2INT, comm));
    }
    PetscCall(PetscFree4(scounts, sdispls, rcounts, rdispls));
  }

  /* Communicate overlap using process star forest */
  if (processSF || mpiOverflow) {
    PetscSF      procSF;
    PetscSection leafSection;

    if (processSF) {
      PetscCall(PetscInfo(dm, "Using processSF for mesh distribution\n"));
      PetscCall(PetscObjectReference((PetscObject)processSF));
      procSF = processSF;
    } else {
      PetscCall(PetscInfo(dm, "Using processSF for mesh distribution (MPI overflow)\n"));
      PetscCall(PetscSFCreate(comm, &procSF));
      PetscCall(PetscSFSetGraphWithPattern(procSF, NULL, PETSCSF_PATTERN_ALLTOALL));
    }

    PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject)dm), &leafSection));
    PetscCall(DMPlexDistributeData(dm, procSF, rootSection, MPIU_2INT, rootPoints, leafSection, (void **)&leafPoints));
    PetscCall(PetscSectionGetStorageSize(leafSection, &leafSize));
    PetscCall(PetscSectionDestroy(&leafSection));
    PetscCall(PetscSFDestroy(&procSF));
  }

  for (p = 0; p < leafSize; p++) PetscCall(DMLabelSetValue(leafLabel, leafPoints[p].index, leafPoints[p].rank));

  PetscCall(ISRestoreIndices(valueIS, &neighbors));
  PetscCall(ISDestroy(&valueIS));
  PetscCall(PetscSectionDestroy(&rootSection));
  PetscCall(PetscFree(rootPoints));
  PetscCall(PetscFree(leafPoints));
  PetscCall(PetscLogEventEnd(DMPLEX_PartLabelInvert, dm, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*@
  DMPlexPartitionLabelCreateSF - Create a star forest from a label that assigns ranks to points

  Input Parameters:
+ dm - The DM
- label - DMLabel assigning ranks to remote roots

  Output Parameter:
. sf - The star forest communication context encapsulating the defined mapping

  Note: The incoming label is a receiver mapping of remote points to their parent rank.
  Level: developer

.seealso: `DMPlexDistribute()`, `DMPlexCreateOverlap()`
@*/
PetscErrorCode DMPlexPartitionLabelCreateSF(DM dm, DMLabel label, PetscSF *sf)
{
  PetscMPIInt     rank;
  PetscInt        n, numRemote, p, numPoints, pStart, pEnd, idx = 0, nNeighbors;
  PetscSFNode    *remotePoints;
  IS              remoteRootIS, neighborsIS;
  const PetscInt *remoteRoots, *neighbors;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(DMPLEX_PartLabelCreateSF, dm, 0, 0, 0));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));

  PetscCall(DMLabelGetValueIS(label, &neighborsIS));
#if 0
  {
    IS is;
    PetscCall(ISDuplicate(neighborsIS, &is));
    PetscCall(ISSort(is));
    PetscCall(ISDestroy(&neighborsIS));
    neighborsIS = is;
  }
#endif
  PetscCall(ISGetLocalSize(neighborsIS, &nNeighbors));
  PetscCall(ISGetIndices(neighborsIS, &neighbors));
  /* Total leaf count = sum of all stratum sizes (including our own rank's stratum) */
  for (numRemote = 0, n = 0; n < nNeighbors; ++n) {
    PetscCall(DMLabelGetStratumSize(label, neighbors[n], &numPoints));
    numRemote += numPoints;
  }
  PetscCall(PetscMalloc1(numRemote, &remotePoints));
  /* Put owned points first */
  PetscCall(DMLabelGetStratumSize(label, rank, &numPoints));
  if (numPoints > 0) {
    PetscCall(DMLabelGetStratumIS(label, rank, &remoteRootIS));
    PetscCall(ISGetIndices(remoteRootIS, &remoteRoots));
    for (p = 0; p < numPoints; p++) {
      remotePoints[idx].index = remoteRoots[p];
      remotePoints[idx].rank  = rank;
      idx++;
    }
    PetscCall(ISRestoreIndices(remoteRootIS, &remoteRoots));
    PetscCall(ISDestroy(&remoteRootIS));
  }
  /* Now add remote points */
  for (n = 0; n < nNeighbors; ++n) {
    const PetscInt nn = neighbors[n];

    PetscCall(DMLabelGetStratumSize(label, nn, &numPoints));
    if (nn == rank || numPoints <= 0) continue; /* own stratum already handled above */
    PetscCall(DMLabelGetStratumIS(label, nn, &remoteRootIS));
    PetscCall(ISGetIndices(remoteRootIS, &remoteRoots));
    for (p = 0; p < numPoints; p++) {
      remotePoints[idx].index = remoteRoots[p];
      remotePoints[idx].rank  = nn;
      idx++;
    }
    PetscCall(ISRestoreIndices(remoteRootIS, &remoteRoots));
    PetscCall(ISDestroy(&remoteRootIS));
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), sf));
  PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));
  /* NULL ilocal => leaves are the first numRemote local points in order */
  PetscCall(PetscSFSetGraph(*sf, pEnd - pStart, numRemote, NULL, PETSC_OWN_POINTER, remotePoints, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetUp(*sf));
  PetscCall(ISDestroy(&neighborsIS));
  PetscCall(PetscLogEventEnd(DMPLEX_PartLabelCreateSF, dm, 0, 0, 0));
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_PARMETIS)
#include <parmetis.h>
#endif

/* The two functions below are used by DMPlexRebalanceSharedPoints which errors
 * when PETSc is built without ParMETIS. To avoid -Wunused-function, we take
 * them out in that case. */
#if defined(PETSC_HAVE_PARMETIS)
/*@C

  DMPlexRewriteSF - Rewrites the ownership of the SF of a DM (in place).

  Input parameters:
+ dm - The DMPlex object.
. n - The number of points.
. pointsToRewrite - The points in the SF whose ownership will change.
. targetOwners - New owner for each element in pointsToRewrite.
- degrees - Degrees of the points in the SF as obtained by PetscSFComputeDegreeBegin/PetscSFComputeDegreeEnd.
  Level: developer

@*/
static PetscErrorCode DMPlexRewriteSF(DM dm, PetscInt n, PetscInt *pointsToRewrite, PetscInt *targetOwners, const PetscInt *degrees)
{
  PetscInt           pStart, pEnd, i, j, counter, leafCounter, sumDegrees, nroots, nleafs;
  PetscInt          *cumSumDegrees, *newOwners, *newNumbers, *rankOnLeafs, *locationsOfLeafs, *remoteLocalPointOfLeafs, *points, *leafsNew;
  PetscSFNode       *leafLocationsNew;
  const PetscSFNode *iremote;
  const PetscInt    *ilocal;
  PetscBool         *isLeaf;
  PetscSF            sf;
  MPI_Comm           comm;
  PetscMPIInt        rank, size;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));

  PetscCall(DMGetPointSF(dm, &sf));
  PetscCall(PetscSFGetGraph(sf, &nroots, &nleafs, &ilocal, &iremote));
  PetscCall(PetscMalloc1(pEnd - pStart, &isLeaf));
  for (i = 0; i < pEnd - pStart; i++) isLeaf[i] = PETSC_FALSE;
  /* NOTE(review): assumes ilocal != NULL; PetscSFGetGraph may return NULL ilocal for
     contiguous leaves — confirm the point SF always stores explicit leaf indices here */
  for (i = 0; i < nleafs; i++) isLeaf[ilocal[i] - pStart] = PETSC_TRUE;

  /* Prefix sum of root degrees: cumSumDegrees[p] is the offset of point p's leaf records
     in the gathered arrays below */
  PetscCall(PetscMalloc1(pEnd - pStart + 1, &cumSumDegrees));
  cumSumDegrees[0] = 0;
  for (i = 1; i <= pEnd - pStart; i++) cumSumDegrees[i] = cumSumDegrees[i - 1] + degrees[i - 1];
  sumDegrees = cumSumDegrees[pEnd - pStart];
  /* get the location of my leafs (we have sumDegrees many leafs pointing at our roots) */

  PetscCall(PetscMalloc1(sumDegrees, &locationsOfLeafs));
  PetscCall(PetscMalloc1(pEnd - pStart, &rankOnLeafs));
  for (i = 0; i < pEnd - pStart; i++) rankOnLeafs[i] = rank;
  PetscCall(PetscSFGatherBegin(sf, MPIU_INT, rankOnLeafs, locationsOfLeafs));
  PetscCall(PetscSFGatherEnd(sf, MPIU_INT, rankOnLeafs, locationsOfLeafs));
  PetscCall(PetscFree(rankOnLeafs));

  /* get the remote local points of my leaves */
  PetscCall(PetscMalloc1(sumDegrees, &remoteLocalPointOfLeafs));
  PetscCall(PetscMalloc1(pEnd - pStart, &points));
  for (i = 0; i < pEnd - pStart; i++) points[i] = pStart + i;
  PetscCall(PetscSFGatherBegin(sf, MPIU_INT, points, remoteLocalPointOfLeafs));
  PetscCall(PetscSFGatherEnd(sf, MPIU_INT, points, remoteLocalPointOfLeafs));
  PetscCall(PetscFree(points));
  /* Figure out the new owners of the vertices that are up for grabs and their numbers on the new owners */
  PetscCall(PetscMalloc1(pEnd - pStart, &newOwners));
  PetscCall(PetscMalloc1(pEnd - pStart, &newNumbers));
  for (i = 0; i < pEnd - pStart; i++) {
    newOwners[i]  = -1; /* -1 marks "ownership unchanged" */
    newNumbers[i] = -1;
  }
  {
    PetscInt oldNumber, newNumber, oldOwner, newOwner;
    for (i = 0; i < n; i++) {
      oldNumber = pointsToRewrite[i];
      newNumber = -1;
      oldOwner  = rank;
      newOwner  = targetOwners[i];
      if (oldOwner == newOwner) {
        newNumber = oldNumber;
      } else {
        /* Find the leaf record of the new owner to learn its local numbering of this point */
        for (j = 0; j < degrees[oldNumber]; j++) {
          if (locationsOfLeafs[cumSumDegrees[oldNumber] + j] == newOwner) {
            newNumber = remoteLocalPointOfLeafs[cumSumDegrees[oldNumber] + j];
            break;
          }
        }
      }
      PetscCheck(newNumber != -1, PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Couldn't find the new owner of vertex.");

      newOwners[oldNumber]  = newOwner;
      newNumbers[oldNumber] = newNumber;
    }
  }
  PetscCall(PetscFree(cumSumDegrees));
  PetscCall(PetscFree(locationsOfLeafs));
  PetscCall(PetscFree(remoteLocalPointOfLeafs));

  /* Broadcast the ownership decisions from roots to all leaves (in place) */
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, newOwners, newOwners, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, newOwners, newOwners, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, newNumbers, newNumbers, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, newNumbers, newNumbers, MPI_REPLACE));

  /* Now count how many leafs we have on each processor. */
  leafCounter = 0;
  for (i = 0; i < pEnd - pStart; i++) {
    if (newOwners[i] >= 0) {
      if (newOwners[i] != rank) leafCounter++; /* rewritten point now owned elsewhere */
    } else {
      if (isLeaf[i]) leafCounter++; /* untouched point keeps its old leaf status */
    }
  }

  /* Now set up the new sf by creating the leaf arrays */
  PetscCall(PetscMalloc1(leafCounter, &leafsNew));
  PetscCall(PetscMalloc1(leafCounter, &leafLocationsNew));

  leafCounter = 0;
  counter     = 0; /* walks the old iremote array in step with the old leaves */
  for (i = 0; i < pEnd - pStart; i++) {
    if (newOwners[i] >= 0) {
      if (newOwners[i] != rank) {
        leafsNew[leafCounter]               = i;
        leafLocationsNew[leafCounter].rank  = newOwners[i];
        leafLocationsNew[leafCounter].index = newNumbers[i];
        leafCounter++;
      }
    } else {
      if (isLeaf[i]) {
        leafsNew[leafCounter]               = i;
        leafLocationsNew[leafCounter].rank  = iremote[counter].rank;
        leafLocationsNew[leafCounter].index = iremote[counter].index;
        leafCounter++;
      }
    }
    if (isLeaf[i]) counter++;
  }

  PetscCall(PetscSFSetGraph(sf, nroots, leafCounter, leafsNew, PETSC_OWN_POINTER, leafLocationsNew, PETSC_OWN_POINTER));
  PetscCall(PetscFree(newOwners));
  PetscCall(PetscFree(newNumbers));
  PetscCall(PetscFree(isLeaf));
  PetscFunctionReturn(0);
}

/* Print min/avg/max/balance statistics of the weighted point distribution over all ranks.
   vtxwgt is strided by skip; part (optional) maps local index -> target rank. */
static PetscErrorCode DMPlexViewDistribution(MPI_Comm comm, PetscInt n, PetscInt skip, PetscInt *vtxwgt, PetscInt *part, PetscViewer viewer)
{
  PetscInt   *distribution, min, max, sum;
  PetscMPIInt rank, size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscCalloc1(size, &distribution));
  for (PetscInt i = 0; i < n; i++) {
    if (part) distribution[part[i]] += vtxwgt[skip * i];
    else distribution[rank] += vtxwgt[skip * i];
  }
  PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE, distribution, size, MPIU_INT, MPI_SUM, comm));
  min = distribution[0];
  max = distribution[0];
  sum = distribution[0];
  for (PetscInt i = 1; i < size; i++) {
    if (distribution[i] < min) min = distribution[i];
    if (distribution[i] > max) max = distribution[i];
    sum += distribution[i];
  }
  PetscCall(PetscViewerASCIIPrintf(viewer, "Min: %" PetscInt_FMT ", Avg: %" PetscInt_FMT ", Max: %" PetscInt_FMT ", Balance: %f\n", min, sum / size, max, (max * 1. * size) / sum));
  PetscCall(PetscFree(distribution));
  PetscFunctionReturn(0);
}

#endif

/*@
  DMPlexRebalanceSharedPoints - Redistribute points in the plex that are shared in order to achieve better balancing. This routine updates the PointSF of the DM inplace.

  Input parameters:
+ dm - The DMPlex object.
. entityDepth - depth of the entity to balance (0 -> balance vertices).
. useInitialGuess - whether to use the current distribution as initial guess (only used by ParMETIS).
- parallel - whether to use ParMETIS and do the partition in parallel or whether to gather the graph onto a single process and use METIS.

  Output parameters:
. success - whether the graph partitioning was successful or not, optional. Unsuccessful simply means no change to the partitioning

  Options Database:
+ -dm_plex_rebalance_shared_points_parmetis - Use ParMetis instead of Metis for the partitioner
. -dm_plex_rebalance_shared_points_use_initial_guess - Use current partition to bootstrap ParMetis partition
. 
-dm_plex_rebalance_shared_points_use_mat_partitioning - Use the MatPartitioning object to perform the partition, the prefix for those operations is -dm_plex_rebalance_shared_points_
- -dm_plex_rebalance_shared_points_monitor - Monitor the shared points rebalance process

  Developer Notes:
  This should use MatPartitioning to allow the use of any partitioner and not be hardwired to use ParMetis

  Level: intermediate
@*/
PetscErrorCode DMPlexRebalanceSharedPoints(DM dm, PetscInt entityDepth, PetscBool useInitialGuess, PetscBool parallel, PetscBool *success)
{
#if defined(PETSC_HAVE_PARMETIS)
  PetscSF            sf;
  PetscInt           ierr, i, j, idx, jdx;
  PetscInt           eBegin, eEnd, nroots, nleafs, pStart, pEnd;
  const PetscInt    *degrees, *ilocal;
  const PetscSFNode *iremote;
  PetscBool         *toBalance, *isLeaf, *isExclusivelyOwned, *isNonExclusivelyOwned;
  PetscInt           numExclusivelyOwned, numNonExclusivelyOwned;
  PetscMPIInt        rank, size;
  PetscInt          *globalNumbersOfLocalOwnedVertices, *leafGlobalNumbers;
  const PetscInt    *cumSumVertices;
  PetscInt           offset, counter;
  PetscInt          *vtxwgt;
  const PetscInt    *xadj, *adjncy;
  PetscInt          *part, *options;
  PetscInt           nparts, wgtflag, numflag, ncon, edgecut;
  real_t            *ubvec;
  PetscInt          *firstVertices, *renumbering;
  PetscInt           failed, failedGlobal;
  MPI_Comm           comm;
  Mat                A;
  PetscViewer        viewer = NULL; /* initialized: it is read below even when no viewer option was given */
  PetscViewerFormat  format;
  PetscLayout        layout;
  real_t            *tpwgts;
  PetscMPIInt       *counts, *mpiCumSumVertices;
  PetscInt          *pointsToRewrite;
  PetscInt           numRows;
  PetscBool          done, usematpartitioning = PETSC_FALSE;
  IS                 ispart = NULL;
  MatPartitioning    mp;
  const char        *prefix;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) {
    /* Nothing shared on a single process; trivially successful */
    if (success) *success = PETSC_TRUE;
    PetscFunctionReturn(0);
  }
  if (success) *success = PETSC_FALSE;
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* NOTE(review): the input arguments are overridden here and the behavior is
     driven purely by the options database; confirm this is intended since it
     makes the useInitialGuess/parallel parameters no-ops for direct callers. */
  parallel        = PETSC_FALSE;
  useInitialGuess = PETSC_FALSE;
  PetscObjectOptionsBegin((PetscObject)dm);
  /* fix: "&parallel" and "&degrees" below were mojibake (HTML entities "&para"/"&deg" rendered as glyphs) */
  PetscCall(PetscOptionsName("-dm_plex_rebalance_shared_points_parmetis", "Use ParMetis instead of Metis for the partitioner", "DMPlexRebalanceSharedPoints", &parallel));
  PetscCall(PetscOptionsBool("-dm_plex_rebalance_shared_points_use_initial_guess", "Use current partition to bootstrap ParMetis partition", "DMPlexRebalanceSharedPoints", useInitialGuess, &useInitialGuess, NULL));
  PetscCall(PetscOptionsBool("-dm_plex_rebalance_shared_points_use_mat_partitioning", "Use the MatPartitioning object to partition", "DMPlexRebalanceSharedPoints", usematpartitioning, &usematpartitioning, NULL));
  PetscCall(PetscOptionsViewer("-dm_plex_rebalance_shared_points_monitor", "Monitor the shared points rebalance process", "DMPlexRebalanceSharedPoints", &viewer, &format, NULL));
  PetscOptionsEnd();
  if (viewer) PetscCall(PetscViewerPushFormat(viewer, format));

  PetscCall(PetscLogEventBegin(DMPLEX_RebalanceSharedPoints, dm, 0, 0, 0));

  PetscCall(DMGetOptionsPrefix(dm, &prefix));
  /* fix: only consult the alternative viewer option when the monitor option did not already
     provide one; previously both queries wrote the same variable, leaking the first viewer
     (with a pushed format) whenever both options were given */
  if (!viewer) {
    PetscCall(PetscOptionsGetViewer(comm, ((PetscObject)dm)->options, prefix, "-dm_rebalance_partition_view", &viewer, &format, NULL));
    if (viewer) PetscCall(PetscViewerPushFormat(viewer, format));
  }

  /* Figure out all points in the plex that we are interested in balancing. */
  PetscCall(DMPlexGetDepthStratum(dm, entityDepth, &eBegin, &eEnd));
  PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));
  PetscCall(PetscMalloc1(pEnd - pStart, &toBalance));
  for (i = 0; i < pEnd - pStart; i++) toBalance[i] = (PetscBool)(i >= eBegin && i < eEnd);

  /* There are three types of points:
   * exclusivelyOwned: points that are owned by this process and only seen by this process
   * nonExclusivelyOwned: points that are owned by this process but seen by at least another process
   * leaf: a point that is seen by this process but owned by a different process
   */
  PetscCall(DMGetPointSF(dm, &sf));
  PetscCall(PetscSFGetGraph(sf, &nroots, &nleafs, &ilocal, &iremote));
  PetscCall(PetscMalloc1(pEnd - pStart, &isLeaf));
  PetscCall(PetscMalloc1(pEnd - pStart, &isNonExclusivelyOwned));
  PetscCall(PetscMalloc1(pEnd - pStart, &isExclusivelyOwned));
  for (i = 0; i < pEnd - pStart; i++) {
    isNonExclusivelyOwned[i] = PETSC_FALSE;
    isExclusivelyOwned[i]    = PETSC_FALSE;
    isLeaf[i]                = PETSC_FALSE;
  }

  /* mark all the leafs */
  for (i = 0; i < nleafs; i++) isLeaf[ilocal[i] - pStart] = PETSC_TRUE;

  /* for an owned point, we can figure out whether another processor sees it or
   * not by calculating its degree */
  PetscCall(PetscSFComputeDegreeBegin(sf, &degrees));
  PetscCall(PetscSFComputeDegreeEnd(sf, &degrees));
  numExclusivelyOwned    = 0;
  numNonExclusivelyOwned = 0;
  for (i = 0; i < pEnd - pStart; i++) {
    if (toBalance[i]) {
      if (degrees[i] > 0) {
        isNonExclusivelyOwned[i] = PETSC_TRUE;
        numNonExclusivelyOwned += 1;
      } else {
        if (!isLeaf[i]) {
          isExclusivelyOwned[i] = PETSC_TRUE;
          numExclusivelyOwned += 1;
        }
      }
    }
  }

  /* Build a graph with one vertex per core representing the
   * exclusively owned points and then one vertex per nonExclusively owned
   * point. */
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetLocalSize(layout, 1 + numNonExclusivelyOwned));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetRanges(layout, &cumSumVertices));
  PetscCall(PetscMalloc1(pEnd - pStart, &globalNumbersOfLocalOwnedVertices));
  /* pStart - 1 is a sentinel meaning "not an owned shared vertex" */
  for (i = 0; i < pEnd - pStart; i++) globalNumbersOfLocalOwnedVertices[i] = pStart - 1;
  offset  = cumSumVertices[rank];
  counter = 0;
  for (i = 0; i < pEnd - pStart; i++) {
    if (toBalance[i]) {
      if (degrees[i] > 0) {
        /* +1 because graph vertex 0 of this rank represents the exclusively owned points */
        globalNumbersOfLocalOwnedVertices[i] = counter + 1 + offset;
        counter++;
      }
    }
  }

  /* send the global numbers of vertices I own to the leafs so that they know to connect to it */
  PetscCall(PetscMalloc1(pEnd - pStart, &leafGlobalNumbers));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, globalNumbersOfLocalOwnedVertices, leafGlobalNumbers, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, globalNumbersOfLocalOwnedVertices, leafGlobalNumbers, MPI_REPLACE));

  /* Build the graph for partitioning */
  numRows = 1 + numNonExclusivelyOwned;
  PetscCall(PetscLogEventBegin(DMPLEX_RebalBuildGraph, dm, 0, 0, 0));
  PetscCall(MatCreate(comm, &A));
  PetscCall(MatSetType(A, MATMPIADJ));
  PetscCall(MatSetSizes(A, numRows, numRows, cumSumVertices[size], cumSumVertices[size]));
  idx = cumSumVertices[rank]; /* this rank's "exclusively owned" vertex */
  for (i = 0; i < pEnd - pStart; i++) {
    if (toBalance[i]) {
      if (isNonExclusivelyOwned[i]) jdx = globalNumbersOfLocalOwnedVertices[i];
      else if (isLeaf[i]) jdx = leafGlobalNumbers[i];
      else continue;
      /* symmetric edges: shared vertex <-> the rank vertex */
      PetscCall(MatSetValue(A, idx, jdx, 1, INSERT_VALUES));
      PetscCall(MatSetValue(A, jdx, idx, 1, INSERT_VALUES));
    }
  }
  PetscCall(PetscFree(globalNumbersOfLocalOwnedVertices));
  PetscCall(PetscFree(leafGlobalNumbers));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscLogEventEnd(DMPLEX_RebalBuildGraph, dm, 0, 0, 0));

  nparts = size;
  ncon   = 1;
  PetscCall(PetscMalloc1(ncon * nparts, &tpwgts));
  for (i = 0; i < ncon * nparts; i++) tpwgts[i] = 1. / (nparts);
  PetscCall(PetscMalloc1(ncon, &ubvec));
  /* fix: previously ubvec[1] was written unconditionally, an out-of-bounds
     write since the array is allocated with ncon == 1 entries */
  for (i = 0; i < ncon; i++) ubvec[i] = 1.05;

  /* Vertex weights: the rank vertex is weighted by its number of exclusively
     owned points; each shared-point vertex gets weight 1. (The ncon == 2
     branch is kept for a two-constraint formulation; ncon is currently 1.) */
  PetscCall(PetscMalloc1(ncon * (1 + numNonExclusivelyOwned), &vtxwgt));
  if (ncon == 2) {
    vtxwgt[0] = numExclusivelyOwned;
    vtxwgt[1] = 1;
    for (i = 0; i < numNonExclusivelyOwned; i++) {
      vtxwgt[ncon * (i + 1)]     = 1;
      vtxwgt[ncon * (i + 1) + 1] = 0;
    }
  } else {
    PetscInt base, ms;
    PetscCallMPI(MPI_Allreduce(&numExclusivelyOwned, &base, 1, MPIU_INT, MPIU_MAX, PetscObjectComm((PetscObject)dm)));
    PetscCall(MatGetSize(A, &ms, NULL));
    ms -= size;
    base      = PetscMax(base, ms);
    vtxwgt[0] = base + numExclusivelyOwned;
    for (i = 0; i < numNonExclusivelyOwned; i++) vtxwgt[i + 1] = 1;
  }

  if (viewer) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "Attempt rebalancing of shared points of depth %" PetscInt_FMT " on interface of mesh distribution.\n", entityDepth));
    PetscCall(PetscViewerASCIIPrintf(viewer, "Size of generated auxiliary graph: %" PetscInt_FMT "\n", cumSumVertices[size]));
  }
  /* TODO: Drop the parallel/sequential choice here and just use MatPartioner for much more flexibility */
  if (usematpartitioning) {
    const char *prefix;

    PetscCall(MatPartitioningCreate(PetscObjectComm((PetscObject)dm), &mp));
    PetscCall(PetscObjectSetOptionsPrefix((PetscObject)mp, "dm_plex_rebalance_shared_points_"));
    PetscCall(PetscObjectGetOptionsPrefix((PetscObject)dm, &prefix));
    PetscCall(PetscObjectPrependOptionsPrefix((PetscObject)mp, prefix));
    PetscCall(MatPartitioningSetAdjacency(mp, A));
    PetscCall(MatPartitioningSetNumberVertexWeights(mp, ncon));
    /* ownership of vtxwgt transfers to mp here; it must not be freed separately below */
    PetscCall(MatPartitioningSetVertexWeights(mp, vtxwgt));
    PetscCall(MatPartitioningSetFromOptions(mp));
    PetscCall(MatPartitioningApply(mp, &ispart));
    PetscCall(ISGetIndices(ispart, (const PetscInt **)&part));
  } else if (parallel) {
    if (viewer) PetscCall(PetscViewerASCIIPrintf(viewer, "Using ParMETIS to partition graph.\n"));
    PetscCall(PetscMalloc1(cumSumVertices[rank + 1] - cumSumVertices[rank], &part));
    PetscCall(MatGetRowIJ(A, PETSC_FALSE, PETSC_FALSE, PETSC_FALSE, &numRows, &xadj, &adjncy, &done));
    PetscCheck(done, PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Could not get adjacency information");
    PetscCall(PetscMalloc1(4, &options));
    options[0] = 1;
    options[1] = 0; /* Verbosity */
    if (viewer) options[1] = 1;
    options[2] = 0;                   /* Seed */
    options[3] = PARMETIS_PSR_COUPLED; /* Sub-domains and processes are coupled */
    wgtflag    = 2;
    numflag    = 0;
    if (useInitialGuess) {
      /* fix: this message was previously also printed through a possibly-NULL
         viewer, which would error when no monitor viewer was requested */
      for (i = 0; i < numRows; i++) part[i] = rank;
      if (viewer) PetscCall(PetscViewerASCIIPrintf(viewer, "Using current distribution of points as initial guess.\n"));
      /* NOTE(review): the RefineKway path has been reported as unreliable; verify its results */
      PetscStackPushExternal("ParMETIS_V3_RefineKway");
      PetscCall(PetscLogEventBegin(DMPLEX_RebalPartition, 0, 0, 0, 0));
      ierr = ParMETIS_V3_RefineKway((PetscInt *)cumSumVertices, (idx_t *)xadj, (idx_t *)adjncy, vtxwgt, NULL, &wgtflag, &numflag, &ncon, &nparts, tpwgts, ubvec, options, &edgecut, part, &comm);
      PetscCall(PetscLogEventEnd(DMPLEX_RebalPartition, 0, 0, 0, 0));
      PetscStackPop;
      PetscCheck(ierr == METIS_OK, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ParMETIS_V3_RefineKway()");
    } else {
      PetscStackPushExternal("ParMETIS_V3_PartKway");
      PetscCall(PetscLogEventBegin(DMPLEX_RebalPartition, 0, 0, 0, 0));
      ierr = ParMETIS_V3_PartKway((PetscInt *)cumSumVertices, (idx_t *)xadj, (idx_t *)adjncy, vtxwgt, NULL, &wgtflag, &numflag, &ncon, &nparts, tpwgts, ubvec, options, &edgecut, part, &comm);
      PetscCall(PetscLogEventEnd(DMPLEX_RebalPartition, 0, 0, 0, 0));
      PetscStackPop;
      PetscCheck(ierr == METIS_OK, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ParMETIS_V3_PartKway()");
    }
    PetscCall(MatRestoreRowIJ(A, PETSC_FALSE, PETSC_FALSE, PETSC_FALSE, &numRows, &xadj, &adjncy, &done));
    PetscCall(PetscFree(options));
  } else {
    if (viewer) PetscCall(PetscViewerASCIIPrintf(viewer, "Using METIS to partition graph.\n"));
    Mat       As;
    PetscInt *partGlobal;
    PetscInt *numExclusivelyOwnedAll;

    PetscCall(PetscMalloc1(cumSumVertices[rank + 1] - cumSumVertices[rank], &part));
    PetscCall(MatGetSize(A, &numRows, NULL));
    /* gather the whole graph onto rank 0 and partition there sequentially */
    PetscCall(PetscLogEventBegin(DMPLEX_RebalGatherGraph, dm, 0, 0, 0));
    PetscCall(MatMPIAdjToSeqRankZero(A, &As));
    PetscCall(PetscLogEventEnd(DMPLEX_RebalGatherGraph, dm, 0, 0, 0));

    PetscCall(PetscMalloc1(size, &numExclusivelyOwnedAll));
    numExclusivelyOwnedAll[rank] = numExclusivelyOwned;
    PetscCallMPI(MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, numExclusivelyOwnedAll, 1, MPIU_INT, comm));

    PetscCall(PetscMalloc1(numRows, &partGlobal));
    PetscCall(PetscLogEventBegin(DMPLEX_RebalPartition, 0, 0, 0, 0));
    if (rank == 0) {
      PetscInt *vtxwgt_g, numRows_g;

      PetscCall(MatGetRowIJ(As, PETSC_FALSE, PETSC_FALSE, PETSC_FALSE, &numRows_g, &xadj, &adjncy, &done));
      PetscCall(PetscMalloc1(2 * numRows_g, &vtxwgt_g));
      for (i = 0; i < size; i++) {
        vtxwgt_g[ncon * cumSumVertices[i]] = numExclusivelyOwnedAll[i];
        if (ncon > 1) vtxwgt_g[ncon * cumSumVertices[i] + 1] = 1;
        for (j = cumSumVertices[i] + 1; j < cumSumVertices[i + 1]; j++) {
          vtxwgt_g[ncon * j] = 1;
          if (ncon > 1) vtxwgt_g[ncon * j + 1] = 0;
        }
      }

      PetscCall(PetscMalloc1(64, &options));
      ierr = METIS_SetDefaultOptions(options); /* initialize all defaults */
      PetscCheck(ierr == METIS_OK, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in METIS_SetDefaultOptions()");
      options[METIS_OPTION_CONTIG] = 1;
      PetscStackPushExternal("METIS_PartGraphKway");
      ierr = METIS_PartGraphKway(&numRows_g, &ncon, (idx_t *)xadj, (idx_t *)adjncy, vtxwgt_g, NULL, NULL, &nparts, tpwgts, ubvec, options, &edgecut, partGlobal);
      PetscStackPop;
      PetscCheck(ierr == METIS_OK, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in METIS_PartGraphKway()");
      PetscCall(PetscFree(options));
      PetscCall(PetscFree(vtxwgt_g));
      PetscCall(MatRestoreRowIJ(As, PETSC_FALSE, PETSC_FALSE, PETSC_FALSE, &numRows_g, &xadj, &adjncy, &done));
      PetscCall(MatDestroy(&As));
    }
    PetscCall(PetscBarrier((PetscObject)dm));
    PetscCall(PetscLogEventEnd(DMPLEX_RebalPartition, 0, 0, 0, 0));
    PetscCall(PetscFree(numExclusivelyOwnedAll));

    /* scatter the partitioning information to ranks */
    PetscCall(PetscLogEventBegin(DMPLEX_RebalScatterPart, 0, 0, 0, 0));
    PetscCall(PetscMalloc1(size, &counts));
    PetscCall(PetscMalloc1(size + 1, &mpiCumSumVertices));
    for (i = 0; i < size; i++) PetscCall(PetscMPIIntCast(cumSumVertices[i + 1] - cumSumVertices[i], &(counts[i])));
    for (i = 0; i <= size; i++) PetscCall(PetscMPIIntCast(cumSumVertices[i], &(mpiCumSumVertices[i])));
    PetscCallMPI(MPI_Scatterv(partGlobal, counts, mpiCumSumVertices, MPIU_INT, part, counts[rank], MPIU_INT, 0, comm));
    PetscCall(PetscFree(counts));
    PetscCall(PetscFree(mpiCumSumVertices));
    PetscCall(PetscFree(partGlobal));
    PetscCall(PetscLogEventEnd(DMPLEX_RebalScatterPart, 0, 0, 0, 0));
  }
  PetscCall(PetscFree(ubvec));
  PetscCall(PetscFree(tpwgts));

  /* Rename the result so that the vertex resembling the exclusively owned points stays on the same rank */
  PetscCall(PetscMalloc2(size, &firstVertices, size, &renumbering));
  firstVertices[rank] = part[0];
  PetscCallMPI(MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, firstVertices, 1, MPIU_INT, comm));
  for (i = 0; i < size; i++) renumbering[firstVertices[i]] = i;
  for (i = 0; i < cumSumVertices[rank + 1] - cumSumVertices[rank]; i++) part[i] = renumbering[part[i]];
  PetscCall(PetscFree2(firstVertices, renumbering));

  /* Check if the renumbering worked (this can fail when ParMETIS gives fewer partitions than there are processes) */
  failed = (PetscInt)(part[0] != rank);
  PetscCallMPI(MPI_Allreduce(&failed, &failedGlobal, 1, MPIU_INT, MPI_SUM, comm));
  if (failedGlobal > 0) {
    /* fix: a PetscCheck(failedGlobal <= 0, ...) previously sat here, turning this
       documented "unsuccessful, no change" path into a hard error and making all of
       the cleanup below dead code. Clean up and return with *success left FALSE.
       Also fixed: layout, A and mp were leaked, and vtxwgt was double-freed when
       MatPartitioning (which owns it) was in use. */
    PetscCall(PetscLayoutDestroy(&layout));
    PetscCall(MatDestroy(&A));
    PetscCall(PetscFree(toBalance));
    PetscCall(PetscFree(isLeaf));
    PetscCall(PetscFree(isNonExclusivelyOwned));
    PetscCall(PetscFree(isExclusivelyOwned));
    if (usematpartitioning) {
      PetscCall(ISRestoreIndices(ispart, (const PetscInt **)&part));
      PetscCall(ISDestroy(&ispart));
      PetscCall(MatPartitioningDestroy(&mp)); /* frees vtxwgt, whose ownership was transferred */
    } else {
      PetscCall(PetscFree(vtxwgt));
      PetscCall(PetscFree(part));
    }
    if (viewer) {
      PetscCall(PetscViewerPopFormat(viewer));
      PetscCall(PetscViewerDestroy(&viewer));
    }
    PetscCall(PetscLogEventEnd(DMPLEX_RebalanceSharedPoints, dm, 0, 0, 0));
    PetscFunctionReturn(0);
  }

  /* Check how well we did distributing points*/
  if (viewer) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "Number of owned entities of depth %" PetscInt_FMT ".\n", entityDepth));
    PetscCall(PetscViewerASCIIPrintf(viewer, "Initial "));
    PetscCall(DMPlexViewDistribution(comm, cumSumVertices[rank + 1] - cumSumVertices[rank], ncon, vtxwgt, NULL, viewer));
    PetscCall(PetscViewerASCIIPrintf(viewer, "Rebalanced "));
    PetscCall(DMPlexViewDistribution(comm, cumSumVertices[rank + 1] - cumSumVertices[rank], ncon, vtxwgt, part, viewer));
  }

  /* Check that every vertex is owned by a process that it is actually connected to. */
  PetscCall(MatGetRowIJ(A, PETSC_FALSE, PETSC_FALSE, PETSC_FALSE, &numRows, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &done));
  for (i = 1; i <= numNonExclusivelyOwned; i++) {
    PetscInt loc = 0;
    PetscCall(PetscFindInt(cumSumVertices[part[i]], xadj[i + 1] - xadj[i], &adjncy[xadj[i]], &loc));
    /* If not, then just set the owner to the original owner (hopefully a rare event, it means that a vertex has been isolated) */
    if (loc < 0) part[i] = rank;
  }
  PetscCall(MatRestoreRowIJ(A, PETSC_FALSE, PETSC_FALSE, PETSC_FALSE, &numRows, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &done));
  PetscCall(MatDestroy(&A));

  /* See how significant the influences of the previous fixing up step was.*/
  if (viewer) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "After fix "));
    PetscCall(DMPlexViewDistribution(comm, cumSumVertices[rank + 1] - cumSumVertices[rank], ncon, vtxwgt, part, viewer));
  }
  if (!usematpartitioning) PetscCall(PetscFree(vtxwgt));
  else PetscCall(MatPartitioningDestroy(&mp)); /* mp owns vtxwgt */

  PetscCall(PetscLayoutDestroy(&layout));

  PetscCall(PetscLogEventBegin(DMPLEX_RebalRewriteSF, dm, 0, 0, 0));
  /* Rewrite the SF to reflect the new ownership. */
  PetscCall(PetscMalloc1(numNonExclusivelyOwned, &pointsToRewrite));
  counter = 0;
  for (i = 0; i < pEnd - pStart; i++) {
    if (toBalance[i]) {
      if (isNonExclusivelyOwned[i]) {
        pointsToRewrite[counter] = i + pStart;
        counter++;
      }
    }
  }
  /* part + 1 skips the rank vertex; entries align with pointsToRewrite */
  PetscCall(DMPlexRewriteSF(dm, numNonExclusivelyOwned, pointsToRewrite, part + 1, degrees));
  PetscCall(PetscFree(pointsToRewrite));
  PetscCall(PetscLogEventEnd(DMPLEX_RebalRewriteSF, dm, 0, 0, 0));

  PetscCall(PetscFree(toBalance));
  PetscCall(PetscFree(isLeaf));
  PetscCall(PetscFree(isNonExclusivelyOwned));
  PetscCall(PetscFree(isExclusivelyOwned));
  if (usematpartitioning) {
    PetscCall(ISRestoreIndices(ispart, (const PetscInt **)&part));
    PetscCall(ISDestroy(&ispart));
  } else PetscCall(PetscFree(part));
  if (viewer) {
    PetscCall(PetscViewerPopFormat(viewer));
    PetscCall(PetscViewerDestroy(&viewer));
  }
  if (success) *success = PETSC_TRUE;
  PetscCall(PetscLogEventEnd(DMPLEX_RebalanceSharedPoints, dm, 0, 0, 0));
  PetscFunctionReturn(0);
#else
  SETERRQ(PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Mesh partitioning needs external package support.\nPlease reconfigure with --download-parmetis.");
#endif
}