1 #include <petsc/private/dmpleximpl.h> /*I "petscdmplex.h" I*/ 2 #include <petsc/private/partitionerimpl.h> 3 #include <petsc/private/hashseti.h> 4 5 const char * const DMPlexCSRAlgorithms[] = {"mat", "graph", "overlap", "DMPlexCSRAlgorithm", "DM_PLEX_CSR_",NULL}; 6 7 static inline PetscInt DMPlex_GlobalID(PetscInt point) { return point >= 0 ? point : -(point+1); } 8 9 static PetscErrorCode DMPlexCreatePartitionerGraph_Overlap(DM dm, PetscInt height, PetscInt *numVertices, PetscInt **offsets, PetscInt **adjacency, IS *globalNumbering) 10 { 11 DM ovdm; 12 PetscSF sfPoint; 13 IS cellNumbering; 14 const PetscInt *cellNum; 15 PetscInt *adj = NULL, *vOffsets = NULL, *vAdj = NULL; 16 PetscBool useCone, useClosure; 17 PetscInt dim, depth, overlap, cStart, cEnd, c, v; 18 PetscMPIInt rank, size; 19 20 PetscFunctionBegin; 21 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject) dm), &rank)); 22 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject) dm), &size)); 23 PetscCall(DMGetDimension(dm, &dim)); 24 PetscCall(DMPlexGetDepth(dm, &depth)); 25 if (dim != depth) { 26 /* We do not handle the uninterpolated case here */ 27 PetscCall(DMPlexCreateNeighborCSR(dm, height, numVertices, offsets, adjacency)); 28 /* DMPlexCreateNeighborCSR does not make a numbering */ 29 if (globalNumbering) PetscCall(DMPlexCreateCellNumbering_Internal(dm, PETSC_TRUE, globalNumbering)); 30 /* Different behavior for empty graphs */ 31 if (!*numVertices) { 32 PetscCall(PetscMalloc1(1, offsets)); 33 (*offsets)[0] = 0; 34 } 35 /* Broken in parallel */ 36 if (rank) PetscCheck(!*numVertices,PETSC_COMM_SELF, PETSC_ERR_SUP, "Parallel partitioning of uninterpolated meshes not supported"); 37 PetscFunctionReturn(0); 38 } 39 /* Always use FVM adjacency to create partitioner graph */ 40 PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure)); 41 PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE)); 42 /* Need overlap >= 1 */ 43 PetscCall(DMPlexGetOverlap(dm, &overlap)); 44 if (size && overlap < 1) 
{
    /* Mesh has no overlap yet: build a 1-cell overlap so each owned cell can see its off-process neighbors */
    PetscCall(DMPlexDistributeOverlap(dm, 1, NULL, &ovdm));
  } else {
    /* Overlap already present: reuse dm, taking a reference so the single DMDestroy() below is correct in both branches */
    PetscCall(PetscObjectReference((PetscObject) dm));
    ovdm = dm;
  }
  PetscCall(DMGetPointSF(ovdm, &sfPoint));
  PetscCall(DMPlexGetHeightStratum(ovdm, height, &cStart, &cEnd));
  /* Globally consistent cell numbering over the overlapped mesh; cells owned by another rank come back negative */
  PetscCall(DMPlexCreateNumbering_Plex(ovdm, cStart, cEnd, 0, NULL, sfPoint, &cellNumbering));
  if (globalNumbering) {
    /* Hand a reference to the caller; we still use cellNumbering locally below */
    PetscCall(PetscObjectReference((PetscObject) cellNumbering));
    *globalNumbering = cellNumbering;
  }
  PetscCall(ISGetIndices(cellNumbering, &cellNum));
  /* Determine sizes */
  for (*numVertices = 0, c = cStart; c < cEnd; ++c) {
    /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
    if (cellNum[c-cStart] < 0) continue;
    (*numVertices)++;
  }
  /* First pass: count the neighbors of every owned cell to build the CSR row offsets */
  PetscCall(PetscCalloc1(*numVertices+1, &vOffsets));
  for (c = cStart, v = 0; c < cEnd; ++c) {
    PetscInt adjSize = PETSC_DETERMINE, a, vsize = 0;

    if (cellNum[c-cStart] < 0) continue;
    PetscCall(DMPlexGetAdjacency(ovdm, c, &adjSize, &adj));
    for (a = 0; a < adjSize; ++a) {
      const PetscInt point = adj[a];
      /* Count only other cells of this height stratum; self-edges are excluded */
      if (point != c && cStart <= point && point < cEnd) ++vsize;
    }
    vOffsets[v+1] = vOffsets[v] + vsize;
    ++v;
  }
  /* Determine adjacency */
  PetscCall(PetscMalloc1(vOffsets[*numVertices], &vAdj));
  for (c = cStart, v = 0; c < cEnd; ++c) {
    PetscInt adjSize = PETSC_DETERMINE, a, off = vOffsets[v];

    /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
    if (cellNum[c-cStart] < 0) continue;
    /* Second pass: fill each row with the non-negative global ids of the neighbors counted above */
    PetscCall(DMPlexGetAdjacency(ovdm, c, &adjSize, &adj));
    for (a = 0; a < adjSize; ++a) {
      const PetscInt point = adj[a];
      if (point != c && cStart <= point && point < cEnd) {
        vAdj[off++] = DMPlex_GlobalID(cellNum[point-cStart]);
      }
    }
    /* The fill for this row must land exactly on the next row's offset */
    PetscCheck(off == vOffsets[v+1],PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Offsets %" PetscInt_FMT " should be %" PetscInt_FMT, off, vOffsets[v+1]);
    /* Sort adjacencies (not strictly necessary) */
PetscCall(PetscSortInt(off-vOffsets[v], &vAdj[vOffsets[v]])); 94 ++v; 95 } 96 PetscCall(PetscFree(adj)); 97 PetscCall(ISRestoreIndices(cellNumbering, &cellNum)); 98 PetscCall(ISDestroy(&cellNumbering)); 99 PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure)); 100 PetscCall(DMDestroy(&ovdm)); 101 if (offsets) {*offsets = vOffsets;} 102 else PetscCall(PetscFree(vOffsets)); 103 if (adjacency) {*adjacency = vAdj;} 104 else PetscCall(PetscFree(vAdj)); 105 PetscFunctionReturn(0); 106 } 107 108 static PetscErrorCode DMPlexCreatePartitionerGraph_Native(DM dm, PetscInt height, PetscInt *numVertices, PetscInt **offsets, PetscInt **adjacency, IS *globalNumbering) 109 { 110 PetscInt dim, depth, p, pStart, pEnd, a, adjSize, idx, size; 111 PetscInt *adj = NULL, *vOffsets = NULL, *graph = NULL; 112 IS cellNumbering; 113 const PetscInt *cellNum; 114 PetscBool useCone, useClosure; 115 PetscSection section; 116 PetscSegBuffer adjBuffer; 117 PetscSF sfPoint; 118 PetscInt *adjCells = NULL, *remoteCells = NULL; 119 const PetscInt *local; 120 PetscInt nroots, nleaves, l; 121 PetscMPIInt rank; 122 123 PetscFunctionBegin; 124 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject) dm), &rank)); 125 PetscCall(DMGetDimension(dm, &dim)); 126 PetscCall(DMPlexGetDepth(dm, &depth)); 127 if (dim != depth) { 128 /* We do not handle the uninterpolated case here */ 129 PetscCall(DMPlexCreateNeighborCSR(dm, height, numVertices, offsets, adjacency)); 130 /* DMPlexCreateNeighborCSR does not make a numbering */ 131 if (globalNumbering) PetscCall(DMPlexCreateCellNumbering_Internal(dm, PETSC_TRUE, globalNumbering)); 132 /* Different behavior for empty graphs */ 133 if (!*numVertices) { 134 PetscCall(PetscMalloc1(1, offsets)); 135 (*offsets)[0] = 0; 136 } 137 /* Broken in parallel */ 138 if (rank) PetscCheck(!*numVertices,PETSC_COMM_SELF, PETSC_ERR_SUP, "Parallel partitioning of uninterpolated meshes not supported"); 139 PetscFunctionReturn(0); 140 } 141 PetscCall(DMGetPointSF(dm, &sfPoint)); 142 
PetscCall(DMPlexGetHeightStratum(dm, height, &pStart, &pEnd));
  /* Build adjacency graph via a section/segbuffer */
  PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject) dm), &section));
  PetscCall(PetscSectionSetChart(section, pStart, pEnd));
  PetscCall(PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer));
  /* Always use FVM adjacency to create partitioner graph */
  PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure));
  PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE));
  /* Globally consistent cell numbering; cells owned by another rank come back as negative entries */
  PetscCall(DMPlexCreateNumbering_Plex(dm, pStart, pEnd, 0, NULL, sfPoint, &cellNumbering));
  if (globalNumbering) {
    PetscCall(PetscObjectReference((PetscObject)cellNumbering));
    *globalNumbering = cellNumbering;
  }
  PetscCall(ISGetIndices(cellNumbering, &cellNum));
  /* For all boundary faces (including faces adjacent to a ghost cell), record the local cell in adjCells
     Broadcast adjCells to remoteCells (to get cells from roots) and Reduce adjCells to remoteCells (to get cells from leaves)
  */
  PetscCall(PetscSFGetGraph(sfPoint, &nroots, &nleaves, &local, NULL));
  if (nroots >= 0) {
    PetscInt fStart, fEnd, f;

    PetscCall(PetscCalloc2(nroots, &adjCells, nroots, &remoteCells));
    PetscCall(DMPlexGetHeightStratum(dm, height+1, &fStart, &fEnd));
    /* -3 marks points with no cell recorded */
    for (l = 0; l < nroots; ++l) adjCells[l] = -3;
    for (f = fStart; f < fEnd; ++f) {
      const PetscInt *support;
      PetscInt supportSize;

      PetscCall(DMPlexGetSupport(dm, f, &support));
      PetscCall(DMPlexGetSupportSize(dm, f, &supportSize));
      /* Process-boundary face with one local cell: record that cell */
      if (supportSize == 1) adjCells[f] = DMPlex_GlobalID(cellNum[support[0]-pStart]);
      else if (supportSize == 2) {
        /* If one support cell is a leaf in the point SF (shared point), record the cell on the other side */
        PetscCall(PetscFindInt(support[0], nleaves, local, &p));
        if (p >= 0) adjCells[f] = DMPlex_GlobalID(cellNum[support[1]-pStart]);
        PetscCall(PetscFindInt(support[1], nleaves, local, &p));
        if (p >= 0) adjCells[f] = DMPlex_GlobalID(cellNum[support[0]-pStart]);
      }
      /* Handle non-conforming meshes */
      if (supportSize > 2) {
        PetscInt numChildren, i;
        const PetscInt *children;

        /* Repeat the same bookkeeping for the hanging-node children of this face */
        PetscCall(DMPlexGetTreeChildren(dm, f, &numChildren, &children));
        for (i = 0; i < numChildren; ++i) {
          const PetscInt child = children[i];
          if (fStart <= child && child < fEnd) {
            PetscCall(DMPlexGetSupport(dm, child, &support));
            PetscCall(DMPlexGetSupportSize(dm, child, &supportSize));
            if (supportSize == 1) adjCells[child] = DMPlex_GlobalID(cellNum[support[0]-pStart]);
            else if (supportSize == 2) {
              PetscCall(PetscFindInt(support[0], nleaves, local, &p));
              if (p >= 0) adjCells[child] = DMPlex_GlobalID(cellNum[support[1]-pStart]);
              PetscCall(PetscFindInt(support[1], nleaves, local, &p));
              if (p >= 0) adjCells[child] = DMPlex_GlobalID(cellNum[support[0]-pStart]);
            }
          }
        }
      }
    }
    /* Exchange through the point SF so remoteCells[f] holds the neighbor cell across each shared face */
    for (l = 0; l < nroots; ++l) remoteCells[l] = -1;
    PetscCall(PetscSFBcastBegin(dm->sf, MPIU_INT, adjCells, remoteCells,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(dm->sf, MPIU_INT, adjCells, remoteCells,MPI_REPLACE));
    PetscCall(PetscSFReduceBegin(dm->sf, MPIU_INT, adjCells, remoteCells, MPI_MAX));
    PetscCall(PetscSFReduceEnd(dm->sf, MPIU_INT, adjCells, remoteCells, MPI_MAX));
  }
  /* Combine local and global adjacencies */
  for (*numVertices = 0, p = pStart; p < pEnd; p++) {
    /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
    if (nroots > 0) {if (cellNum[p-pStart] < 0) continue;}
    /* Add remote cells */
    if (remoteCells) {
      const PetscInt gp = DMPlex_GlobalID(cellNum[p-pStart]);
      PetscInt coneSize, numChildren, c, i;
      const PetscInt *cone, *children;

      PetscCall(DMPlexGetCone(dm, p, &cone));
      PetscCall(DMPlexGetConeSize(dm, p, &coneSize));
      for (c = 0; c < coneSize; ++c) {
        const PetscInt point = cone[c];
        /* A face with a recorded cell different from this cell is an off-process edge */
        if (remoteCells[point] >= 0 && remoteCells[point] != gp) {
          PetscInt *PETSC_RESTRICT pBuf;
          PetscCall(PetscSectionAddDof(section, p, 1));
          PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf));
          *pBuf = remoteCells[point];
        }
        /* Handle non-conforming meshes */
        PetscCall(DMPlexGetTreeChildren(dm, point, &numChildren, &children));
        for (i = 0; i < numChildren; ++i) {
          const PetscInt child = children[i];
          if (remoteCells[child] >= 0 && remoteCells[child] != gp) {
            PetscInt *PETSC_RESTRICT pBuf;
            PetscCall(PetscSectionAddDof(section, p, 1));
            PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf));
            *pBuf = remoteCells[child];
          }
        }
      }
    }
    /* Add local cells */
    adjSize = PETSC_DETERMINE;
    PetscCall(DMPlexGetAdjacency(dm, p, &adjSize, &adj));
    for (a = 0; a < adjSize; ++a) {
      const PetscInt point = adj[a];
      if (point != p && pStart <= point && point < pEnd) {
        PetscInt *PETSC_RESTRICT pBuf;
        PetscCall(PetscSectionAddDof(section, p, 1));
        PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf));
        *pBuf = DMPlex_GlobalID(cellNum[point-pStart]);
      }
    }
    (*numVertices)++;
  }
  PetscCall(PetscFree(adj));
  PetscCall(PetscFree2(adjCells, remoteCells));
  /* Restore the adjacency definition the user had set */
  PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure));

  /* Derive CSR graph from section/segbuffer */
  PetscCall(PetscSectionSetUp(section));
  PetscCall(PetscSectionGetStorageSize(section, &size));
  PetscCall(PetscMalloc1(*numVertices+1, &vOffsets));
  for (idx = 0, p = pStart; p < pEnd; p++) {
    if (nroots > 0) {if (cellNum[p-pStart] < 0) continue;}
    PetscCall(PetscSectionGetOffset(section, p, &(vOffsets[idx++])));
  }
  vOffsets[*numVertices] = size;
  PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &graph));

  if (nroots >= 0) {
    /* Filter out duplicate edges using section/segbuffer */
    PetscCall(PetscSectionReset(section));
    PetscCall(PetscSectionSetChart(section, 0, *numVertices));
    for (p = 0; p < *numVertices; p++) {
      PetscInt start = vOffsets[p], end = vOffsets[p+1];
      PetscInt numEdges = end-start,
*PETSC_RESTRICT edges; 276 PetscCall(PetscSortRemoveDupsInt(&numEdges, &graph[start])); 277 PetscCall(PetscSectionSetDof(section, p, numEdges)); 278 PetscCall(PetscSegBufferGetInts(adjBuffer, numEdges, &edges)); 279 PetscCall(PetscArraycpy(edges, &graph[start], numEdges)); 280 } 281 PetscCall(PetscFree(vOffsets)); 282 PetscCall(PetscFree(graph)); 283 /* Derive CSR graph from section/segbuffer */ 284 PetscCall(PetscSectionSetUp(section)); 285 PetscCall(PetscSectionGetStorageSize(section, &size)); 286 PetscCall(PetscMalloc1(*numVertices+1, &vOffsets)); 287 for (idx = 0, p = 0; p < *numVertices; p++) { 288 PetscCall(PetscSectionGetOffset(section, p, &(vOffsets[idx++]))); 289 } 290 vOffsets[*numVertices] = size; 291 PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &graph)); 292 } else { 293 /* Sort adjacencies (not strictly necessary) */ 294 for (p = 0; p < *numVertices; p++) { 295 PetscInt start = vOffsets[p], end = vOffsets[p+1]; 296 PetscCall(PetscSortInt(end-start, &graph[start])); 297 } 298 } 299 300 if (offsets) *offsets = vOffsets; 301 if (adjacency) *adjacency = graph; 302 303 /* Cleanup */ 304 PetscCall(PetscSegBufferDestroy(&adjBuffer)); 305 PetscCall(PetscSectionDestroy(§ion)); 306 PetscCall(ISRestoreIndices(cellNumbering, &cellNum)); 307 PetscCall(ISDestroy(&cellNumbering)); 308 PetscFunctionReturn(0); 309 } 310 311 static PetscErrorCode DMPlexCreatePartitionerGraph_ViaMat(DM dm, PetscInt height, PetscInt *numVertices, PetscInt **offsets, PetscInt **adjacency, IS *globalNumbering) 312 { 313 Mat conn, CSR; 314 IS fis, cis, cis_own; 315 PetscSF sfPoint; 316 const PetscInt *rows, *cols, *ii, *jj; 317 PetscInt *idxs,*idxs2; 318 PetscInt dim, depth, floc, cloc, i, M, N, c, lm, m, cStart, cEnd, fStart, fEnd; 319 PetscMPIInt rank; 320 PetscBool flg; 321 322 PetscFunctionBegin; 323 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject) dm), &rank)); 324 PetscCall(DMGetDimension(dm, &dim)); 325 PetscCall(DMPlexGetDepth(dm, &depth)); 326 if (dim != depth) { 327 /* 
We do not handle the uninterpolated case here */
    PetscCall(DMPlexCreateNeighborCSR(dm, height, numVertices, offsets, adjacency));
    /* DMPlexCreateNeighborCSR does not make a numbering */
    if (globalNumbering) PetscCall(DMPlexCreateCellNumbering_Internal(dm, PETSC_TRUE, globalNumbering));
    /* Different behavior for empty graphs */
    if (!*numVertices) {
      PetscCall(PetscMalloc1(1, offsets));
      (*offsets)[0] = 0;
    }
    /* Broken in parallel */
    if (rank) PetscCheck(!*numVertices,PETSC_COMM_SELF, PETSC_ERR_SUP, "Parallel partitioning of uninterpolated meshes not supported");
    PetscFunctionReturn(0);
  }
  /* Interpolated and parallel case */

  /* numbering */
  PetscCall(DMGetPointSF(dm, &sfPoint));
  PetscCall(DMPlexGetHeightStratum(dm, height, &cStart, &cEnd));
  PetscCall(DMPlexGetHeightStratum(dm, height+1, &fStart, &fEnd));
  /* Global numberings for cells (size N) and facets (size M); non-owned points come back negative */
  PetscCall(DMPlexCreateNumbering_Plex(dm, cStart, cEnd, 0, &N, sfPoint, &cis));
  PetscCall(DMPlexCreateNumbering_Plex(dm, fStart, fEnd, 0, &M, sfPoint, &fis));
  if (globalNumbering) PetscCall(ISDuplicate(cis, globalNumbering));

  /* get positive global ids and local sizes for facets and cells */
  PetscCall(ISGetLocalSize(fis, &m));
  PetscCall(ISGetIndices(fis, &rows));
  PetscCall(PetscMalloc1(m, &idxs));
  for (i = 0, floc = 0; i < m; i++) {
    const PetscInt p = rows[i];

    if (p < 0) {
      /* Negative entries encode a non-owned point: decode its global id */
      idxs[i] = -(p+1);
    } else {
      idxs[i] = p;
      floc += 1; /* count locally owned facets */
    }
  }
  PetscCall(ISRestoreIndices(fis, &rows));
  PetscCall(ISDestroy(&fis));
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idxs, PETSC_OWN_POINTER, &fis));

  PetscCall(ISGetLocalSize(cis, &m));
  PetscCall(ISGetIndices(cis, &cols));
  PetscCall(PetscMalloc1(m, &idxs));
  PetscCall(PetscMalloc1(m, &idxs2));
  for (i = 0, cloc = 0; i < m; i++) {
    const PetscInt p = cols[i];

    if (p < 0) {
      idxs[i] = -(p+1);
    } else {
      idxs[i] = p;
      idxs2[cloc++] = p; /* global ids of owned cells only */
    }
  }
  PetscCall(ISRestoreIndices(cis, &cols));
  PetscCall(ISDestroy(&cis));
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)dm), m, idxs, PETSC_OWN_POINTER, &cis));
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)dm), cloc, idxs2, PETSC_OWN_POINTER, &cis_own));

  /* Create matrix to hold F-C connectivity (MatMatTranspose Mult not supported for MPIAIJ) */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)dm), &conn));
  PetscCall(MatSetSizes(conn, floc, cloc, M, N));
  PetscCall(MatSetType(conn, MATMPIAIJ));
  PetscCall(DMPlexGetMaxSizes(dm, NULL, &lm));
  /* Sum of per-rank maximum cone sizes: used as a per-row preallocation bound */
  PetscCallMPI(MPI_Allreduce(&lm, &m, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject) dm)));
  PetscCall(MatMPIAIJSetPreallocation(conn, m, NULL, m, NULL));

  /* Assemble matrix */
  PetscCall(ISGetIndices(fis, &rows));
  PetscCall(ISGetIndices(cis, &cols));
  for (c = cStart; c < cEnd; c++) {
    const PetscInt *cone;
    PetscInt coneSize, row, col, f;

    col = cols[c-cStart];
    PetscCall(DMPlexGetCone(dm, c, &cone));
    PetscCall(DMPlexGetConeSize(dm, c, &coneSize));
    for (f = 0; f < coneSize; f++) {
      const PetscScalar v = 1.0;
      const PetscInt *children;
      PetscInt numChildren, ch;

      /* Entry (facet, cell) = 1 for every facet of this cell */
      row = rows[cone[f]-fStart];
      PetscCall(MatSetValues(conn, 1, &row, 1, &col, &v, INSERT_VALUES));

      /* non-conforming meshes */
      PetscCall(DMPlexGetTreeChildren(dm, cone[f], &numChildren, &children));
      for (ch = 0; ch < numChildren; ch++) {
        const PetscInt child = children[ch];

        if (child < fStart || child >= fEnd) continue;
        row = rows[child-fStart];
        PetscCall(MatSetValues(conn, 1, &row, 1, &col, &v, INSERT_VALUES));
      }
    }
  }
  PetscCall(ISRestoreIndices(fis, &rows));
  PetscCall(ISRestoreIndices(cis, &cols));
  PetscCall(ISDestroy(&fis));
  PetscCall(ISDestroy(&cis));
  PetscCall(MatAssemblyBegin(conn, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(conn, MAT_FINAL_ASSEMBLY));

  /* Get parallel
CSR by doing conn^T * conn */ 432 PetscCall(MatTransposeMatMult(conn, conn, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &CSR)); 433 PetscCall(MatDestroy(&conn)); 434 435 /* extract local part of the CSR */ 436 PetscCall(MatMPIAIJGetLocalMat(CSR, MAT_INITIAL_MATRIX, &conn)); 437 PetscCall(MatDestroy(&CSR)); 438 PetscCall(MatGetRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &m, &ii, &jj, &flg)); 439 PetscCheck(flg,PETSC_COMM_SELF, PETSC_ERR_PLIB, "No IJ format"); 440 441 /* get back requested output */ 442 if (numVertices) *numVertices = m; 443 if (offsets) { 444 PetscCall(PetscCalloc1(m+1, &idxs)); 445 for (i = 1; i < m+1; i++) idxs[i] = ii[i] - i; /* ParMetis does not like self-connectivity */ 446 *offsets = idxs; 447 } 448 if (adjacency) { 449 PetscCall(PetscMalloc1(ii[m] - m, &idxs)); 450 PetscCall(ISGetIndices(cis_own, &rows)); 451 for (i = 0, c = 0; i < m; i++) { 452 PetscInt j, g = rows[i]; 453 454 for (j = ii[i]; j < ii[i+1]; j++) { 455 if (jj[j] == g) continue; /* again, self-connectivity */ 456 idxs[c++] = jj[j]; 457 } 458 } 459 PetscCheck(c == ii[m] - m,PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected %" PetscInt_FMT " != %" PetscInt_FMT,c,ii[m]-m); 460 PetscCall(ISRestoreIndices(cis_own, &rows)); 461 *adjacency = idxs; 462 } 463 464 /* cleanup */ 465 PetscCall(ISDestroy(&cis_own)); 466 PetscCall(MatRestoreRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &m, &ii, &jj, &flg)); 467 PetscCheck(flg,PETSC_COMM_SELF, PETSC_ERR_PLIB, "No IJ format"); 468 PetscCall(MatDestroy(&conn)); 469 PetscFunctionReturn(0); 470 } 471 472 /*@C 473 DMPlexCreatePartitionerGraph - Create a CSR graph of point connections for the partitioner 474 475 Input Parameters: 476 + dm - The mesh DM dm 477 - height - Height of the strata from which to construct the graph 478 479 Output Parameters: 480 + numVertices - Number of vertices in the graph 481 . offsets - Point offsets in the graph 482 . 
adjacency - Point connectivity in the graph 483 - globalNumbering - A map from the local cell numbering to the global numbering used in "adjacency". Negative indicates that the cell is a duplicate from another process. 484 485 The user can control the definition of adjacency for the mesh using DMSetAdjacency(). They should choose the combination appropriate for the function 486 representation on the mesh. If requested, globalNumbering needs to be destroyed by the caller; offsets and adjacency need to be freed with PetscFree(). 487 488 Options Database Keys: 489 . -dm_plex_csr_alg <mat,graph,overlap> - Choose the algorithm for computing the CSR graph 490 491 Level: developer 492 493 .seealso: `PetscPartitionerGetType()`, `PetscPartitionerCreate()`, `DMSetAdjacency()` 494 @*/ 495 PetscErrorCode DMPlexCreatePartitionerGraph(DM dm, PetscInt height, PetscInt *numVertices, PetscInt **offsets, PetscInt **adjacency, IS *globalNumbering) 496 { 497 DMPlexCSRAlgorithm alg = DM_PLEX_CSR_GRAPH; 498 499 PetscFunctionBegin; 500 PetscCall(PetscOptionsGetEnum(((PetscObject) dm)->options,((PetscObject) dm)->prefix, "-dm_plex_csr_alg", DMPlexCSRAlgorithms, (PetscEnum *) &alg, NULL)); 501 switch (alg) { 502 case DM_PLEX_CSR_MAT: 503 PetscCall(DMPlexCreatePartitionerGraph_ViaMat(dm, height, numVertices, offsets, adjacency, globalNumbering));break; 504 case DM_PLEX_CSR_GRAPH: 505 PetscCall(DMPlexCreatePartitionerGraph_Native(dm, height, numVertices, offsets, adjacency, globalNumbering));break; 506 case DM_PLEX_CSR_OVERLAP: 507 PetscCall(DMPlexCreatePartitionerGraph_Overlap(dm, height, numVertices, offsets, adjacency, globalNumbering));break; 508 } 509 PetscFunctionReturn(0); 510 } 511 512 /*@C 513 DMPlexCreateNeighborCSR - Create a mesh graph (cell-cell adjacency) in parallel CSR format. 

  Collective on DM

  Input Parameters:
+ dm - The DMPlex
- cellHeight - The height of mesh points to treat as cells (default should be 0)

  Output Parameters:
+ numVertices - The number of local vertices in the graph, or cells in the mesh.
. offsets - The offset to the adjacency list for each cell
- adjacency - The adjacency list for all cells

  Note: This is suitable for input to a mesh partitioner like ParMetis.

  Level: advanced

.seealso: `DMPlexCreate()`
@*/
PetscErrorCode DMPlexCreateNeighborCSR(DM dm, PetscInt cellHeight, PetscInt *numVertices, PetscInt **offsets, PetscInt **adjacency)
{
  const PetscInt maxFaceCases = 30;
  PetscInt numFaceCases = 0;
  PetscInt numFaceVertices[30]; /* maxFaceCases, C89 sucks sucks sucks */
  PetscInt *off, *adj;
  PetscInt *neighborCells = NULL;
  PetscInt dim, cellDim, depth = 0, faceDepth, cStart, cEnd, c, numCells, cell;

  PetscFunctionBegin;
  /* For parallel partitioning, I think you have to communicate supports */
  PetscCall(DMGetDimension(dm, &dim));
  cellDim = dim - cellHeight;
  PetscCall(DMPlexGetDepth(dm, &depth));
  PetscCall(DMPlexGetHeightStratum(dm, cellHeight, &cStart, &cEnd));
  if (cEnd - cStart == 0) {
    /* Empty local mesh: report an empty graph */
    if (numVertices) *numVertices = 0;
    if (offsets) *offsets = NULL;
    if (adjacency) *adjacency = NULL;
    PetscFunctionReturn(0);
  }
  numCells = cEnd - cStart;
  faceDepth = depth - cellHeight;
  if (dim == depth) {
    /* Interpolated mesh: two cells are neighbors iff they share a face (support of size 2) */
    PetscInt f, fStart, fEnd;

    PetscCall(PetscCalloc1(numCells+1, &off));
    /* Count neighboring cells */
    PetscCall(DMPlexGetHeightStratum(dm, cellHeight+1, &fStart, &fEnd));
    for (f = fStart; f < fEnd; ++f) {
      const PetscInt *support;
      PetscInt supportSize;
      PetscCall(DMPlexGetSupportSize(dm, f, &supportSize));
      PetscCall(DMPlexGetSupport(dm, f, &support));
      if (supportSize == 2) {
        PetscInt numChildren;

        /* Faces with hanging-node children are skipped; the children carry the adjacency */
        PetscCall(DMPlexGetTreeChildren(dm,f,&numChildren,NULL));
        if (!numChildren) {
          ++off[support[0]-cStart+1];
          ++off[support[1]-cStart+1];
        }
      }
    }
    /* Prefix sum */
    for (c = 1; c <= numCells; ++c) off[c] += off[c-1];
    if (adjacency) {
      PetscInt *tmp;

      PetscCall(PetscMalloc1(off[numCells], &adj));
      /* tmp holds the running fill position for each row */
      PetscCall(PetscMalloc1(numCells+1, &tmp));
      PetscCall(PetscArraycpy(tmp, off, numCells+1));
      /* Get neighboring cells */
      for (f = fStart; f < fEnd; ++f) {
        const PetscInt *support;
        PetscInt supportSize;
        PetscCall(DMPlexGetSupportSize(dm, f, &supportSize));
        PetscCall(DMPlexGetSupport(dm, f, &support));
        if (supportSize == 2) {
          PetscInt numChildren;

          PetscCall(DMPlexGetTreeChildren(dm,f,&numChildren,NULL));
          if (!numChildren) {
            adj[tmp[support[0]-cStart]++] = support[1];
            adj[tmp[support[1]-cStart]++] = support[0];
          }
        }
      }
      /* NOTE(review): the message prints off[c] while the check compares against off[c+1] -- confirm the intended operand */
      for (c = 0; c < cEnd-cStart; ++c) PetscAssert(tmp[c] == off[c+1],PETSC_COMM_SELF, PETSC_ERR_PLIB, "Offset %" PetscInt_FMT " != %" PetscInt_FMT " for cell %" PetscInt_FMT, tmp[c], off[c], c+cStart);
      PetscCall(PetscFree(tmp));
    }
    if (numVertices) *numVertices = numCells;
    if (offsets) *offsets = off;
    if (adjacency) *adjacency = adj;
    PetscFunctionReturn(0);
  }
  /* Setup face recognition */
  if (faceDepth == 1) {
    /* Record, per distinct cone size seen, how many vertices a face of such a cell has */
    PetscInt cornersSeen[30] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; /* Could use PetscBT */

    for (c = cStart; c < cEnd; ++c) {
      PetscInt corners;

      PetscCall(DMPlexGetConeSize(dm, c, &corners));
      if (!cornersSeen[corners]) {
        PetscInt nFV;

        PetscCheck(numFaceCases < maxFaceCases,PetscObjectComm((PetscObject)dm), PETSC_ERR_PLIB, "Exceeded maximum number of face recognition cases");
        cornersSeen[corners] = 1;

        PetscCall(DMPlexGetNumFaceVertices(dm, cellDim, corners, &nFV));

        numFaceVertices[numFaceCases++] = nFV;
      }
    }
  }
  PetscCall(PetscCalloc1(numCells+1, &off));
  /* Count neighboring cells */
  for (cell = cStart; cell < cEnd; ++cell) {
    PetscInt numNeighbors = PETSC_DETERMINE, n;

    PetscCall(DMPlexGetAdjacency_Internal(dm, cell, PETSC_TRUE, PETSC_FALSE, PETSC_FALSE, &numNeighbors, &neighborCells));
    /* Get meet with each cell, and check with recognizer (could optimize to check each pair only once) */
    for (n = 0; n < numNeighbors; ++n) {
      PetscInt cellPair[2];
      /* For faceDepth > 1 every adjacent cell counts; otherwise the meet must look like a face */
      PetscBool found = faceDepth > 1 ? PETSC_TRUE : PETSC_FALSE;
      PetscInt meetSize = 0;
      const PetscInt *meet = NULL;

      cellPair[0] = cell; cellPair[1] = neighborCells[n];
      if (cellPair[0] == cellPair[1]) continue;
      if (!found) {
        PetscCall(DMPlexGetMeet(dm, 2, cellPair, &meetSize, &meet));
        if (meetSize) {
          PetscInt f;

          /* The pair shares a face iff the meet size matches a recognized face vertex count */
          for (f = 0; f < numFaceCases; ++f) {
            if (numFaceVertices[f] == meetSize) {
              found = PETSC_TRUE;
              break;
            }
          }
        }
        PetscCall(DMPlexRestoreMeet(dm, 2, cellPair, &meetSize, &meet));
      }
      if (found) ++off[cell-cStart+1];
    }
  }
  /* Prefix sum */
  for (cell = 1; cell <= numCells; ++cell) off[cell] += off[cell-1];

  if (adjacency) {
    PetscCall(PetscMalloc1(off[numCells], &adj));
    /* Get neighboring cells */
    for (cell = cStart; cell < cEnd; ++cell) {
      PetscInt numNeighbors = PETSC_DETERMINE, n;
      PetscInt cellOffset = 0;

      PetscCall(DMPlexGetAdjacency_Internal(dm, cell, PETSC_TRUE, PETSC_FALSE, PETSC_FALSE, &numNeighbors, &neighborCells));
      /* Get meet with each cell, and check with recognizer (could optimize to check each pair only once) */
      for (n = 0; n < numNeighbors; ++n) {
        PetscInt cellPair[2];
        PetscBool found = faceDepth > 1 ?
PETSC_TRUE : PETSC_FALSE; 675 PetscInt meetSize = 0; 676 const PetscInt *meet = NULL; 677 678 cellPair[0] = cell; cellPair[1] = neighborCells[n]; 679 if (cellPair[0] == cellPair[1]) continue; 680 if (!found) { 681 PetscCall(DMPlexGetMeet(dm, 2, cellPair, &meetSize, &meet)); 682 if (meetSize) { 683 PetscInt f; 684 685 for (f = 0; f < numFaceCases; ++f) { 686 if (numFaceVertices[f] == meetSize) { 687 found = PETSC_TRUE; 688 break; 689 } 690 } 691 } 692 PetscCall(DMPlexRestoreMeet(dm, 2, cellPair, &meetSize, &meet)); 693 } 694 if (found) { 695 adj[off[cell-cStart]+cellOffset] = neighborCells[n]; 696 ++cellOffset; 697 } 698 } 699 } 700 } 701 PetscCall(PetscFree(neighborCells)); 702 if (numVertices) *numVertices = numCells; 703 if (offsets) *offsets = off; 704 if (adjacency) *adjacency = adj; 705 PetscFunctionReturn(0); 706 } 707 708 /*@ 709 PetscPartitionerDMPlexPartition - Create a non-overlapping partition of the cells in the mesh 710 711 Collective on PetscPartitioner 712 713 Input Parameters: 714 + part - The PetscPartitioner 715 . targetSection - The PetscSection describing the absolute weight of each partition (can be NULL) 716 - dm - The mesh DM 717 718 Output Parameters: 719 + partSection - The PetscSection giving the division of points by partition 720 - partition - The list of points by partition 721 722 Notes: 723 If the DM has a local section associated, each point to be partitioned will be weighted by the total number of dofs identified 724 by the section in the transitive closure of the point. 

  Level: developer

.seealso: `DMPlexDistribute()`, `PetscPartitionerCreate()`, `PetscSectionCreate()`, `PetscSectionSetChart()`, `PetscPartitionerPartition()`
@*/
PetscErrorCode PetscPartitionerDMPlexPartition(PetscPartitioner part, DM dm, PetscSection targetSection, PetscSection partSection, IS *partition)
{
  PetscMPIInt size;
  PetscBool isplex;
  PetscSection vertSection = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(part, PETSCPARTITIONER_CLASSID, 1);
  PetscValidHeaderSpecific(dm, DM_CLASSID, 2);
  if (targetSection) PetscValidHeaderSpecific(targetSection, PETSC_SECTION_CLASSID, 3);
  PetscValidHeaderSpecific(partSection, PETSC_SECTION_CLASSID, 4);
  PetscValidPointer(partition, 5);
  PetscCall(PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex));
  PetscCheck(isplex,PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"Not for type %s",((PetscObject)dm)->type_name);
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject) part), &size));
  if (size == 1) {
    /* Serial shortcut: a single partition holding every cell, no graph needed */
    PetscInt *points;
    PetscInt cStart, cEnd, c;

    PetscCall(DMPlexGetHeightStratum(dm, part->height, &cStart, &cEnd));
    PetscCall(PetscSectionReset(partSection));
    PetscCall(PetscSectionSetChart(partSection, 0, size));
    PetscCall(PetscSectionSetDof(partSection, 0, cEnd-cStart));
    PetscCall(PetscSectionSetUp(partSection));
    PetscCall(PetscMalloc1(cEnd-cStart, &points));
    /* NOTE(review): points is sized cEnd-cStart but indexed by c itself -- fine only when cStart == 0; confirm for nonzero strata starts */
    for (c = cStart; c < cEnd; ++c) points[c] = c;
    PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject) part), cEnd-cStart, points, PETSC_OWN_POINTER, partition));
    PetscFunctionReturn(0);
  }
  if (part->height == 0) {
    PetscInt numVertices = 0;
    PetscInt *start = NULL;
    PetscInt *adjacency = NULL;
    IS globalNumbering;

    if (!part->noGraph || part->viewGraph) {
      PetscCall(DMPlexCreatePartitionerGraph(dm, part->height, &numVertices, &start, &adjacency, &globalNumbering));
    } else { /* only compute the number of owned local vertices */
      const PetscInt *idxs;
      PetscInt p, pStart, pEnd;

      PetscCall(DMPlexGetHeightStratum(dm, part->height, &pStart, &pEnd));
      PetscCall(DMPlexCreateNumbering_Plex(dm, pStart, pEnd, 0, NULL, dm->sf, &globalNumbering));
      PetscCall(ISGetIndices(globalNumbering, &idxs));
      /* Owned cells have non-negative global ids */
      for (p = 0; p < pEnd - pStart; p++) numVertices += idxs[p] < 0 ? 0 : 1;
      PetscCall(ISRestoreIndices(globalNumbering, &idxs));
    }
    if (part->usevwgt) {
      PetscSection section = dm->localSection, clSection = NULL;
      IS clPoints = NULL;
      const PetscInt *gid,*clIdx;
      PetscInt v, p, pStart, pEnd;

      /* dm->localSection encodes degrees of freedom per point, not per cell. We need to get the closure index to properly specify cell weights (aka dofs) */
      /* We do this only if the local section has been set */
      if (section) {
        PetscCall(PetscSectionGetClosureIndex(section, (PetscObject)dm, &clSection, NULL));
        if (!clSection) {
          PetscCall(DMPlexCreateClosureIndex(dm,NULL));
        }
        PetscCall(PetscSectionGetClosureIndex(section, (PetscObject)dm, &clSection, &clPoints));
        PetscCall(ISGetIndices(clPoints,&clIdx));
      }
      PetscCall(DMPlexGetHeightStratum(dm, part->height, &pStart, &pEnd));
      /* vertSection holds one weight per owned graph vertex */
      PetscCall(PetscSectionCreate(PETSC_COMM_SELF, &vertSection));
      PetscCall(PetscSectionSetChart(vertSection, 0, numVertices));
      if (globalNumbering) {
        PetscCall(ISGetIndices(globalNumbering,&gid));
      } else gid = NULL;
      for (p = pStart, v = 0; p < pEnd; ++p) {
        PetscInt dof = 1;

        /* skip cells in the overlap */
        if (gid && gid[p-pStart] < 0) continue;

        if (section) {
          PetscInt cl, clSize, clOff;

          /* Weight = total dofs over the cell closure */
          dof = 0;
          PetscCall(PetscSectionGetDof(clSection, p, &clSize));
          PetscCall(PetscSectionGetOffset(clSection, p, &clOff));
          for (cl = 0; cl < clSize; cl+=2) {
            PetscInt clDof, clPoint = clIdx[clOff + cl]; /* odd indices are reserved for orientations */

            PetscCall(PetscSectionGetDof(section, clPoint, &clDof));
            dof += clDof;
          }
        }
        PetscCheck(dof,PETSC_COMM_SELF,PETSC_ERR_SUP,"Number of dofs for point %" PetscInt_FMT " in the local section should be positive",p);
        PetscCall(PetscSectionSetDof(vertSection, v, dof));
        v++;
      }
      if (globalNumbering) {
        PetscCall(ISRestoreIndices(globalNumbering,&gid));
      }
      if (clPoints) {
        PetscCall(ISRestoreIndices(clPoints,&clIdx));
      }
      PetscCall(PetscSectionSetUp(vertSection));
    }
    PetscCall(PetscPartitionerPartition(part, size, numVertices, start, adjacency, vertSection, targetSection, partSection, partition));
    PetscCall(PetscFree(start));
    PetscCall(PetscFree(adjacency));
    if (globalNumbering) { /* partition is wrt global unique numbering: change this to be wrt local numbering */
      const PetscInt *globalNum;
      const PetscInt *partIdx;
      PetscInt *map, cStart, cEnd;
      PetscInt *adjusted, i, localSize, offset;
      IS newPartition;

      PetscCall(ISGetLocalSize(*partition,&localSize));
      PetscCall(PetscMalloc1(localSize,&adjusted));
      PetscCall(ISGetIndices(globalNumbering,&globalNum));
      PetscCall(ISGetIndices(*partition,&partIdx));
      PetscCall(PetscMalloc1(localSize,&map));
      PetscCall(DMPlexGetHeightStratum(dm, part->height, &cStart, &cEnd));
      /* map[k] = local point of the k-th owned cell, matching the compressed graph vertex numbering */
      for (i = cStart, offset = 0; i < cEnd; i++) {
        if (globalNum[i - cStart] >= 0) map[offset++] = i;
      }
      for (i = 0; i < localSize; i++) {
        adjusted[i] = map[partIdx[i]];
      }
      PetscCall(PetscFree(map));
      PetscCall(ISRestoreIndices(*partition,&partIdx));
      PetscCall(ISRestoreIndices(globalNumbering,&globalNum));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF,localSize,adjusted,PETSC_OWN_POINTER,&newPartition));
      PetscCall(ISDestroy(&globalNumbering));
      PetscCall(ISDestroy(partition));
      *partition = newPartition;
    }
  } else SETERRQ(PetscObjectComm((PetscObject) part), PETSC_ERR_ARG_OUTOFRANGE, "Invalid height %" PetscInt_FMT " for points to partition", part->height);
861 PetscCall(PetscSectionDestroy(&vertSection)); 862 PetscFunctionReturn(0); 863 } 864 865 /*@ 866 DMPlexGetPartitioner - Get the mesh partitioner 867 868 Not collective 869 870 Input Parameter: 871 . dm - The DM 872 873 Output Parameter: 874 . part - The PetscPartitioner 875 876 Level: developer 877 878 Note: This gets a borrowed reference, so the user should not destroy this PetscPartitioner. 879 880 .seealso `DMPlexDistribute()`, `DMPlexSetPartitioner()`, `PetscPartitionerDMPlexPartition()`, `PetscPartitionerCreate()` 881 @*/ 882 PetscErrorCode DMPlexGetPartitioner(DM dm, PetscPartitioner *part) 883 { 884 DM_Plex *mesh = (DM_Plex *) dm->data; 885 886 PetscFunctionBegin; 887 PetscValidHeaderSpecific(dm, DM_CLASSID, 1); 888 PetscValidPointer(part, 2); 889 *part = mesh->partitioner; 890 PetscFunctionReturn(0); 891 } 892 893 /*@ 894 DMPlexSetPartitioner - Set the mesh partitioner 895 896 logically collective on DM 897 898 Input Parameters: 899 + dm - The DM 900 - part - The partitioner 901 902 Level: developer 903 904 Note: Any existing PetscPartitioner will be destroyed. 

.seealso `DMPlexDistribute()`, `DMPlexGetPartitioner()`, `PetscPartitionerCreate()`
@*/
PetscErrorCode DMPlexSetPartitioner(DM dm, PetscPartitioner part)
{
  DM_Plex *mesh = (DM_Plex *) dm->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
  PetscValidHeaderSpecific(part, PETSCPARTITIONER_CLASSID, 2);
  /* Reference before destroy so that setting the currently-held partitioner is safe */
  PetscCall(PetscObjectReference((PetscObject)part));
  PetscCall(PetscPartitionerDestroy(&mesh->partitioner));
  mesh->partitioner = part;
  PetscFunctionReturn(0);
}

/* Recursively add 'point' and its transitive cone (downward closure) to the hash set.
   The hash set doubles as the visited-set, so shared faces/edges/vertices are only
   descended into once. */
static PetscErrorCode DMPlexAddClosure_Private(DM dm, PetscHSetI ht, PetscInt point)
{
  const PetscInt *cone;
  PetscInt        coneSize, c;
  PetscBool       missing;

  PetscFunctionBeginHot;
  /* missing is set when 'point' was not yet in the set; only then do we recurse */
  PetscCall(PetscHSetIQueryAdd(ht, point, &missing));
  if (missing) {
    PetscCall(DMPlexGetCone(dm, point, &cone));
    PetscCall(DMPlexGetConeSize(dm, point, &coneSize));
    for (c = 0; c < coneSize; c++) {
      PetscCall(DMPlexAddClosure_Private(dm, ht, cone[c]));
    }
  }
  PetscFunctionReturn(0);
}

/* Add tree (non-conforming mesh) relatives of 'point' to the hash set:
   the closure of its parent when 'up', its children when 'down'.
   Kept for reference (see DMPlexClosurePoints_Private); currently unused. */
PETSC_UNUSED static PetscErrorCode DMPlexAddClosure_Tree(DM dm, PetscHSetI ht, PetscInt point, PetscBool up, PetscBool down)
{
  PetscFunctionBegin;
  if (up) {
    PetscInt parent;

    PetscCall(DMPlexGetTreeParent(dm,point,&parent,NULL));
    if (parent != point) { /* a point is its own parent when it has none */
      PetscInt closureSize, *closure = NULL, i;

      PetscCall(DMPlexGetTransitiveClosure(dm,parent,PETSC_TRUE,&closureSize,&closure));
      for (i = 0; i < closureSize; i++) {
        PetscInt cpoint = closure[2*i]; /* closure[] stores (point, orientation) pairs */

        PetscCall(PetscHSetIAdd(ht, cpoint));
        PetscCall(DMPlexAddClosure_Tree(dm,ht,cpoint,PETSC_TRUE,PETSC_FALSE));
      }
      PetscCall(DMPlexRestoreTransitiveClosure(dm,parent,PETSC_TRUE,&closureSize,&closure));
    }
  }
  if (down) {
    PetscInt        numChildren;
    const PetscInt *children;

    PetscCall(DMPlexGetTreeChildren(dm,point,&numChildren,&children));
    if (numChildren) {
      PetscInt i;

      for (i = 0; i < numChildren; i++) {
        PetscInt cpoint = children[i];

        PetscCall(PetscHSetIAdd(ht, cpoint));
        PetscCall(DMPlexAddClosure_Tree(dm,ht,cpoint,PETSC_FALSE,PETSC_TRUE));
      }
    }
  }
  PetscFunctionReturn(0);
}

/* Walk up the tree-parent chain of 'point', adding each ancestor, its closure, and
   recursing up from every cone point of the ancestor. */
static PetscErrorCode DMPlexAddClosureTree_Up_Private(DM dm, PetscHSetI ht, PetscInt point)
{
  PetscInt parent;

  PetscFunctionBeginHot;
  PetscCall(DMPlexGetTreeParent(dm, point, &parent,NULL));
  if (point != parent) {
    const PetscInt *cone;
    PetscInt        coneSize, c;

    PetscCall(DMPlexAddClosureTree_Up_Private(dm, ht, parent));
    PetscCall(DMPlexAddClosure_Private(dm, ht, parent));
    PetscCall(DMPlexGetCone(dm, parent, &cone));
    PetscCall(DMPlexGetConeSize(dm, parent, &coneSize));
    for (c = 0; c < coneSize; c++) {
      const PetscInt cp = cone[c];

      PetscCall(DMPlexAddClosureTree_Up_Private(dm, ht, cp));
    }
  }
  PetscFunctionReturn(0);
}

/* Add the immediate tree-children of 'point' (one level only, no recursion). */
static PetscErrorCode DMPlexAddClosureTree_Down_Private(DM dm, PetscHSetI ht, PetscInt point)
{
  PetscInt        i, numChildren;
  const PetscInt *children;

  PetscFunctionBeginHot;
  PetscCall(DMPlexGetTreeChildren(dm, point, &numChildren, &children));
  for (i = 0; i < numChildren; i++) {
    PetscCall(PetscHSetIAdd(ht, children[i]));
  }
  PetscFunctionReturn(0);
}

/* Closure of 'point' on a tree (non-conforming) mesh: the point itself, its tree
   ancestors, its tree children, and recursively the same for every cone point. */
static PetscErrorCode DMPlexAddClosureTree_Private(DM dm, PetscHSetI ht, PetscInt point)
{
  const PetscInt *cone;
  PetscInt        coneSize, c;

  PetscFunctionBeginHot;
  PetscCall(PetscHSetIAdd(ht, point));
  PetscCall(DMPlexAddClosureTree_Up_Private(dm, ht, point));
  PetscCall(DMPlexAddClosureTree_Down_Private(dm, ht, point));
  PetscCall(DMPlexGetCone(dm, point, &cone));
  PetscCall(DMPlexGetConeSize(dm, point, &coneSize));
  for (c = 0; c < coneSize; c++) {
    PetscCall(DMPlexAddClosureTree_Private(dm, ht, cone[c]));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode
DMPlexClosurePoints_Private(DM dm, PetscInt numPoints, const PetscInt points[], IS *closureIS)
{
  /* Collect the union of the transitive closures of points[] into a sorted IS.
     Uses the tree-aware walk when the mesh has parent/child (non-conforming) relations. */
  DM_Plex        *mesh    = (DM_Plex *)dm->data;
  const PetscBool hasTree = (mesh->parentSection || mesh->childSection) ? PETSC_TRUE : PETSC_FALSE;
  PetscInt        nelems, *elems, off = 0, p;
  PetscHSetI      ht = NULL;

  PetscFunctionBegin;
  PetscCall(PetscHSetICreate(&ht));
  /* heuristic pre-size: assume ~16 closure points per input point */
  PetscCall(PetscHSetIResize(ht, numPoints*16));
  if (!hasTree) {
    for (p = 0; p < numPoints; ++p) {
      PetscCall(DMPlexAddClosure_Private(dm, ht, points[p]));
    }
  } else {
#if 1
    for (p = 0; p < numPoints; ++p) {
      PetscCall(DMPlexAddClosureTree_Private(dm, ht, points[p]));
    }
#else
    /* older implementation kept for reference */
    PetscInt *closure = NULL, closureSize, c;
    for (p = 0; p < numPoints; ++p) {
      PetscCall(DMPlexGetTransitiveClosure(dm, points[p], PETSC_TRUE, &closureSize, &closure));
      for (c = 0; c < closureSize*2; c += 2) {
        PetscCall(PetscHSetIAdd(ht, closure[c]));
        if (hasTree) PetscCall(DMPlexAddClosure_Tree(dm, ht, closure[c], PETSC_TRUE, PETSC_TRUE));
      }
    }
    if (closure) PetscCall(DMPlexRestoreTransitiveClosure(dm, 0, PETSC_TRUE, NULL, &closure));
#endif
  }
  PetscCall(PetscHSetIGetSize(ht, &nelems));
  PetscCall(PetscMalloc1(nelems, &elems));
  PetscCall(PetscHSetIGetElems(ht, &off, elems));
  PetscCall(PetscHSetIDestroy(&ht));
  /* hash order is arbitrary; sort so the IS is deterministic */
  PetscCall(PetscSortInt(nelems, elems));
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nelems, elems, PETSC_OWN_POINTER, closureIS));
  PetscFunctionReturn(0);
}

/*@
  DMPlexPartitionLabelClosure - Add the closure of all points to the partition label

  Input Parameters:
+ dm    - The DM
- label - DMLabel assigning ranks to remote roots

  Level: developer

.seealso: `DMPlexPartitionLabelCreateSF()`, `DMPlexDistribute()`, `DMPlexCreateOverlap()`
@*/
PetscErrorCode DMPlexPartitionLabelClosure(DM dm, DMLabel label)
{
  IS              rankIS, pointIS, closureIS;
  const PetscInt *ranks, *points;
  PetscInt        numRanks, numPoints, r;

  PetscFunctionBegin;
  PetscCall(DMLabelGetValueIS(label, &rankIS));
  PetscCall(ISGetLocalSize(rankIS, &numRanks));
  PetscCall(ISGetIndices(rankIS, &ranks));
  /* For each target rank, replace its stratum with the closure of its points */
  for (r = 0; r < numRanks; ++r) {
    const PetscInt rank = ranks[r];
    PetscCall(DMLabelGetStratumIS(label, rank, &pointIS));
    PetscCall(ISGetLocalSize(pointIS, &numPoints));
    PetscCall(ISGetIndices(pointIS, &points));
    PetscCall(DMPlexClosurePoints_Private(dm, numPoints, points, &closureIS));
    PetscCall(ISRestoreIndices(pointIS, &points));
    PetscCall(ISDestroy(&pointIS));
    PetscCall(DMLabelSetStratumIS(label, rank, closureIS));
    PetscCall(ISDestroy(&closureIS));
  }
  PetscCall(ISRestoreIndices(rankIS, &ranks));
  PetscCall(ISDestroy(&rankIS));
  PetscFunctionReturn(0);
}

/*@
  DMPlexPartitionLabelAdjacency - Add one level of adjacent points to the partition label

  Input Parameters:
+ dm    - The DM
- label - DMLabel assigning ranks to remote roots

  Level: developer

.seealso: `DMPlexPartitionLabelCreateSF()`, `DMPlexDistribute()`, `DMPlexCreateOverlap()`
@*/
PetscErrorCode DMPlexPartitionLabelAdjacency(DM dm, DMLabel label)
{
  IS              rankIS, pointIS;
  const PetscInt *ranks, *points;
  PetscInt        numRanks, numPoints, r, p, a, adjSize;
  PetscInt       *adj = NULL;

  PetscFunctionBegin;
  PetscCall(DMLabelGetValueIS(label, &rankIS));
  PetscCall(ISGetLocalSize(rankIS, &numRanks));
  PetscCall(ISGetIndices(rankIS, &ranks));
  for (r = 0; r < numRanks; ++r) {
    const PetscInt rank = ranks[r];

    PetscCall(DMLabelGetStratumIS(label, rank, &pointIS));
    PetscCall(ISGetLocalSize(pointIS, &numPoints));
    PetscCall(ISGetIndices(pointIS, &points));
    for (p = 0; p < numPoints; ++p) {
      /* PETSC_DETERMINE lets DMPlexGetAdjacency size (and reuse) the adj workspace */
      adjSize = PETSC_DETERMINE;
      PetscCall(DMPlexGetAdjacency(dm, points[p], &adjSize, &adj));
      for (a = 0; a < adjSize; ++a) PetscCall(DMLabelSetValue(label, adj[a], rank));
    }
    PetscCall(ISRestoreIndices(pointIS, &points));
    PetscCall(ISDestroy(&pointIS));
  }
  PetscCall(ISRestoreIndices(rankIS, &ranks));
  PetscCall(ISDestroy(&rankIS));
  PetscCall(PetscFree(adj));
  PetscFunctionReturn(0);
}

/*@
  DMPlexPartitionLabelPropagate - Propagate points in a partition label over the point SF

  Input Parameters:
+ dm    - The DM
- label - DMLabel assigning ranks to remote roots

  Level: developer

  Note: This is required when generating multi-level overlaps to capture
  overlap points from non-neighbouring partitions.

.seealso: `DMPlexPartitionLabelCreateSF()`, `DMPlexDistribute()`, `DMPlexCreateOverlap()`
@*/
PetscErrorCode DMPlexPartitionLabelPropagate(DM dm, DMLabel label)
{
  MPI_Comm        comm;
  PetscMPIInt     rank;
  PetscSF         sfPoint;
  DMLabel         lblRoots, lblLeaves;
  IS              rankIS, pointIS;
  const PetscInt *ranks;
  PetscInt        numRanks, r;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject) dm, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(DMGetPointSF(dm, &sfPoint));
  /* Pull point contributions from remote leaves into local roots */
  PetscCall(DMLabelGather(label, sfPoint, &lblLeaves));
  PetscCall(DMLabelGetValueIS(lblLeaves, &rankIS));
  PetscCall(ISGetLocalSize(rankIS, &numRanks));
  PetscCall(ISGetIndices(rankIS, &ranks));
  for (r = 0; r < numRanks; ++r) {
    const PetscInt remoteRank = ranks[r];
    if (remoteRank == rank) continue; /* no need to send points to ourselves */
    PetscCall(DMLabelGetStratumIS(lblLeaves, remoteRank, &pointIS));
    PetscCall(DMLabelInsertIS(label, pointIS, remoteRank));
    PetscCall(ISDestroy(&pointIS));
  }
  PetscCall(ISRestoreIndices(rankIS, &ranks));
  PetscCall(ISDestroy(&rankIS));
  PetscCall(DMLabelDestroy(&lblLeaves));
  /* Push point contributions from roots into remote leaves */
  PetscCall(DMLabelDistribute(label, sfPoint, &lblRoots));
  PetscCall(DMLabelGetValueIS(lblRoots, &rankIS));
  PetscCall(ISGetLocalSize(rankIS, &numRanks));
  PetscCall(ISGetIndices(rankIS, &ranks));
  for (r = 0; r < numRanks; ++r) {
    const PetscInt remoteRank = ranks[r];
    if (remoteRank == rank) continue;
    PetscCall(DMLabelGetStratumIS(lblRoots, remoteRank, &pointIS));
    PetscCall(DMLabelInsertIS(label, pointIS, remoteRank));
    PetscCall(ISDestroy(&pointIS));
  }
  PetscCall(ISRestoreIndices(rankIS, &ranks));
  PetscCall(ISDestroy(&rankIS));
  PetscCall(DMLabelDestroy(&lblRoots));
  PetscFunctionReturn(0);
}

/*@
  DMPlexPartitionLabelInvert - Create a partition label of remote roots from a local root label

  Input Parameters:
+ dm        - The DM
. rootLabel - DMLabel assigning ranks to local roots
- processSF - A star forest mapping into the local index on each remote rank

  Output Parameter:
. leafLabel - DMLabel assigning ranks to remote roots

  Note: The rootLabel defines a send pattern by mapping local points to remote target ranks. The
  resulting leafLabel is a receiver mapping of remote roots to their parent rank.

  Level: developer

.seealso: `DMPlexPartitionLabelCreateSF()`, `DMPlexDistribute()`, `DMPlexCreateOverlap()`
@*/
PetscErrorCode DMPlexPartitionLabelInvert(DM dm, DMLabel rootLabel, PetscSF processSF, DMLabel leafLabel)
{
  MPI_Comm           comm;
  PetscMPIInt        rank, size, r;
  PetscInt           p, n, numNeighbors, numPoints, dof, off, rootSize, l, nleaves, leafSize;
  PetscSF            sfPoint;
  PetscSection       rootSection;
  PetscSFNode       *rootPoints, *leafPoints;
  const PetscSFNode *remote;
  const PetscInt    *local, *neighbors;
  IS                 valueIS;
  /* mpiOverflow is the global reduction of per-rank counter overflow; when set we
     fall back from Alltoallv (int counts/displs) to a PetscSF-based exchange */
  PetscBool          mpiOverflow = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(DMPLEX_PartLabelInvert,dm,0,0,0));
  PetscCall(PetscObjectGetComm((PetscObject) dm, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(DMGetPointSF(dm, &sfPoint));

  /* Convert to (point, rank) and use actual owners */
  PetscCall(PetscSectionCreate(comm, &rootSection));
  PetscCall(PetscSectionSetChart(rootSection, 0, size));
  PetscCall(DMLabelGetValueIS(rootLabel, &valueIS));
  PetscCall(ISGetLocalSize(valueIS, &numNeighbors));
  PetscCall(ISGetIndices(valueIS, &neighbors));
  /* rootSection: dof per target rank = number of points we send to that rank */
  for (n = 0; n < numNeighbors; ++n) {
    PetscCall(DMLabelGetStratumSize(rootLabel, neighbors[n], &numPoints));
    PetscCall(PetscSectionAddDof(rootSection, neighbors[n], numPoints));
  }
  PetscCall(PetscSectionSetUp(rootSection));
  PetscCall(PetscSectionGetStorageSize(rootSection, &rootSize));
  PetscCall(PetscMalloc1(rootSize, &rootPoints));
  PetscCall(PetscSFGetGraph(sfPoint, NULL, &nleaves, &local, &remote));
  for (n = 0; n < numNeighbors; ++n) {
    IS              pointIS;
    const PetscInt *points;

    PetscCall(PetscSectionGetOffset(rootSection, neighbors[n], &off));
    PetscCall(DMLabelGetStratumIS(rootLabel, neighbors[n], &pointIS));
    PetscCall(ISGetLocalSize(pointIS, &numPoints));
    PetscCall(ISGetIndices(pointIS, &points));
    for (p = 0; p < numPoints; ++p) {
      /* If the point is one of our leaves, record its true (remote) owner;
         otherwise it is a root we own ourselves */
      if (local) PetscCall(PetscFindInt(points[p], nleaves, local, &l));
      else {l = -1;}
      if (l >= 0) {rootPoints[off+p] = remote[l];}
      else {rootPoints[off+p].index = points[p]; rootPoints[off+p].rank = rank;}
    }
    PetscCall(ISRestoreIndices(pointIS, &points));
    PetscCall(ISDestroy(&pointIS));
  }

  /* Try to communicate overlap using All-to-All */
  if (!processSF) {
    PetscInt64   counter = 0;
    PetscBool    locOverflow = PETSC_FALSE;
    PetscMPIInt *scounts, *sdispls, *rcounts, *rdispls;

    PetscCall(PetscCalloc4(size, &scounts, size, &sdispls, size, &rcounts, size, &rdispls));
    for (n = 0; n < numNeighbors; ++n) {
      PetscCall(PetscSectionGetDof(rootSection, neighbors[n], &dof));
      PetscCall(PetscSectionGetOffset(rootSection, neighbors[n], &off));
#if defined(PETSC_USE_64BIT_INDICES)
      /* Alltoallv takes int counts/displs; detect 64-bit values that do not fit */
      if (dof > PETSC_MPI_INT_MAX) {locOverflow = PETSC_TRUE; break;}
      if (off > PETSC_MPI_INT_MAX) {locOverflow = PETSC_TRUE; break;}
#endif
      scounts[neighbors[n]] = (PetscMPIInt) dof;
      sdispls[neighbors[n]] = (PetscMPIInt) off;
    }
    PetscCallMPI(MPI_Alltoall(scounts, 1, MPI_INT, rcounts, 1, MPI_INT, comm));
    for (r = 0; r < size; ++r) { rdispls[r] = (int)counter; counter += rcounts[r]; }
    if (counter > PETSC_MPI_INT_MAX) locOverflow = PETSC_TRUE;
    PetscCallMPI(MPI_Allreduce(&locOverflow, &mpiOverflow, 1, MPIU_BOOL, MPI_LOR, comm));
    if (!mpiOverflow) {
      PetscCall(PetscInfo(dm,"Using Alltoallv for mesh distribution\n"));
      leafSize = (PetscInt) counter;
      PetscCall(PetscMalloc1(leafSize, &leafPoints));
      PetscCallMPI(MPI_Alltoallv(rootPoints, scounts, sdispls, MPIU_2INT, leafPoints, rcounts, rdispls, MPIU_2INT, comm));
    }
    PetscCall(PetscFree4(scounts, sdispls, rcounts, rdispls));
  }

  /* Communicate overlap using process star forest */
  if (processSF || mpiOverflow) {
    PetscSF      procSF;
    PetscSection leafSection;

    if (processSF) {
      PetscCall(PetscInfo(dm,"Using processSF for mesh distribution\n"));
      PetscCall(PetscObjectReference((PetscObject)processSF));
      procSF = processSF;
    } else {
      PetscCall(PetscInfo(dm,"Using processSF for mesh distribution (MPI overflow)\n"));
      PetscCall(PetscSFCreate(comm,&procSF));
      PetscCall(PetscSFSetGraphWithPattern(procSF,NULL,PETSCSF_PATTERN_ALLTOALL));
    }

    PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject)dm), &leafSection));
    PetscCall(DMPlexDistributeData(dm, procSF, rootSection, MPIU_2INT, rootPoints, leafSection, (void**) &leafPoints));
    PetscCall(PetscSectionGetStorageSize(leafSection, &leafSize));
    PetscCall(PetscSectionDestroy(&leafSection));
    PetscCall(PetscSFDestroy(&procSF));
  }

  /* Record the received (point, owner rank) pairs in the output label */
  for (p = 0; p < leafSize; p++) {
    PetscCall(DMLabelSetValue(leafLabel, leafPoints[p].index, leafPoints[p].rank));
  }

  PetscCall(ISRestoreIndices(valueIS, &neighbors));
  PetscCall(ISDestroy(&valueIS));
  PetscCall(PetscSectionDestroy(&rootSection));
  PetscCall(PetscFree(rootPoints));
  PetscCall(PetscFree(leafPoints));
  PetscCall(PetscLogEventEnd(DMPLEX_PartLabelInvert,dm,0,0,0));
  PetscFunctionReturn(0);
}

/*@
  DMPlexPartitionLabelCreateSF - Create a star forest from a label that assigns ranks to points

  Input Parameters:
+ dm    - The DM
- label - DMLabel assigning ranks to remote roots

  Output Parameter:
. sf - The star forest communication context encapsulating the defined mapping

  Note: The incoming label is a receiver mapping of remote points to their parent rank.

  Level: developer

.seealso: `DMPlexDistribute()`, `DMPlexCreateOverlap()`
@*/
PetscErrorCode DMPlexPartitionLabelCreateSF(DM dm, DMLabel label, PetscSF *sf)
{
  PetscMPIInt     rank;
  PetscInt        n, numRemote, p, numPoints, pStart, pEnd, idx = 0, nNeighbors;
  PetscSFNode    *remotePoints;
  IS              remoteRootIS, neighborsIS;
  const PetscInt *remoteRoots, *neighbors;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(DMPLEX_PartLabelCreateSF,dm,0,0,0));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject) dm), &rank));

  PetscCall(DMLabelGetValueIS(label, &neighborsIS));
#if 0
  /* disabled: sort the neighbor ranks for a deterministic ordering */
  {
    IS is;
    PetscCall(ISDuplicate(neighborsIS, &is));
    PetscCall(ISSort(is));
    PetscCall(ISDestroy(&neighborsIS));
    neighborsIS = is;
  }
#endif
  PetscCall(ISGetLocalSize(neighborsIS, &nNeighbors));
  PetscCall(ISGetIndices(neighborsIS, &neighbors));
  /* Total leaf count over all strata (including our own rank's stratum) */
  for (numRemote = 0, n = 0; n < nNeighbors; ++n) {
    PetscCall(DMLabelGetStratumSize(label, neighbors[n], &numPoints));
    numRemote += numPoints;
  }
  PetscCall(PetscMalloc1(numRemote, &remotePoints));
  /* Put owned points first */
  PetscCall(DMLabelGetStratumSize(label, rank, &numPoints));
  if (numPoints > 0) {
    PetscCall(DMLabelGetStratumIS(label, rank, &remoteRootIS));
    PetscCall(ISGetIndices(remoteRootIS, &remoteRoots));
    for (p = 0; p < numPoints; p++) {
      remotePoints[idx].index = remoteRoots[p];
      remotePoints[idx].rank = rank;
      idx++;
    }
    PetscCall(ISRestoreIndices(remoteRootIS, &remoteRoots));
    PetscCall(ISDestroy(&remoteRootIS));
  }
  /* Now add remote points */
  for (n = 0; n < nNeighbors; ++n) {
    const PetscInt nn = neighbors[n];

    PetscCall(DMLabelGetStratumSize(label, nn, &numPoints));
    if (nn == rank || numPoints <= 0) continue; /* own stratum already handled above */
    PetscCall(DMLabelGetStratumIS(label, nn, &remoteRootIS));
    PetscCall(ISGetIndices(remoteRootIS, &remoteRoots));
    for (p = 0; p < numPoints; p++) {
      remotePoints[idx].index = remoteRoots[p];
      remotePoints[idx].rank = nn;
      idx++;
    }
    PetscCall(ISRestoreIndices(remoteRootIS, &remoteRoots));
    PetscCall(ISDestroy(&remoteRootIS));
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) dm), sf));
  PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));
  /* NULL ilocal: leaves are the contiguous range [0, numRemote) */
  PetscCall(PetscSFSetGraph(*sf, pEnd-pStart, numRemote, NULL, PETSC_OWN_POINTER, remotePoints, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetUp(*sf));
  PetscCall(ISDestroy(&neighborsIS));
  PetscCall(PetscLogEventEnd(DMPLEX_PartLabelCreateSF,dm,0,0,0));
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_PARMETIS)
#include <parmetis.h>
#endif

/* The two functions below are used by DMPlexRebalanceSharedPoints which errors
 * when PETSc is built without ParMETIS. To avoid -Wunused-function, we take
 * them out in that case. */
#if defined(PETSC_HAVE_PARMETIS)
/*@C

  DMPlexRewriteSF - Rewrites the ownership of the SF of a DM (in place).

  Input parameters:
+ dm - The DMPlex object.
. n - The number of points.
. pointsToRewrite - The points in the SF whose ownership will change.
. targetOwners - New owner for each element in pointsToRewrite.
- degrees - Degrees of the points in the SF as obtained by PetscSFComputeDegreeBegin/PetscSFComputeDegreeEnd.

  Level: developer

@*/
static PetscErrorCode DMPlexRewriteSF(DM dm, PetscInt n, PetscInt *pointsToRewrite, PetscInt *targetOwners, const PetscInt *degrees)
{
  PetscInt           pStart, pEnd, i, j, counter, leafCounter, sumDegrees, nroots, nleafs;
  PetscInt          *cumSumDegrees, *newOwners, *newNumbers, *rankOnLeafs, *locationsOfLeafs, *remoteLocalPointOfLeafs, *points, *leafsNew;
  PetscSFNode       *leafLocationsNew;
  const PetscSFNode *iremote;
  const PetscInt    *ilocal;
  PetscBool         *isLeaf;
  PetscSF            sf;
  MPI_Comm           comm;
  PetscMPIInt        rank, size;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject) dm, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));

  PetscCall(DMGetPointSF(dm, &sf));
  PetscCall(PetscSFGetGraph(sf, &nroots, &nleafs, &ilocal, &iremote));
  /* NOTE(review): ilocal is dereferenced below; this assumes the point SF stores
     explicit leaf indices (non-NULL ilocal) — confirm for contiguous-leaf SFs */
  PetscCall(PetscMalloc1(pEnd-pStart, &isLeaf));
  for (i=0; i<pEnd-pStart; i++) {
    isLeaf[i] = PETSC_FALSE;
  }
  for (i=0; i<nleafs; i++) {
    isLeaf[ilocal[i]-pStart] = PETSC_TRUE;
  }

  /* Prefix sum of root degrees: cumSumDegrees[p] is the offset of point p's
     leaf records in the gathered arrays below */
  PetscCall(PetscMalloc1(pEnd-pStart+1, &cumSumDegrees));
  cumSumDegrees[0] = 0;
  for (i=1; i<=pEnd-pStart; i++) {
    cumSumDegrees[i] = cumSumDegrees[i-1] + degrees[i-1];
  }
  sumDegrees = cumSumDegrees[pEnd-pStart];
  /* get the location of my leafs (we have sumDegrees many leafs pointing at our roots) */

  PetscCall(PetscMalloc1(sumDegrees, &locationsOfLeafs));
  PetscCall(PetscMalloc1(pEnd-pStart, &rankOnLeafs));
  for (i=0; i<pEnd-pStart; i++) {
    rankOnLeafs[i] = rank;
  }
  /* Gather, for each root, the rank of every process holding a leaf of it */
  PetscCall(PetscSFGatherBegin(sf, MPIU_INT, rankOnLeafs, locationsOfLeafs));
  PetscCall(PetscSFGatherEnd(sf, MPIU_INT, rankOnLeafs, locationsOfLeafs));
  PetscCall(PetscFree(rankOnLeafs));

  /* get the remote local points of my leaves */
  PetscCall(PetscMalloc1(sumDegrees, &remoteLocalPointOfLeafs));
  PetscCall(PetscMalloc1(pEnd-pStart, &points));
  for (i=0; i<pEnd-pStart; i++) {
    points[i] = pStart+i;
  }
  PetscCall(PetscSFGatherBegin(sf, MPIU_INT, points, remoteLocalPointOfLeafs));
  PetscCall(PetscSFGatherEnd(sf, MPIU_INT, points, remoteLocalPointOfLeafs));
  PetscCall(PetscFree(points));
  /* Figure out the new owners of the vertices that are up for grabs and their numbers on the new owners */
  PetscCall(PetscMalloc1(pEnd-pStart, &newOwners));
  PetscCall(PetscMalloc1(pEnd-pStart, &newNumbers));
  for (i=0; i<pEnd-pStart; i++) {
    newOwners[i] = -1;  /* -1 means: ownership of this point is unchanged */
    newNumbers[i] = -1;
  }
  {
    PetscInt oldNumber, newNumber, oldOwner, newOwner;
    for (i=0; i<n; i++) {
      oldNumber = pointsToRewrite[i];
      newNumber = -1;
      oldOwner = rank;
      newOwner = targetOwners[i];
      if (oldOwner == newOwner) {
        newNumber = oldNumber;
      } else {
        /* find the new owner among the leaves of this root to learn its local number there */
        for (j=0; j<degrees[oldNumber]; j++) {
          if (locationsOfLeafs[cumSumDegrees[oldNumber]+j] == newOwner) {
            newNumber = remoteLocalPointOfLeafs[cumSumDegrees[oldNumber]+j];
            break;
          }
        }
      }
      PetscCheck(newNumber != -1,PetscObjectComm((PetscObject) dm), PETSC_ERR_SUP, "Couldn't find the new owner of vertex.");

      newOwners[oldNumber] = newOwner;
      newNumbers[oldNumber] = newNumber;
    }
  }
  PetscCall(PetscFree(cumSumDegrees));
  PetscCall(PetscFree(locationsOfLeafs));
  PetscCall(PetscFree(remoteLocalPointOfLeafs));

  /* Broadcast the new ownership decisions from roots to all leaves (in place) */
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, newOwners, newOwners,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, newOwners, newOwners,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, newNumbers, newNumbers,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, newNumbers, newNumbers,MPI_REPLACE));

  /* Now count how many leafs we have on each processor. */
  leafCounter=0;
  for (i=0; i<pEnd-pStart; i++) {
    if (newOwners[i] >= 0) {
      if (newOwners[i] != rank) {
        leafCounter++;
      }
    } else {
      if (isLeaf[i]) {
        leafCounter++;
      }
    }
  }

  /* Now set up the new sf by creating the leaf arrays */
  PetscCall(PetscMalloc1(leafCounter, &leafsNew));
  PetscCall(PetscMalloc1(leafCounter, &leafLocationsNew));

  leafCounter = 0;
  counter = 0;  /* walks the old iremote[] in step with the old leaves */
  for (i=0; i<pEnd-pStart; i++) {
    if (newOwners[i] >= 0) {
      if (newOwners[i] != rank) {
        /* rewritten point now owned elsewhere: we become a leaf of the new owner */
        leafsNew[leafCounter] = i;
        leafLocationsNew[leafCounter].rank = newOwners[i];
        leafLocationsNew[leafCounter].index = newNumbers[i];
        leafCounter++;
      }
    } else {
      if (isLeaf[i]) {
        /* untouched leaf: keep its old remote location */
        leafsNew[leafCounter] = i;
        leafLocationsNew[leafCounter].rank = iremote[counter].rank;
        leafLocationsNew[leafCounter].index = iremote[counter].index;
        leafCounter++;
      }
    }
    if (isLeaf[i]) {
      counter++;
    }
  }

  PetscCall(PetscSFSetGraph(sf, nroots, leafCounter, leafsNew, PETSC_OWN_POINTER, leafLocationsNew, PETSC_OWN_POINTER));
  PetscCall(PetscFree(newOwners));
  PetscCall(PetscFree(newNumbers));
  PetscCall(PetscFree(isLeaf));
  PetscFunctionReturn(0);
}

/* Print min/avg/max and balance ratio of the weighted point distribution over ranks.
   When part is NULL the current ownership (this rank) is used; otherwise part[i]
   gives the target rank of point i. skip is the stride into vtxwgt. */
static PetscErrorCode DMPlexViewDistribution(MPI_Comm comm, PetscInt n, PetscInt skip, PetscInt *vtxwgt, PetscInt *part, PetscViewer viewer)
{
  PetscInt    *distribution, min, max, sum;
  PetscMPIInt  rank, size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscCalloc1(size, &distribution));
  for (PetscInt i=0; i<n; i++) {
    if (part) distribution[part[i]] += vtxwgt[skip*i];
    else distribution[rank] += vtxwgt[skip*i];
  }
  PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE, distribution, size, MPIU_INT, MPI_SUM, comm));
  min = distribution[0];
  max = distribution[0];
  sum = distribution[0];
  for (PetscInt i=1; i<size; i++) {
    if (distribution[i]<min) min=distribution[i];
    if (distribution[i]>max) max=distribution[i];
    sum += distribution[i];
  }
  PetscCall(PetscViewerASCIIPrintf(viewer, "Min: %" PetscInt_FMT ", Avg: %" PetscInt_FMT ", Max: %" PetscInt_FMT ", Balance: %f\n", min, sum/size, max, (max*1.*size)/sum));
  PetscCall(PetscFree(distribution));
  PetscFunctionReturn(0);
}

#endif

/*@
  DMPlexRebalanceSharedPoints - Redistribute points in the plex that are shared in order to achieve better balancing. This routine updates the PointSF of the DM inplace.

  Input parameters:
+ dm - The DMPlex object.
. entityDepth - depth of the entity to balance (0 -> balance vertices).
. useInitialGuess - whether to use the current distribution as initial guess (only used by ParMETIS).
- parallel - whether to use ParMETIS and do the partition in parallel or whether to gather the graph onto a single process and use METIS.

  Output parameters:
. success - whether the graph partitioning was successful or not, optional. Unsuccessful simply means no change to the partitioning

  Options Database:
+ -dm_plex_rebalance_shared_points_parmetis - Use ParMetis instead of Metis for the partitioner
. -dm_plex_rebalance_shared_points_use_initial_guess - Use current partition to bootstrap ParMetis partition
.
-dm_plex_rebalance_shared_points_use_mat_partitioning - Use the MatPartitioning object to perform the partition, the prefix for those operations is -dm_plex_rebalance_shared_points_
- -dm_plex_rebalance_shared_points_monitor - Monitor the shared points rebalance process

  Developer Notes:
  This should use MatPartitioning to allow the use of any partitioner and not be hardwired to use ParMetis

  Level: intermediate
@*/
PetscErrorCode DMPlexRebalanceSharedPoints(DM dm, PetscInt entityDepth, PetscBool useInitialGuess, PetscBool parallel, PetscBool *success)
{
#if defined(PETSC_HAVE_PARMETIS)
  PetscSF            sf;
  PetscInt           ierr, i, j, idx, jdx;
  PetscInt           eBegin, eEnd, nroots, nleafs, pStart, pEnd;
  const PetscInt    *degrees, *ilocal;
  const PetscSFNode *iremote;
  PetscBool         *toBalance, *isLeaf, *isExclusivelyOwned, *isNonExclusivelyOwned;
  PetscInt           numExclusivelyOwned, numNonExclusivelyOwned;
  PetscMPIInt        rank, size;
  PetscInt          *globalNumbersOfLocalOwnedVertices, *leafGlobalNumbers;
  const PetscInt    *cumSumVertices;
  PetscInt           offset, counter;
  PetscInt          *vtxwgt;
  const PetscInt    *xadj, *adjncy;
  PetscInt          *part, *options;
  PetscInt           nparts, wgtflag, numflag, ncon, edgecut;
  real_t            *ubvec;
  PetscInt          *firstVertices, *renumbering;
  PetscInt           failed, failedGlobal;
  MPI_Comm           comm;
  Mat                A;
  PetscViewer        viewer = NULL; /* fix: was read (if (viewer)) while uninitialized when no viewer option is given */
  PetscViewerFormat  format;
  PetscLayout        layout;
  real_t            *tpwgts;
  PetscMPIInt       *counts, *mpiCumSumVertices;
  PetscInt          *pointsToRewrite;
  PetscInt           numRows;
  PetscBool          done, usematpartitioning = PETSC_FALSE;
  IS                 ispart = NULL;
  MatPartitioning    mp;
  const char        *prefix;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject) dm, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* Nothing to rebalance on a single process */
  if (size == 1) {
    if (success) *success = PETSC_TRUE;
    PetscFunctionReturn(0);
  }
  if (success) *success = PETSC_FALSE;
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* NOTE(review): the parallel/useInitialGuess arguments are deliberately overridden here;
     the behavior is controlled exclusively through the options database */
  parallel        = PETSC_FALSE;
  useInitialGuess = PETSC_FALSE;
  PetscObjectOptionsBegin((PetscObject)dm);
  PetscCall(PetscOptionsName("-dm_plex_rebalance_shared_points_parmetis","Use ParMetis instead of Metis for the partitioner","DMPlexRebalanceSharedPoints",&parallel));
  PetscCall(PetscOptionsBool("-dm_plex_rebalance_shared_points_use_initial_guess","Use current partition to bootstrap ParMetis partition","DMPlexRebalanceSharedPoints",useInitialGuess,&useInitialGuess,NULL));
  PetscCall(PetscOptionsBool("-dm_plex_rebalance_shared_points_use_mat_partitioning","Use the MatPartitioning object to partition","DMPlexRebalanceSharedPoints",usematpartitioning,&usematpartitioning,NULL));
  PetscCall(PetscOptionsViewer("-dm_plex_rebalance_shared_points_monitor","Monitor the shared points rebalance process","DMPlexRebalanceSharedPoints",&viewer,&format,NULL));
  PetscOptionsEnd();
  if (viewer) PetscCall(PetscViewerPushFormat(viewer,format));

  PetscCall(PetscLogEventBegin(DMPLEX_RebalanceSharedPoints, dm, 0, 0, 0));

  /* NOTE(review): if both the monitor option above and -dm_rebalance_partition_view are given,
     the reference obtained above is overwritten here without being released -- confirm intended */
  PetscCall(DMGetOptionsPrefix(dm,&prefix));
  PetscCall(PetscOptionsGetViewer(comm,((PetscObject)dm)->options, prefix,"-dm_rebalance_partition_view",&viewer,&format,NULL));
  if (viewer) PetscCall(PetscViewerPushFormat(viewer,format));

  /* Figure out all points in the plex that we are interested in balancing:
     exactly the stratum of depth entityDepth. */
  PetscCall(DMPlexGetDepthStratum(dm, entityDepth, &eBegin, &eEnd));
  PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));
  PetscCall(PetscMalloc1(pEnd-pStart, &toBalance));
  for (i=0; i<pEnd-pStart; i++) {
    toBalance[i] = (PetscBool)(i>=eBegin && i<eEnd);
  }

  /* There are three types of points:
   * exclusivelyOwned: points that are owned by this process and only seen by this process
   * nonExclusivelyOwned: points that are owned by this process but seen by at least another process
   * leaf: a point that is seen by this process but owned by a different process
   */
  PetscCall(DMGetPointSF(dm, &sf));
  PetscCall(PetscSFGetGraph(sf, &nroots, &nleafs, &ilocal, &iremote));
  PetscCall(PetscMalloc1(pEnd-pStart, &isLeaf));
  PetscCall(PetscMalloc1(pEnd-pStart, &isNonExclusivelyOwned));
  PetscCall(PetscMalloc1(pEnd-pStart, &isExclusivelyOwned));
  for (i=0; i<pEnd-pStart; i++) {
    isNonExclusivelyOwned[i] = PETSC_FALSE;
    isExclusivelyOwned[i]    = PETSC_FALSE;
    isLeaf[i]                = PETSC_FALSE;
  }

  /* mark all the leafs */
  for (i=0; i<nleafs; i++) {
    isLeaf[ilocal[i]-pStart] = PETSC_TRUE;
  }

  /* for an owned point, we can figure out whether another processor sees it or
   * not by calculating its degree */
  PetscCall(PetscSFComputeDegreeBegin(sf, &degrees));
  PetscCall(PetscSFComputeDegreeEnd(sf, &degrees));
  numExclusivelyOwned    = 0;
  numNonExclusivelyOwned = 0;
  for (i=0; i<pEnd-pStart; i++) {
    if (toBalance[i]) {
      if (degrees[i] > 0) {
        isNonExclusivelyOwned[i] = PETSC_TRUE;
        numNonExclusivelyOwned  += 1;
      } else {
        if (!isLeaf[i]) {
          isExclusivelyOwned[i] = PETSC_TRUE;
          numExclusivelyOwned  += 1;
        }
      }
    }
  }

  /* Build a graph with one vertex per core representing the
   * exclusively owned points and then one vertex per nonExclusively owned
   * point. */
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetLocalSize(layout, 1 + numNonExclusivelyOwned));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetRanges(layout, &cumSumVertices));
  PetscCall(PetscMalloc1(pEnd-pStart, &globalNumbersOfLocalOwnedVertices));
  /* pStart-1 is an invalid marker: points that get no graph vertex keep it */
  for (i=0; i<pEnd-pStart; i++) {globalNumbersOfLocalOwnedVertices[i] = pStart - 1;}
  offset  = cumSumVertices[rank];
  counter = 0;
  for (i=0; i<pEnd-pStart; i++) {
    if (toBalance[i]) {
      if (degrees[i] > 0) {
        /* +1 skips over this rank's "process vertex" at cumSumVertices[rank] */
        globalNumbersOfLocalOwnedVertices[i] = counter + 1 + offset;
        counter++;
      }
    }
  }

  /* send the global numbers of vertices I own to the leafs so that they know to connect to it */
  PetscCall(PetscMalloc1(pEnd-pStart, &leafGlobalNumbers));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, globalNumbersOfLocalOwnedVertices, leafGlobalNumbers,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, globalNumbersOfLocalOwnedVertices, leafGlobalNumbers,MPI_REPLACE));

  /* Build the graph for partitioning: each shared point is connected (symmetrically)
   * to the process vertex of every rank that sees it. */
  numRows = 1+numNonExclusivelyOwned;
  PetscCall(PetscLogEventBegin(DMPLEX_RebalBuildGraph, dm, 0, 0, 0));
  PetscCall(MatCreate(comm, &A));
  PetscCall(MatSetType(A, MATMPIADJ));
  PetscCall(MatSetSizes(A, numRows, numRows, cumSumVertices[size], cumSumVertices[size]));
  idx = cumSumVertices[rank]; /* this rank's process vertex */
  for (i=0; i<pEnd-pStart; i++) {
    if (toBalance[i]) {
      if (isNonExclusivelyOwned[i])      jdx = globalNumbersOfLocalOwnedVertices[i];
      else if (isLeaf[i])                jdx = leafGlobalNumbers[i];
      else continue;
      PetscCall(MatSetValue(A, idx, jdx, 1, INSERT_VALUES));
      PetscCall(MatSetValue(A, jdx, idx, 1, INSERT_VALUES));
    }
  }
  PetscCall(PetscFree(globalNumbersOfLocalOwnedVertices));
  PetscCall(PetscFree(leafGlobalNumbers));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscLogEventEnd(DMPLEX_RebalBuildGraph, dm, 0, 0, 0));

  nparts = size;
  ncon   = 1;
  PetscCall(PetscMalloc1(ncon * nparts, &tpwgts));
  for (i=0; i<ncon*nparts; i++) {
    tpwgts[i] = 1./(nparts);
  }
  /* fix: both entries are written unconditionally below, but only ncon (== 1) entries
   * were allocated, overflowing the buffer; allocate room for both tolerances */
  PetscCall(PetscMalloc1(2, &ubvec));
  ubvec[0] = 1.05;
  ubvec[1] = 1.05;

  /* Vertex weights: the process vertex carries the number (or a scaled proxy) of
   * exclusively owned points, each shared-point vertex carries unit weight */
  PetscCall(PetscMalloc1(ncon*(1 + numNonExclusivelyOwned), &vtxwgt));
  if (ncon == 2) {
    vtxwgt[0] = numExclusivelyOwned;
    vtxwgt[1] = 1;
    for (i=0; i<numNonExclusivelyOwned; i++) {
      vtxwgt[ncon*(i+1)]   = 1;
      vtxwgt[ncon*(i+1)+1] = 0;
    }
  } else {
    PetscInt base,ms;
    PetscCallMPI(MPI_Allreduce(&numExclusivelyOwned,&base,1,MPIU_INT,MPIU_MAX,PetscObjectComm((PetscObject)dm)));
    PetscCall(MatGetSize(A,&ms,NULL));
    ms  -= size;
    base = PetscMax(base,ms);
    vtxwgt[0] = base + numExclusivelyOwned;
    for (i=0; i<numNonExclusivelyOwned; i++) {
      vtxwgt[i + 1] = 1;
    }
  }

  if (viewer) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "Attempt rebalancing of shared points of depth %" PetscInt_FMT " on interface of mesh distribution.\n", entityDepth));
    PetscCall(PetscViewerASCIIPrintf(viewer, "Size of generated auxiliary graph: %" PetscInt_FMT "\n", cumSumVertices[size]));
  }
  /* TODO: Drop the parallel/sequential choice here and just use MatPartioner for much more flexibility */
  if (usematpartitioning) {
    const char *prefix;

    PetscCall(MatPartitioningCreate(PetscObjectComm((PetscObject)dm),&mp));
    PetscCall(PetscObjectSetOptionsPrefix((PetscObject)mp,"dm_plex_rebalance_shared_points_"));
    PetscCall(PetscObjectGetOptionsPrefix((PetscObject)dm,&prefix));
    PetscCall(PetscObjectPrependOptionsPrefix((PetscObject)mp,prefix));
    PetscCall(MatPartitioningSetAdjacency(mp,A));
    PetscCall(MatPartitioningSetNumberVertexWeights(mp,ncon));
    /* ownership of vtxwgt passes to mp here; it must not be freed below in this case */
    PetscCall(MatPartitioningSetVertexWeights(mp,vtxwgt));
    PetscCall(MatPartitioningSetFromOptions(mp));
    PetscCall(MatPartitioningApply(mp,&ispart));
    PetscCall(ISGetIndices(ispart,(const PetscInt **)&part));
  } else if (parallel) {
    if (viewer) PetscCall(PetscViewerASCIIPrintf(viewer, "Using ParMETIS to partition graph.\n"));
    PetscCall(PetscMalloc1(cumSumVertices[rank+1]-cumSumVertices[rank], &part));
    PetscCall(MatGetRowIJ(A,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE,&numRows,&xadj,&adjncy,&done));
    PetscCheck(done,PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"Could not get adjacency information");
    PetscCall(PetscMalloc1(4, &options));
    options[0] = 1;
    options[1] = 0; /* Verbosity */
    if (viewer) options[1] = 1;
    options[2] = 0;                    /* Seed */
    options[3] = PARMETIS_PSR_COUPLED; /* Sub-domain i is processed by process i */
    wgtflag = 2; /* weights on vertices only */
    numflag = 0; /* C-style numbering */
    if (useInitialGuess) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "THIS DOES NOT WORK! I don't know why. Using current distribution of points as initial guess.\n"));
      for (i=0; i<numRows; i++) part[i] = rank;
      if (viewer) PetscCall(PetscViewerASCIIPrintf(viewer, "Using current distribution of points as initial guess.\n"));
      PetscStackPush("ParMETIS_V3_RefineKway");
      PetscCall(PetscLogEventBegin(DMPLEX_RebalPartition,0,0,0,0));
      ierr = ParMETIS_V3_RefineKway((PetscInt*)cumSumVertices, (idx_t *)xadj, (idx_t *)adjncy, vtxwgt, NULL, &wgtflag, &numflag, &ncon, &nparts, tpwgts, ubvec, options, &edgecut, part, &comm);
      PetscCall(PetscLogEventEnd(DMPLEX_RebalPartition,0,0,0,0));
      PetscStackPop;
      PetscCheck(ierr == METIS_OK,PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ParMETIS_V3_RefineKway()");
    } else {
      PetscStackPush("ParMETIS_V3_PartKway");
      PetscCall(PetscLogEventBegin(DMPLEX_RebalPartition,0,0,0,0));
      ierr = ParMETIS_V3_PartKway((PetscInt*)cumSumVertices, (idx_t *)xadj, (idx_t *)adjncy, vtxwgt, NULL, &wgtflag, &numflag, &ncon, &nparts, tpwgts, ubvec, options, &edgecut, part, &comm);
      PetscCall(PetscLogEventEnd(DMPLEX_RebalPartition,0,0,0,0));
      PetscStackPop;
      PetscCheck(ierr == METIS_OK,PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ParMETIS_V3_PartKway()");
    }
    PetscCall(MatRestoreRowIJ(A,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE,&numRows,&xadj,&adjncy,&done));
    PetscCall(PetscFree(options));
  } else {
    /* Sequential METIS on rank 0: gather the whole graph there, partition, scatter back */
    Mat       As;
    PetscInt *partGlobal;
    PetscInt *numExclusivelyOwnedAll;

    if (viewer) PetscCall(PetscViewerASCIIPrintf(viewer, "Using METIS to partition graph.\n"));

    PetscCall(PetscMalloc1(cumSumVertices[rank+1]-cumSumVertices[rank], &part));
    PetscCall(MatGetSize(A, &numRows, NULL));
    PetscCall(PetscLogEventBegin(DMPLEX_RebalGatherGraph, dm, 0, 0, 0));
    PetscCall(MatMPIAdjToSeqRankZero(A, &As));
    PetscCall(PetscLogEventEnd(DMPLEX_RebalGatherGraph, dm, 0, 0, 0));

    PetscCall(PetscMalloc1(size, &numExclusivelyOwnedAll));
    numExclusivelyOwnedAll[rank] = numExclusivelyOwned;
    PetscCallMPI(MPI_Allgather(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,numExclusivelyOwnedAll,1,MPIU_INT,comm));

    PetscCall(PetscMalloc1(numRows, &partGlobal));
    PetscCall(PetscLogEventBegin(DMPLEX_RebalPartition,0,0,0,0));
    if (rank == 0) {
      PetscInt *vtxwgt_g, numRows_g;

      PetscCall(MatGetRowIJ(As,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE,&numRows_g,&xadj,&adjncy,&done));
      PetscCall(PetscMalloc1(2*numRows_g, &vtxwgt_g));
      for (i=0; i<size; i++) {
        vtxwgt_g[ncon*cumSumVertices[i]] = numExclusivelyOwnedAll[i];
        if (ncon>1) vtxwgt_g[ncon*cumSumVertices[i]+1] = 1;
        for (j=cumSumVertices[i]+1; j<cumSumVertices[i+1]; j++) {
          vtxwgt_g[ncon*j] = 1;
          if (ncon>1) vtxwgt_g[ncon*j+1] = 0; /* was 2*j+1; same value since guarded by ncon>1 */
        }
      }

      PetscCall(PetscMalloc1(64, &options));
      ierr = METIS_SetDefaultOptions(options); /* initialize all defaults */
      PetscCheck(ierr == METIS_OK,PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in METIS_SetDefaultOptions()");
      options[METIS_OPTION_CONTIG] = 1;
      PetscStackPush("METIS_PartGraphKway");
      ierr = METIS_PartGraphKway(&numRows_g, &ncon, (idx_t *)xadj, (idx_t *)adjncy, vtxwgt_g, NULL, NULL, &nparts, tpwgts, ubvec, options, &edgecut, partGlobal);
      PetscStackPop;
      PetscCheck(ierr == METIS_OK,PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in METIS_PartGraphKway()");
      PetscCall(PetscFree(options));
      PetscCall(PetscFree(vtxwgt_g));
      PetscCall(MatRestoreRowIJ(As,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE,&numRows_g,&xadj,&adjncy,&done));
      PetscCall(MatDestroy(&As));
    }
    PetscCall(PetscBarrier((PetscObject)dm));
    PetscCall(PetscLogEventEnd(DMPLEX_RebalPartition,0,0,0,0));
    PetscCall(PetscFree(numExclusivelyOwnedAll));

    /* scatter the partitioning information to ranks */
    PetscCall(PetscLogEventBegin(DMPLEX_RebalScatterPart,0,0,0,0));
    PetscCall(PetscMalloc1(size, &counts));
    PetscCall(PetscMalloc1(size+1, &mpiCumSumVertices));
    for (i=0; i<size; i++) {
      PetscCall(PetscMPIIntCast(cumSumVertices[i+1] - cumSumVertices[i], &(counts[i])));
    }
    for (i=0; i<=size; i++) {
      PetscCall(PetscMPIIntCast(cumSumVertices[i], &(mpiCumSumVertices[i])));
    }
    PetscCallMPI(MPI_Scatterv(partGlobal, counts, mpiCumSumVertices, MPIU_INT, part, counts[rank], MPIU_INT, 0, comm));
    PetscCall(PetscFree(counts));
    PetscCall(PetscFree(mpiCumSumVertices));
    PetscCall(PetscFree(partGlobal));
    PetscCall(PetscLogEventEnd(DMPLEX_RebalScatterPart,0,0,0,0));
  }
  PetscCall(PetscFree(ubvec));
  PetscCall(PetscFree(tpwgts));

  /* Rename the result so that the vertex resembling the exclusively owned points stays on the same rank */
  PetscCall(PetscMalloc2(size, &firstVertices,size, &renumbering));
  firstVertices[rank] = part[0];
  PetscCallMPI(MPI_Allgather(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,firstVertices,1,MPIU_INT,comm));
  for (i=0; i<size; i++) {
    renumbering[firstVertices[i]] = i;
  }
  for (i=0; i<cumSumVertices[rank+1]-cumSumVertices[rank]; i++) {
    part[i] = renumbering[part[i]];
  }
  PetscCall(PetscFree2(firstVertices,renumbering));

  /* Check if the renumbering worked (this can fail when ParMETIS gives fewer partitions than there are processes) */
  failed = (PetscInt)(part[0] != rank);
  PetscCallMPI(MPI_Allreduce(&failed, &failedGlobal, 1, MPIU_INT, MPI_SUM, comm));
  /* fix: the original wrapped this check in "if (failedGlobal > 0) { PetscCheck(failedGlobal <= 0, ...); <cleanup> }",
   * where the PetscCheck always raised inside the branch, leaving the cleanup unreachable dead code
   * (which in addition did not destroy A or layout); raise the error directly instead */
  PetscCheck(failedGlobal <= 0,comm,PETSC_ERR_LIB,"Metis/Parmetis returned a bad partition");

  /* Check how well we did distributing points*/
  if (viewer) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "Number of owned entities of depth %" PetscInt_FMT ".\n", entityDepth));
    PetscCall(PetscViewerASCIIPrintf(viewer, "Initial "));
    PetscCall(DMPlexViewDistribution(comm, cumSumVertices[rank+1]-cumSumVertices[rank], ncon, vtxwgt, NULL, viewer));
    PetscCall(PetscViewerASCIIPrintf(viewer, "Rebalanced "));
    PetscCall(DMPlexViewDistribution(comm, cumSumVertices[rank+1]-cumSumVertices[rank], ncon, vtxwgt, part, viewer));
  }

  /* Check that every vertex is owned by a process that it is actually connected to. */
  PetscCall(MatGetRowIJ(A,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE,&numRows,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done));
  for (i=1; i<=numNonExclusivelyOwned; i++) {
    PetscInt loc = 0;
    PetscCall(PetscFindInt(cumSumVertices[part[i]], xadj[i+1]-xadj[i], &adjncy[xadj[i]], &loc));
    /* If not, then just set the owner to the original owner (hopefully a rare event, it means that a vertex has been isolated) */
    if (loc<0) {
      part[i] = rank;
    }
  }
  PetscCall(MatRestoreRowIJ(A,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE,&numRows,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done));
  PetscCall(MatDestroy(&A));

  /* See how significant the influences of the previous fixing up step was.*/
  if (viewer) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "After fix "));
    PetscCall(DMPlexViewDistribution(comm, cumSumVertices[rank+1]-cumSumVertices[rank], ncon, vtxwgt, part, viewer));
  }
  /* vtxwgt is owned by mp when MatPartitioning was used, otherwise by us */
  if (!usematpartitioning) PetscCall(PetscFree(vtxwgt));
  else PetscCall(MatPartitioningDestroy(&mp));

  PetscCall(PetscLayoutDestroy(&layout));

  PetscCall(PetscLogEventBegin(DMPLEX_RebalRewriteSF, dm, 0, 0, 0));
  /* Rewrite the SF to reflect the new ownership. */
  PetscCall(PetscMalloc1(numNonExclusivelyOwned, &pointsToRewrite));
  counter = 0;
  for (i=0; i<pEnd-pStart; i++) {
    if (toBalance[i]) {
      if (isNonExclusivelyOwned[i]) {
        pointsToRewrite[counter] = i + pStart;
        counter++;
      }
    }
  }
  /* part+1 skips the process vertex; entries map 1:1 to pointsToRewrite */
  PetscCall(DMPlexRewriteSF(dm, numNonExclusivelyOwned, pointsToRewrite, part+1, degrees));
  PetscCall(PetscFree(pointsToRewrite));
  PetscCall(PetscLogEventEnd(DMPLEX_RebalRewriteSF, dm, 0, 0, 0));

  PetscCall(PetscFree(toBalance));
  PetscCall(PetscFree(isLeaf));
  PetscCall(PetscFree(isNonExclusivelyOwned));
  PetscCall(PetscFree(isExclusivelyOwned));
  if (usematpartitioning) {
    PetscCall(ISRestoreIndices(ispart,(const PetscInt **)&part));
    PetscCall(ISDestroy(&ispart));
  } else PetscCall(PetscFree(part));
  if (viewer) {
    PetscCall(PetscViewerPopFormat(viewer));
    PetscCall(PetscViewerDestroy(&viewer));
  }
  if (success) *success = PETSC_TRUE;
  PetscCall(PetscLogEventEnd(DMPLEX_RebalanceSharedPoints, dm, 0, 0, 0));
  PetscFunctionReturn(0);
#else
  SETERRQ(PetscObjectComm((PetscObject) dm), PETSC_ERR_SUP, "Mesh partitioning needs external package support.\nPlease reconfigure with --download-parmetis.");
#endif
}