1 #include <petsc/private/dmpleximpl.h> /*I "petscdmplex.h" I*/ 2 #include <petsc/private/partitionerimpl.h> 3 #include <petsc/private/hashseti.h> 4 5 const char * const DMPlexCSRAlgorithms[] = {"mat", "graph", "overlap", "DMPlexCSRAlgorithm", "DM_PLEX_CSR_",NULL}; 6 7 static inline PetscInt DMPlex_GlobalID(PetscInt point) { return point >= 0 ? point : -(point+1); } 8 9 static PetscErrorCode DMPlexCreatePartitionerGraph_Overlap(DM dm, PetscInt height, PetscInt *numVertices, PetscInt **offsets, PetscInt **adjacency, IS *globalNumbering) 10 { 11 DM ovdm; 12 PetscSF sfPoint; 13 IS cellNumbering; 14 const PetscInt *cellNum; 15 PetscInt *adj = NULL, *vOffsets = NULL, *vAdj = NULL; 16 PetscBool useCone, useClosure; 17 PetscInt dim, depth, overlap, cStart, cEnd, c, v; 18 PetscMPIInt rank, size; 19 20 PetscFunctionBegin; 21 CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject) dm), &rank)); 22 CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject) dm), &size)); 23 CHKERRQ(DMGetDimension(dm, &dim)); 24 CHKERRQ(DMPlexGetDepth(dm, &depth)); 25 if (dim != depth) { 26 /* We do not handle the uninterpolated case here */ 27 CHKERRQ(DMPlexCreateNeighborCSR(dm, height, numVertices, offsets, adjacency)); 28 /* DMPlexCreateNeighborCSR does not make a numbering */ 29 if (globalNumbering) CHKERRQ(DMPlexCreateCellNumbering_Internal(dm, PETSC_TRUE, globalNumbering)); 30 /* Different behavior for empty graphs */ 31 if (!*numVertices) { 32 CHKERRQ(PetscMalloc1(1, offsets)); 33 (*offsets)[0] = 0; 34 } 35 /* Broken in parallel */ 36 if (rank) PetscCheck(!*numVertices,PETSC_COMM_SELF, PETSC_ERR_SUP, "Parallel partitioning of uninterpolated meshes not supported"); 37 PetscFunctionReturn(0); 38 } 39 /* Always use FVM adjacency to create partitioner graph */ 40 CHKERRQ(DMGetBasicAdjacency(dm, &useCone, &useClosure)); 41 CHKERRQ(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE)); 42 /* Need overlap >= 1 */ 43 CHKERRQ(DMPlexGetOverlap(dm, &overlap)); 44 if (size && overlap < 1) { 45 
CHKERRQ(DMPlexDistributeOverlap(dm, 1, NULL, &ovdm)); 46 } else { 47 CHKERRQ(PetscObjectReference((PetscObject) dm)); 48 ovdm = dm; 49 } 50 CHKERRQ(DMGetPointSF(ovdm, &sfPoint)); 51 CHKERRQ(DMPlexGetHeightStratum(ovdm, height, &cStart, &cEnd)); 52 CHKERRQ(DMPlexCreateNumbering_Plex(ovdm, cStart, cEnd, 0, NULL, sfPoint, &cellNumbering)); 53 if (globalNumbering) { 54 CHKERRQ(PetscObjectReference((PetscObject) cellNumbering)); 55 *globalNumbering = cellNumbering; 56 } 57 CHKERRQ(ISGetIndices(cellNumbering, &cellNum)); 58 /* Determine sizes */ 59 for (*numVertices = 0, c = cStart; c < cEnd; ++c) { 60 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 61 if (cellNum[c] < 0) continue; 62 (*numVertices)++; 63 } 64 CHKERRQ(PetscCalloc1(*numVertices+1, &vOffsets)); 65 for (c = cStart, v = 0; c < cEnd; ++c) { 66 PetscInt adjSize = PETSC_DETERMINE, a, vsize = 0; 67 68 if (cellNum[c] < 0) continue; 69 CHKERRQ(DMPlexGetAdjacency(ovdm, c, &adjSize, &adj)); 70 for (a = 0; a < adjSize; ++a) { 71 const PetscInt point = adj[a]; 72 if (point != c && cStart <= point && point < cEnd) ++vsize; 73 } 74 vOffsets[v+1] = vOffsets[v] + vsize; 75 ++v; 76 } 77 /* Determine adjacency */ 78 CHKERRQ(PetscMalloc1(vOffsets[*numVertices], &vAdj)); 79 for (c = cStart, v = 0; c < cEnd; ++c) { 80 PetscInt adjSize = PETSC_DETERMINE, a, off = vOffsets[v]; 81 82 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 83 if (cellNum[c] < 0) continue; 84 CHKERRQ(DMPlexGetAdjacency(ovdm, c, &adjSize, &adj)); 85 for (a = 0; a < adjSize; ++a) { 86 const PetscInt point = adj[a]; 87 if (point != c && cStart <= point && point < cEnd) { 88 vAdj[off++] = DMPlex_GlobalID(cellNum[point]); 89 } 90 } 91 PetscCheck(off == vOffsets[v+1],PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Offsets %D should be %D", off, vOffsets[v+1]); 92 /* Sort adjacencies (not strictly necessary) */ 93 CHKERRQ(PetscSortInt(off-vOffsets[v], &vAdj[vOffsets[v]])); 94 ++v; 95 } 96 CHKERRQ(PetscFree(adj)); 97 
CHKERRQ(ISRestoreIndices(cellNumbering, &cellNum)); 98 CHKERRQ(ISDestroy(&cellNumbering)); 99 CHKERRQ(DMSetBasicAdjacency(dm, useCone, useClosure)); 100 CHKERRQ(DMDestroy(&ovdm)); 101 if (offsets) {*offsets = vOffsets;} 102 else CHKERRQ(PetscFree(vOffsets)); 103 if (adjacency) {*adjacency = vAdj;} 104 else CHKERRQ(PetscFree(vAdj)); 105 PetscFunctionReturn(0); 106 } 107 108 static PetscErrorCode DMPlexCreatePartitionerGraph_Native(DM dm, PetscInt height, PetscInt *numVertices, PetscInt **offsets, PetscInt **adjacency, IS *globalNumbering) 109 { 110 PetscInt dim, depth, p, pStart, pEnd, a, adjSize, idx, size; 111 PetscInt *adj = NULL, *vOffsets = NULL, *graph = NULL; 112 IS cellNumbering; 113 const PetscInt *cellNum; 114 PetscBool useCone, useClosure; 115 PetscSection section; 116 PetscSegBuffer adjBuffer; 117 PetscSF sfPoint; 118 PetscInt *adjCells = NULL, *remoteCells = NULL; 119 const PetscInt *local; 120 PetscInt nroots, nleaves, l; 121 PetscMPIInt rank; 122 123 PetscFunctionBegin; 124 CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject) dm), &rank)); 125 CHKERRQ(DMGetDimension(dm, &dim)); 126 CHKERRQ(DMPlexGetDepth(dm, &depth)); 127 if (dim != depth) { 128 /* We do not handle the uninterpolated case here */ 129 CHKERRQ(DMPlexCreateNeighborCSR(dm, height, numVertices, offsets, adjacency)); 130 /* DMPlexCreateNeighborCSR does not make a numbering */ 131 if (globalNumbering) CHKERRQ(DMPlexCreateCellNumbering_Internal(dm, PETSC_TRUE, globalNumbering)); 132 /* Different behavior for empty graphs */ 133 if (!*numVertices) { 134 CHKERRQ(PetscMalloc1(1, offsets)); 135 (*offsets)[0] = 0; 136 } 137 /* Broken in parallel */ 138 if (rank) PetscCheck(!*numVertices,PETSC_COMM_SELF, PETSC_ERR_SUP, "Parallel partitioning of uninterpolated meshes not supported"); 139 PetscFunctionReturn(0); 140 } 141 CHKERRQ(DMGetPointSF(dm, &sfPoint)); 142 CHKERRQ(DMPlexGetHeightStratum(dm, height, &pStart, &pEnd)); 143 /* Build adjacency graph via a section/segbuffer */ 144 
CHKERRQ(PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion)); 145 CHKERRQ(PetscSectionSetChart(section, pStart, pEnd)); 146 CHKERRQ(PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer)); 147 /* Always use FVM adjacency to create partitioner graph */ 148 CHKERRQ(DMGetBasicAdjacency(dm, &useCone, &useClosure)); 149 CHKERRQ(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE)); 150 CHKERRQ(DMPlexCreateNumbering_Plex(dm, pStart, pEnd, 0, NULL, sfPoint, &cellNumbering)); 151 if (globalNumbering) { 152 CHKERRQ(PetscObjectReference((PetscObject)cellNumbering)); 153 *globalNumbering = cellNumbering; 154 } 155 CHKERRQ(ISGetIndices(cellNumbering, &cellNum)); 156 /* For all boundary faces (including faces adjacent to a ghost cell), record the local cell in adjCells 157 Broadcast adjCells to remoteCells (to get cells from roots) and Reduce adjCells to remoteCells (to get cells from leaves) 158 */ 159 CHKERRQ(PetscSFGetGraph(sfPoint, &nroots, &nleaves, &local, NULL)); 160 if (nroots >= 0) { 161 PetscInt fStart, fEnd, f; 162 163 CHKERRQ(PetscCalloc2(nroots, &adjCells, nroots, &remoteCells)); 164 CHKERRQ(DMPlexGetHeightStratum(dm, height+1, &fStart, &fEnd)); 165 for (l = 0; l < nroots; ++l) adjCells[l] = -3; 166 for (f = fStart; f < fEnd; ++f) { 167 const PetscInt *support; 168 PetscInt supportSize; 169 170 CHKERRQ(DMPlexGetSupport(dm, f, &support)); 171 CHKERRQ(DMPlexGetSupportSize(dm, f, &supportSize)); 172 if (supportSize == 1) adjCells[f] = DMPlex_GlobalID(cellNum[support[0]]); 173 else if (supportSize == 2) { 174 CHKERRQ(PetscFindInt(support[0], nleaves, local, &p)); 175 if (p >= 0) adjCells[f] = DMPlex_GlobalID(cellNum[support[1]]); 176 CHKERRQ(PetscFindInt(support[1], nleaves, local, &p)); 177 if (p >= 0) adjCells[f] = DMPlex_GlobalID(cellNum[support[0]]); 178 } 179 /* Handle non-conforming meshes */ 180 if (supportSize > 2) { 181 PetscInt numChildren, i; 182 const PetscInt *children; 183 184 CHKERRQ(DMPlexGetTreeChildren(dm, f, &numChildren, &children)); 185 for (i 
= 0; i < numChildren; ++i) { 186 const PetscInt child = children[i]; 187 if (fStart <= child && child < fEnd) { 188 CHKERRQ(DMPlexGetSupport(dm, child, &support)); 189 CHKERRQ(DMPlexGetSupportSize(dm, child, &supportSize)); 190 if (supportSize == 1) adjCells[child] = DMPlex_GlobalID(cellNum[support[0]]); 191 else if (supportSize == 2) { 192 CHKERRQ(PetscFindInt(support[0], nleaves, local, &p)); 193 if (p >= 0) adjCells[child] = DMPlex_GlobalID(cellNum[support[1]]); 194 CHKERRQ(PetscFindInt(support[1], nleaves, local, &p)); 195 if (p >= 0) adjCells[child] = DMPlex_GlobalID(cellNum[support[0]]); 196 } 197 } 198 } 199 } 200 } 201 for (l = 0; l < nroots; ++l) remoteCells[l] = -1; 202 CHKERRQ(PetscSFBcastBegin(dm->sf, MPIU_INT, adjCells, remoteCells,MPI_REPLACE)); 203 CHKERRQ(PetscSFBcastEnd(dm->sf, MPIU_INT, adjCells, remoteCells,MPI_REPLACE)); 204 CHKERRQ(PetscSFReduceBegin(dm->sf, MPIU_INT, adjCells, remoteCells, MPI_MAX)); 205 CHKERRQ(PetscSFReduceEnd(dm->sf, MPIU_INT, adjCells, remoteCells, MPI_MAX)); 206 } 207 /* Combine local and global adjacencies */ 208 for (*numVertices = 0, p = pStart; p < pEnd; p++) { 209 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 210 if (nroots > 0) {if (cellNum[p] < 0) continue;} 211 /* Add remote cells */ 212 if (remoteCells) { 213 const PetscInt gp = DMPlex_GlobalID(cellNum[p]); 214 PetscInt coneSize, numChildren, c, i; 215 const PetscInt *cone, *children; 216 217 CHKERRQ(DMPlexGetCone(dm, p, &cone)); 218 CHKERRQ(DMPlexGetConeSize(dm, p, &coneSize)); 219 for (c = 0; c < coneSize; ++c) { 220 const PetscInt point = cone[c]; 221 if (remoteCells[point] >= 0 && remoteCells[point] != gp) { 222 PetscInt *PETSC_RESTRICT pBuf; 223 CHKERRQ(PetscSectionAddDof(section, p, 1)); 224 CHKERRQ(PetscSegBufferGetInts(adjBuffer, 1, &pBuf)); 225 *pBuf = remoteCells[point]; 226 } 227 /* Handle non-conforming meshes */ 228 CHKERRQ(DMPlexGetTreeChildren(dm, point, &numChildren, &children)); 229 for (i = 0; i < numChildren; ++i) { 230 
const PetscInt child = children[i]; 231 if (remoteCells[child] >= 0 && remoteCells[child] != gp) { 232 PetscInt *PETSC_RESTRICT pBuf; 233 CHKERRQ(PetscSectionAddDof(section, p, 1)); 234 CHKERRQ(PetscSegBufferGetInts(adjBuffer, 1, &pBuf)); 235 *pBuf = remoteCells[child]; 236 } 237 } 238 } 239 } 240 /* Add local cells */ 241 adjSize = PETSC_DETERMINE; 242 CHKERRQ(DMPlexGetAdjacency(dm, p, &adjSize, &adj)); 243 for (a = 0; a < adjSize; ++a) { 244 const PetscInt point = adj[a]; 245 if (point != p && pStart <= point && point < pEnd) { 246 PetscInt *PETSC_RESTRICT pBuf; 247 CHKERRQ(PetscSectionAddDof(section, p, 1)); 248 CHKERRQ(PetscSegBufferGetInts(adjBuffer, 1, &pBuf)); 249 *pBuf = DMPlex_GlobalID(cellNum[point]); 250 } 251 } 252 (*numVertices)++; 253 } 254 CHKERRQ(PetscFree(adj)); 255 CHKERRQ(PetscFree2(adjCells, remoteCells)); 256 CHKERRQ(DMSetBasicAdjacency(dm, useCone, useClosure)); 257 258 /* Derive CSR graph from section/segbuffer */ 259 CHKERRQ(PetscSectionSetUp(section)); 260 CHKERRQ(PetscSectionGetStorageSize(section, &size)); 261 CHKERRQ(PetscMalloc1(*numVertices+1, &vOffsets)); 262 for (idx = 0, p = pStart; p < pEnd; p++) { 263 if (nroots > 0) {if (cellNum[p] < 0) continue;} 264 CHKERRQ(PetscSectionGetOffset(section, p, &(vOffsets[idx++]))); 265 } 266 vOffsets[*numVertices] = size; 267 CHKERRQ(PetscSegBufferExtractAlloc(adjBuffer, &graph)); 268 269 if (nroots >= 0) { 270 /* Filter out duplicate edges using section/segbuffer */ 271 CHKERRQ(PetscSectionReset(section)); 272 CHKERRQ(PetscSectionSetChart(section, 0, *numVertices)); 273 for (p = 0; p < *numVertices; p++) { 274 PetscInt start = vOffsets[p], end = vOffsets[p+1]; 275 PetscInt numEdges = end-start, *PETSC_RESTRICT edges; 276 CHKERRQ(PetscSortRemoveDupsInt(&numEdges, &graph[start])); 277 CHKERRQ(PetscSectionSetDof(section, p, numEdges)); 278 CHKERRQ(PetscSegBufferGetInts(adjBuffer, numEdges, &edges)); 279 CHKERRQ(PetscArraycpy(edges, &graph[start], numEdges)); 280 } 281 CHKERRQ(PetscFree(vOffsets)); 
282 CHKERRQ(PetscFree(graph)); 283 /* Derive CSR graph from section/segbuffer */ 284 CHKERRQ(PetscSectionSetUp(section)); 285 CHKERRQ(PetscSectionGetStorageSize(section, &size)); 286 CHKERRQ(PetscMalloc1(*numVertices+1, &vOffsets)); 287 for (idx = 0, p = 0; p < *numVertices; p++) { 288 CHKERRQ(PetscSectionGetOffset(section, p, &(vOffsets[idx++]))); 289 } 290 vOffsets[*numVertices] = size; 291 CHKERRQ(PetscSegBufferExtractAlloc(adjBuffer, &graph)); 292 } else { 293 /* Sort adjacencies (not strictly necessary) */ 294 for (p = 0; p < *numVertices; p++) { 295 PetscInt start = vOffsets[p], end = vOffsets[p+1]; 296 CHKERRQ(PetscSortInt(end-start, &graph[start])); 297 } 298 } 299 300 if (offsets) *offsets = vOffsets; 301 if (adjacency) *adjacency = graph; 302 303 /* Cleanup */ 304 CHKERRQ(PetscSegBufferDestroy(&adjBuffer)); 305 CHKERRQ(PetscSectionDestroy(§ion)); 306 CHKERRQ(ISRestoreIndices(cellNumbering, &cellNum)); 307 CHKERRQ(ISDestroy(&cellNumbering)); 308 PetscFunctionReturn(0); 309 } 310 311 static PetscErrorCode DMPlexCreatePartitionerGraph_ViaMat(DM dm, PetscInt height, PetscInt *numVertices, PetscInt **offsets, PetscInt **adjacency, IS *globalNumbering) 312 { 313 Mat conn, CSR; 314 IS fis, cis, cis_own; 315 PetscSF sfPoint; 316 const PetscInt *rows, *cols, *ii, *jj; 317 PetscInt *idxs,*idxs2; 318 PetscInt dim, depth, floc, cloc, i, M, N, c, lm, m, cStart, cEnd, fStart, fEnd; 319 PetscMPIInt rank; 320 PetscBool flg; 321 322 PetscFunctionBegin; 323 CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject) dm), &rank)); 324 CHKERRQ(DMGetDimension(dm, &dim)); 325 CHKERRQ(DMPlexGetDepth(dm, &depth)); 326 if (dim != depth) { 327 /* We do not handle the uninterpolated case here */ 328 CHKERRQ(DMPlexCreateNeighborCSR(dm, height, numVertices, offsets, adjacency)); 329 /* DMPlexCreateNeighborCSR does not make a numbering */ 330 if (globalNumbering) CHKERRQ(DMPlexCreateCellNumbering_Internal(dm, PETSC_TRUE, globalNumbering)); 331 /* Different behavior for empty graphs */ 332 
if (!*numVertices) { 333 CHKERRQ(PetscMalloc1(1, offsets)); 334 (*offsets)[0] = 0; 335 } 336 /* Broken in parallel */ 337 if (rank) PetscCheck(!*numVertices,PETSC_COMM_SELF, PETSC_ERR_SUP, "Parallel partitioning of uninterpolated meshes not supported"); 338 PetscFunctionReturn(0); 339 } 340 /* Interpolated and parallel case */ 341 342 /* numbering */ 343 CHKERRQ(DMGetPointSF(dm, &sfPoint)); 344 CHKERRQ(DMPlexGetHeightStratum(dm, height, &cStart, &cEnd)); 345 CHKERRQ(DMPlexGetHeightStratum(dm, height+1, &fStart, &fEnd)); 346 CHKERRQ(DMPlexCreateNumbering_Plex(dm, cStart, cEnd, 0, &N, sfPoint, &cis)); 347 CHKERRQ(DMPlexCreateNumbering_Plex(dm, fStart, fEnd, 0, &M, sfPoint, &fis)); 348 if (globalNumbering) { 349 CHKERRQ(ISDuplicate(cis, globalNumbering)); 350 } 351 352 /* get positive global ids and local sizes for facets and cells */ 353 CHKERRQ(ISGetLocalSize(fis, &m)); 354 CHKERRQ(ISGetIndices(fis, &rows)); 355 CHKERRQ(PetscMalloc1(m, &idxs)); 356 for (i = 0, floc = 0; i < m; i++) { 357 const PetscInt p = rows[i]; 358 359 if (p < 0) { 360 idxs[i] = -(p+1); 361 } else { 362 idxs[i] = p; 363 floc += 1; 364 } 365 } 366 CHKERRQ(ISRestoreIndices(fis, &rows)); 367 CHKERRQ(ISDestroy(&fis)); 368 CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF, m, idxs, PETSC_OWN_POINTER, &fis)); 369 370 CHKERRQ(ISGetLocalSize(cis, &m)); 371 CHKERRQ(ISGetIndices(cis, &cols)); 372 CHKERRQ(PetscMalloc1(m, &idxs)); 373 CHKERRQ(PetscMalloc1(m, &idxs2)); 374 for (i = 0, cloc = 0; i < m; i++) { 375 const PetscInt p = cols[i]; 376 377 if (p < 0) { 378 idxs[i] = -(p+1); 379 } else { 380 idxs[i] = p; 381 idxs2[cloc++] = p; 382 } 383 } 384 CHKERRQ(ISRestoreIndices(cis, &cols)); 385 CHKERRQ(ISDestroy(&cis)); 386 CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)dm), m, idxs, PETSC_OWN_POINTER, &cis)); 387 CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)dm), cloc, idxs2, PETSC_OWN_POINTER, &cis_own)); 388 389 /* Create matrix to hold F-C connectivity (MatMatTranspose Mult not supported for MPIAIJ) */ 
390 CHKERRQ(MatCreate(PetscObjectComm((PetscObject)dm), &conn)); 391 CHKERRQ(MatSetSizes(conn, floc, cloc, M, N)); 392 CHKERRQ(MatSetType(conn, MATMPIAIJ)); 393 CHKERRQ(DMPlexGetMaxSizes(dm, NULL, &lm)); 394 CHKERRMPI(MPI_Allreduce(&lm, &m, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject) dm))); 395 CHKERRQ(MatMPIAIJSetPreallocation(conn, m, NULL, m, NULL)); 396 397 /* Assemble matrix */ 398 CHKERRQ(ISGetIndices(fis, &rows)); 399 CHKERRQ(ISGetIndices(cis, &cols)); 400 for (c = cStart; c < cEnd; c++) { 401 const PetscInt *cone; 402 PetscInt coneSize, row, col, f; 403 404 col = cols[c-cStart]; 405 CHKERRQ(DMPlexGetCone(dm, c, &cone)); 406 CHKERRQ(DMPlexGetConeSize(dm, c, &coneSize)); 407 for (f = 0; f < coneSize; f++) { 408 const PetscScalar v = 1.0; 409 const PetscInt *children; 410 PetscInt numChildren, ch; 411 412 row = rows[cone[f]-fStart]; 413 CHKERRQ(MatSetValues(conn, 1, &row, 1, &col, &v, INSERT_VALUES)); 414 415 /* non-conforming meshes */ 416 CHKERRQ(DMPlexGetTreeChildren(dm, cone[f], &numChildren, &children)); 417 for (ch = 0; ch < numChildren; ch++) { 418 const PetscInt child = children[ch]; 419 420 if (child < fStart || child >= fEnd) continue; 421 row = rows[child-fStart]; 422 CHKERRQ(MatSetValues(conn, 1, &row, 1, &col, &v, INSERT_VALUES)); 423 } 424 } 425 } 426 CHKERRQ(ISRestoreIndices(fis, &rows)); 427 CHKERRQ(ISRestoreIndices(cis, &cols)); 428 CHKERRQ(ISDestroy(&fis)); 429 CHKERRQ(ISDestroy(&cis)); 430 CHKERRQ(MatAssemblyBegin(conn, MAT_FINAL_ASSEMBLY)); 431 CHKERRQ(MatAssemblyEnd(conn, MAT_FINAL_ASSEMBLY)); 432 433 /* Get parallel CSR by doing conn^T * conn */ 434 CHKERRQ(MatTransposeMatMult(conn, conn, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &CSR)); 435 CHKERRQ(MatDestroy(&conn)); 436 437 /* extract local part of the CSR */ 438 CHKERRQ(MatMPIAIJGetLocalMat(CSR, MAT_INITIAL_MATRIX, &conn)); 439 CHKERRQ(MatDestroy(&CSR)); 440 CHKERRQ(MatGetRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &m, &ii, &jj, &flg)); 441 PetscCheck(flg,PETSC_COMM_SELF, 
PETSC_ERR_PLIB, "No IJ format"); 442 443 /* get back requested output */ 444 if (numVertices) *numVertices = m; 445 if (offsets) { 446 CHKERRQ(PetscCalloc1(m+1, &idxs)); 447 for (i = 1; i < m+1; i++) idxs[i] = ii[i] - i; /* ParMetis does not like self-connectivity */ 448 *offsets = idxs; 449 } 450 if (adjacency) { 451 CHKERRQ(PetscMalloc1(ii[m] - m, &idxs)); 452 CHKERRQ(ISGetIndices(cis_own, &rows)); 453 for (i = 0, c = 0; i < m; i++) { 454 PetscInt j, g = rows[i]; 455 456 for (j = ii[i]; j < ii[i+1]; j++) { 457 if (jj[j] == g) continue; /* again, self-connectivity */ 458 idxs[c++] = jj[j]; 459 } 460 } 461 PetscCheck(c == ii[m] - m,PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected %D != %D",c,ii[m]-m); 462 CHKERRQ(ISRestoreIndices(cis_own, &rows)); 463 *adjacency = idxs; 464 } 465 466 /* cleanup */ 467 CHKERRQ(ISDestroy(&cis_own)); 468 CHKERRQ(MatRestoreRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &m, &ii, &jj, &flg)); 469 PetscCheck(flg,PETSC_COMM_SELF, PETSC_ERR_PLIB, "No IJ format"); 470 CHKERRQ(MatDestroy(&conn)); 471 PetscFunctionReturn(0); 472 } 473 474 /*@C 475 DMPlexCreatePartitionerGraph - Create a CSR graph of point connections for the partitioner 476 477 Input Parameters: 478 + dm - The mesh DM dm 479 - height - Height of the strata from which to construct the graph 480 481 Output Parameters: 482 + numVertices - Number of vertices in the graph 483 . offsets - Point offsets in the graph 484 . adjacency - Point connectivity in the graph 485 - globalNumbering - A map from the local cell numbering to the global numbering used in "adjacency". Negative indicates that the cell is a duplicate from another process. 486 487 The user can control the definition of adjacency for the mesh using DMSetAdjacency(). They should choose the combination appropriate for the function 488 representation on the mesh. If requested, globalNumbering needs to be destroyed by the caller; offsets and adjacency need to be freed with PetscFree(). 489 490 Options Database Keys: 491 . 
-dm_plex_csr_alg <mat,graph,overlap> - Choose the algorithm for computing the CSR graph

  Level: developer

.seealso: PetscPartitionerGetType(), PetscPartitionerCreate(), DMSetAdjacency()
@*/
PetscErrorCode DMPlexCreatePartitionerGraph(DM dm, PetscInt height, PetscInt *numVertices, PetscInt **offsets, PetscInt **adjacency, IS *globalNumbering)
{
  DMPlexCSRAlgorithm alg = DM_PLEX_CSR_GRAPH;

  PetscFunctionBegin;
  /* Let the options database override the default (native graph) construction algorithm */
  CHKERRQ(PetscOptionsGetEnum(((PetscObject) dm)->options,((PetscObject) dm)->prefix, "-dm_plex_csr_alg", DMPlexCSRAlgorithms, (PetscEnum *) &alg, NULL));
  /* Dispatch to the selected implementation; all three share the same calling convention */
  if (alg == DM_PLEX_CSR_MAT) {
    CHKERRQ(DMPlexCreatePartitionerGraph_ViaMat(dm, height, numVertices, offsets, adjacency, globalNumbering));
  } else if (alg == DM_PLEX_CSR_GRAPH) {
    CHKERRQ(DMPlexCreatePartitionerGraph_Native(dm, height, numVertices, offsets, adjacency, globalNumbering));
  } else if (alg == DM_PLEX_CSR_OVERLAP) {
    CHKERRQ(DMPlexCreatePartitionerGraph_Overlap(dm, height, numVertices, offsets, adjacency, globalNumbering));
  }
  PetscFunctionReturn(0);
}

/*@
  DMPlexCreateNeighborCSR - Create a mesh graph (cell-cell adjacency) in parallel CSR format.

  Collective on DM

  Input Parameters:
+ dm         - The DMPlex
- cellHeight - The height of mesh points to treat as cells (default should be 0)

  Output Parameters:
+ numVertices - The number of local vertices in the graph, or cells in the mesh.
. offsets     - The offset to the adjacency list for each cell
- adjacency   - The adjacency list for all cells

  Note: This is suitable for input to a mesh partitioner like ParMetis.
529 530 Level: advanced 531 532 .seealso: DMPlexCreate() 533 @*/ 534 PetscErrorCode DMPlexCreateNeighborCSR(DM dm, PetscInt cellHeight, PetscInt *numVertices, PetscInt **offsets, PetscInt **adjacency) 535 { 536 const PetscInt maxFaceCases = 30; 537 PetscInt numFaceCases = 0; 538 PetscInt numFaceVertices[30]; /* maxFaceCases, C89 sucks sucks sucks */ 539 PetscInt *off, *adj; 540 PetscInt *neighborCells = NULL; 541 PetscInt dim, cellDim, depth = 0, faceDepth, cStart, cEnd, c, numCells, cell; 542 543 PetscFunctionBegin; 544 /* For parallel partitioning, I think you have to communicate supports */ 545 CHKERRQ(DMGetDimension(dm, &dim)); 546 cellDim = dim - cellHeight; 547 CHKERRQ(DMPlexGetDepth(dm, &depth)); 548 CHKERRQ(DMPlexGetHeightStratum(dm, cellHeight, &cStart, &cEnd)); 549 if (cEnd - cStart == 0) { 550 if (numVertices) *numVertices = 0; 551 if (offsets) *offsets = NULL; 552 if (adjacency) *adjacency = NULL; 553 PetscFunctionReturn(0); 554 } 555 numCells = cEnd - cStart; 556 faceDepth = depth - cellHeight; 557 if (dim == depth) { 558 PetscInt f, fStart, fEnd; 559 560 CHKERRQ(PetscCalloc1(numCells+1, &off)); 561 /* Count neighboring cells */ 562 CHKERRQ(DMPlexGetHeightStratum(dm, cellHeight+1, &fStart, &fEnd)); 563 for (f = fStart; f < fEnd; ++f) { 564 const PetscInt *support; 565 PetscInt supportSize; 566 CHKERRQ(DMPlexGetSupportSize(dm, f, &supportSize)); 567 CHKERRQ(DMPlexGetSupport(dm, f, &support)); 568 if (supportSize == 2) { 569 PetscInt numChildren; 570 571 CHKERRQ(DMPlexGetTreeChildren(dm,f,&numChildren,NULL)); 572 if (!numChildren) { 573 ++off[support[0]-cStart+1]; 574 ++off[support[1]-cStart+1]; 575 } 576 } 577 } 578 /* Prefix sum */ 579 for (c = 1; c <= numCells; ++c) off[c] += off[c-1]; 580 if (adjacency) { 581 PetscInt *tmp; 582 583 CHKERRQ(PetscMalloc1(off[numCells], &adj)); 584 CHKERRQ(PetscMalloc1(numCells+1, &tmp)); 585 CHKERRQ(PetscArraycpy(tmp, off, numCells+1)); 586 /* Get neighboring cells */ 587 for (f = fStart; f < fEnd; ++f) { 588 const 
PetscInt *support; 589 PetscInt supportSize; 590 CHKERRQ(DMPlexGetSupportSize(dm, f, &supportSize)); 591 CHKERRQ(DMPlexGetSupport(dm, f, &support)); 592 if (supportSize == 2) { 593 PetscInt numChildren; 594 595 CHKERRQ(DMPlexGetTreeChildren(dm,f,&numChildren,NULL)); 596 if (!numChildren) { 597 adj[tmp[support[0]-cStart]++] = support[1]; 598 adj[tmp[support[1]-cStart]++] = support[0]; 599 } 600 } 601 } 602 for (c = 0; c < cEnd-cStart; ++c) PetscAssert(tmp[c] == off[c+1],PETSC_COMM_SELF, PETSC_ERR_PLIB, "Offset %d != %d for cell %d", tmp[c], off[c], c+cStart); 603 CHKERRQ(PetscFree(tmp)); 604 } 605 if (numVertices) *numVertices = numCells; 606 if (offsets) *offsets = off; 607 if (adjacency) *adjacency = adj; 608 PetscFunctionReturn(0); 609 } 610 /* Setup face recognition */ 611 if (faceDepth == 1) { 612 PetscInt cornersSeen[30] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; /* Could use PetscBT */ 613 614 for (c = cStart; c < cEnd; ++c) { 615 PetscInt corners; 616 617 CHKERRQ(DMPlexGetConeSize(dm, c, &corners)); 618 if (!cornersSeen[corners]) { 619 PetscInt nFV; 620 621 PetscCheck(numFaceCases < maxFaceCases,PetscObjectComm((PetscObject)dm), PETSC_ERR_PLIB, "Exceeded maximum number of face recognition cases"); 622 cornersSeen[corners] = 1; 623 624 CHKERRQ(DMPlexGetNumFaceVertices(dm, cellDim, corners, &nFV)); 625 626 numFaceVertices[numFaceCases++] = nFV; 627 } 628 } 629 } 630 CHKERRQ(PetscCalloc1(numCells+1, &off)); 631 /* Count neighboring cells */ 632 for (cell = cStart; cell < cEnd; ++cell) { 633 PetscInt numNeighbors = PETSC_DETERMINE, n; 634 635 CHKERRQ(DMPlexGetAdjacency_Internal(dm, cell, PETSC_TRUE, PETSC_FALSE, PETSC_FALSE, &numNeighbors, &neighborCells)); 636 /* Get meet with each cell, and check with recognizer (could optimize to check each pair only once) */ 637 for (n = 0; n < numNeighbors; ++n) { 638 PetscInt cellPair[2]; 639 PetscBool found = faceDepth > 1 ? 
PETSC_TRUE : PETSC_FALSE; 640 PetscInt meetSize = 0; 641 const PetscInt *meet = NULL; 642 643 cellPair[0] = cell; cellPair[1] = neighborCells[n]; 644 if (cellPair[0] == cellPair[1]) continue; 645 if (!found) { 646 CHKERRQ(DMPlexGetMeet(dm, 2, cellPair, &meetSize, &meet)); 647 if (meetSize) { 648 PetscInt f; 649 650 for (f = 0; f < numFaceCases; ++f) { 651 if (numFaceVertices[f] == meetSize) { 652 found = PETSC_TRUE; 653 break; 654 } 655 } 656 } 657 CHKERRQ(DMPlexRestoreMeet(dm, 2, cellPair, &meetSize, &meet)); 658 } 659 if (found) ++off[cell-cStart+1]; 660 } 661 } 662 /* Prefix sum */ 663 for (cell = 1; cell <= numCells; ++cell) off[cell] += off[cell-1]; 664 665 if (adjacency) { 666 CHKERRQ(PetscMalloc1(off[numCells], &adj)); 667 /* Get neighboring cells */ 668 for (cell = cStart; cell < cEnd; ++cell) { 669 PetscInt numNeighbors = PETSC_DETERMINE, n; 670 PetscInt cellOffset = 0; 671 672 CHKERRQ(DMPlexGetAdjacency_Internal(dm, cell, PETSC_TRUE, PETSC_FALSE, PETSC_FALSE, &numNeighbors, &neighborCells)); 673 /* Get meet with each cell, and check with recognizer (could optimize to check each pair only once) */ 674 for (n = 0; n < numNeighbors; ++n) { 675 PetscInt cellPair[2]; 676 PetscBool found = faceDepth > 1 ? 
PETSC_TRUE : PETSC_FALSE; 677 PetscInt meetSize = 0; 678 const PetscInt *meet = NULL; 679 680 cellPair[0] = cell; cellPair[1] = neighborCells[n]; 681 if (cellPair[0] == cellPair[1]) continue; 682 if (!found) { 683 CHKERRQ(DMPlexGetMeet(dm, 2, cellPair, &meetSize, &meet)); 684 if (meetSize) { 685 PetscInt f; 686 687 for (f = 0; f < numFaceCases; ++f) { 688 if (numFaceVertices[f] == meetSize) { 689 found = PETSC_TRUE; 690 break; 691 } 692 } 693 } 694 CHKERRQ(DMPlexRestoreMeet(dm, 2, cellPair, &meetSize, &meet)); 695 } 696 if (found) { 697 adj[off[cell-cStart]+cellOffset] = neighborCells[n]; 698 ++cellOffset; 699 } 700 } 701 } 702 } 703 CHKERRQ(PetscFree(neighborCells)); 704 if (numVertices) *numVertices = numCells; 705 if (offsets) *offsets = off; 706 if (adjacency) *adjacency = adj; 707 PetscFunctionReturn(0); 708 } 709 710 /*@ 711 PetscPartitionerDMPlexPartition - Create a non-overlapping partition of the cells in the mesh 712 713 Collective on PetscPartitioner 714 715 Input Parameters: 716 + part - The PetscPartitioner 717 . targetSection - The PetscSection describing the absolute weight of each partition (can be NULL) 718 - dm - The mesh DM 719 720 Output Parameters: 721 + partSection - The PetscSection giving the division of points by partition 722 - partition - The list of points by partition 723 724 Notes: 725 If the DM has a local section associated, each point to be partitioned will be weighted by the total number of dofs identified 726 by the section in the transitive closure of the point. 
727 728 Level: developer 729 730 .seealso DMPlexDistribute(), PetscPartitionerCreate(), PetscSectionCreate(), PetscSectionSetChart(), PetscPartitionerPartition() 731 @*/ 732 PetscErrorCode PetscPartitionerDMPlexPartition(PetscPartitioner part, DM dm, PetscSection targetSection, PetscSection partSection, IS *partition) 733 { 734 PetscMPIInt size; 735 PetscBool isplex; 736 PetscSection vertSection = NULL; 737 738 PetscFunctionBegin; 739 PetscValidHeaderSpecific(part, PETSCPARTITIONER_CLASSID, 1); 740 PetscValidHeaderSpecific(dm, DM_CLASSID, 2); 741 if (targetSection) PetscValidHeaderSpecific(targetSection, PETSC_SECTION_CLASSID, 3); 742 PetscValidHeaderSpecific(partSection, PETSC_SECTION_CLASSID, 4); 743 PetscValidPointer(partition, 5); 744 CHKERRQ(PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex)); 745 PetscCheck(isplex,PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"Not for type %s",((PetscObject)dm)->type_name); 746 CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject) part), &size)); 747 if (size == 1) { 748 PetscInt *points; 749 PetscInt cStart, cEnd, c; 750 751 CHKERRQ(DMPlexGetHeightStratum(dm, part->height, &cStart, &cEnd)); 752 CHKERRQ(PetscSectionReset(partSection)); 753 CHKERRQ(PetscSectionSetChart(partSection, 0, size)); 754 CHKERRQ(PetscSectionSetDof(partSection, 0, cEnd-cStart)); 755 CHKERRQ(PetscSectionSetUp(partSection)); 756 CHKERRQ(PetscMalloc1(cEnd-cStart, &points)); 757 for (c = cStart; c < cEnd; ++c) points[c] = c; 758 CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject) part), cEnd-cStart, points, PETSC_OWN_POINTER, partition)); 759 PetscFunctionReturn(0); 760 } 761 if (part->height == 0) { 762 PetscInt numVertices = 0; 763 PetscInt *start = NULL; 764 PetscInt *adjacency = NULL; 765 IS globalNumbering; 766 767 if (!part->noGraph || part->viewGraph) { 768 CHKERRQ(DMPlexCreatePartitionerGraph(dm, part->height, &numVertices, &start, &adjacency, &globalNumbering)); 769 } else { /* only compute the number of owned local vertices */ 770 const 
PetscInt *idxs; 771 PetscInt p, pStart, pEnd; 772 773 CHKERRQ(DMPlexGetHeightStratum(dm, part->height, &pStart, &pEnd)); 774 CHKERRQ(DMPlexCreateNumbering_Plex(dm, pStart, pEnd, 0, NULL, dm->sf, &globalNumbering)); 775 CHKERRQ(ISGetIndices(globalNumbering, &idxs)); 776 for (p = 0; p < pEnd - pStart; p++) numVertices += idxs[p] < 0 ? 0 : 1; 777 CHKERRQ(ISRestoreIndices(globalNumbering, &idxs)); 778 } 779 if (part->usevwgt) { 780 PetscSection section = dm->localSection, clSection = NULL; 781 IS clPoints = NULL; 782 const PetscInt *gid,*clIdx; 783 PetscInt v, p, pStart, pEnd; 784 785 /* dm->localSection encodes degrees of freedom per point, not per cell. We need to get the closure index to properly specify cell weights (aka dofs) */ 786 /* We do this only if the local section has been set */ 787 if (section) { 788 CHKERRQ(PetscSectionGetClosureIndex(section, (PetscObject)dm, &clSection, NULL)); 789 if (!clSection) { 790 CHKERRQ(DMPlexCreateClosureIndex(dm,NULL)); 791 } 792 CHKERRQ(PetscSectionGetClosureIndex(section, (PetscObject)dm, &clSection, &clPoints)); 793 CHKERRQ(ISGetIndices(clPoints,&clIdx)); 794 } 795 CHKERRQ(DMPlexGetHeightStratum(dm, part->height, &pStart, &pEnd)); 796 CHKERRQ(PetscSectionCreate(PETSC_COMM_SELF, &vertSection)); 797 CHKERRQ(PetscSectionSetChart(vertSection, 0, numVertices)); 798 if (globalNumbering) { 799 CHKERRQ(ISGetIndices(globalNumbering,&gid)); 800 } else gid = NULL; 801 for (p = pStart, v = 0; p < pEnd; ++p) { 802 PetscInt dof = 1; 803 804 /* skip cells in the overlap */ 805 if (gid && gid[p-pStart] < 0) continue; 806 807 if (section) { 808 PetscInt cl, clSize, clOff; 809 810 dof = 0; 811 CHKERRQ(PetscSectionGetDof(clSection, p, &clSize)); 812 CHKERRQ(PetscSectionGetOffset(clSection, p, &clOff)); 813 for (cl = 0; cl < clSize; cl+=2) { 814 PetscInt clDof, clPoint = clIdx[clOff + cl]; /* odd indices are reserved for orientations */ 815 816 CHKERRQ(PetscSectionGetDof(section, clPoint, &clDof)); 817 dof += clDof; 818 } 819 } 820 
PetscCheck(dof,PETSC_COMM_SELF,PETSC_ERR_SUP,"Number of dofs for point %D in the local section should be positive",p); 821 CHKERRQ(PetscSectionSetDof(vertSection, v, dof)); 822 v++; 823 } 824 if (globalNumbering) { 825 CHKERRQ(ISRestoreIndices(globalNumbering,&gid)); 826 } 827 if (clPoints) { 828 CHKERRQ(ISRestoreIndices(clPoints,&clIdx)); 829 } 830 CHKERRQ(PetscSectionSetUp(vertSection)); 831 } 832 CHKERRQ(PetscPartitionerPartition(part, size, numVertices, start, adjacency, vertSection, targetSection, partSection, partition)); 833 CHKERRQ(PetscFree(start)); 834 CHKERRQ(PetscFree(adjacency)); 835 if (globalNumbering) { /* partition is wrt global unique numbering: change this to be wrt local numbering */ 836 const PetscInt *globalNum; 837 const PetscInt *partIdx; 838 PetscInt *map, cStart, cEnd; 839 PetscInt *adjusted, i, localSize, offset; 840 IS newPartition; 841 842 CHKERRQ(ISGetLocalSize(*partition,&localSize)); 843 CHKERRQ(PetscMalloc1(localSize,&adjusted)); 844 CHKERRQ(ISGetIndices(globalNumbering,&globalNum)); 845 CHKERRQ(ISGetIndices(*partition,&partIdx)); 846 CHKERRQ(PetscMalloc1(localSize,&map)); 847 CHKERRQ(DMPlexGetHeightStratum(dm, part->height, &cStart, &cEnd)); 848 for (i = cStart, offset = 0; i < cEnd; i++) { 849 if (globalNum[i - cStart] >= 0) map[offset++] = i; 850 } 851 for (i = 0; i < localSize; i++) { 852 adjusted[i] = map[partIdx[i]]; 853 } 854 CHKERRQ(PetscFree(map)); 855 CHKERRQ(ISRestoreIndices(*partition,&partIdx)); 856 CHKERRQ(ISRestoreIndices(globalNumbering,&globalNum)); 857 CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,localSize,adjusted,PETSC_OWN_POINTER,&newPartition)); 858 CHKERRQ(ISDestroy(&globalNumbering)); 859 CHKERRQ(ISDestroy(partition)); 860 *partition = newPartition; 861 } 862 } else SETERRQ(PetscObjectComm((PetscObject) part), PETSC_ERR_ARG_OUTOFRANGE, "Invalid height %D for points to partition", part->height); 863 CHKERRQ(PetscSectionDestroy(&vertSection)); 864 PetscFunctionReturn(0); 865 } 866 867 /*@ 868 DMPlexGetPartitioner - 
Get the mesh partitioner 869 870 Not collective 871 872 Input Parameter: 873 . dm - The DM 874 875 Output Parameter: 876 . part - The PetscPartitioner 877 878 Level: developer 879 880 Note: This gets a borrowed reference, so the user should not destroy this PetscPartitioner. 881 882 .seealso DMPlexDistribute(), DMPlexSetPartitioner(), PetscPartitionerDMPlexPartition(), PetscPartitionerCreate() 883 @*/ 884 PetscErrorCode DMPlexGetPartitioner(DM dm, PetscPartitioner *part) 885 { 886 DM_Plex *mesh = (DM_Plex *) dm->data; 887 888 PetscFunctionBegin; 889 PetscValidHeaderSpecific(dm, DM_CLASSID, 1); 890 PetscValidPointer(part, 2); 891 *part = mesh->partitioner; 892 PetscFunctionReturn(0); 893 } 894 895 /*@ 896 DMPlexSetPartitioner - Set the mesh partitioner 897 898 logically collective on DM 899 900 Input Parameters: 901 + dm - The DM 902 - part - The partitioner 903 904 Level: developer 905 906 Note: Any existing PetscPartitioner will be destroyed. 907 908 .seealso DMPlexDistribute(), DMPlexGetPartitioner(), PetscPartitionerCreate() 909 @*/ 910 PetscErrorCode DMPlexSetPartitioner(DM dm, PetscPartitioner part) 911 { 912 DM_Plex *mesh = (DM_Plex *) dm->data; 913 914 PetscFunctionBegin; 915 PetscValidHeaderSpecific(dm, DM_CLASSID, 1); 916 PetscValidHeaderSpecific(part, PETSCPARTITIONER_CLASSID, 2); 917 CHKERRQ(PetscObjectReference((PetscObject)part)); 918 CHKERRQ(PetscPartitionerDestroy(&mesh->partitioner)); 919 mesh->partitioner = part; 920 PetscFunctionReturn(0); 921 } 922 923 static PetscErrorCode DMPlexAddClosure_Private(DM dm, PetscHSetI ht, PetscInt point) 924 { 925 const PetscInt *cone; 926 PetscInt coneSize, c; 927 PetscBool missing; 928 929 PetscFunctionBeginHot; 930 CHKERRQ(PetscHSetIQueryAdd(ht, point, &missing)); 931 if (missing) { 932 CHKERRQ(DMPlexGetCone(dm, point, &cone)); 933 CHKERRQ(DMPlexGetConeSize(dm, point, &coneSize)); 934 for (c = 0; c < coneSize; c++) { 935 CHKERRQ(DMPlexAddClosure_Private(dm, ht, cone[c])); 936 } 937 } 938 PetscFunctionReturn(0); 
939 } 940 941 PETSC_UNUSED static PetscErrorCode DMPlexAddClosure_Tree(DM dm, PetscHSetI ht, PetscInt point, PetscBool up, PetscBool down) 942 { 943 PetscFunctionBegin; 944 if (up) { 945 PetscInt parent; 946 947 CHKERRQ(DMPlexGetTreeParent(dm,point,&parent,NULL)); 948 if (parent != point) { 949 PetscInt closureSize, *closure = NULL, i; 950 951 CHKERRQ(DMPlexGetTransitiveClosure(dm,parent,PETSC_TRUE,&closureSize,&closure)); 952 for (i = 0; i < closureSize; i++) { 953 PetscInt cpoint = closure[2*i]; 954 955 CHKERRQ(PetscHSetIAdd(ht, cpoint)); 956 CHKERRQ(DMPlexAddClosure_Tree(dm,ht,cpoint,PETSC_TRUE,PETSC_FALSE)); 957 } 958 CHKERRQ(DMPlexRestoreTransitiveClosure(dm,parent,PETSC_TRUE,&closureSize,&closure)); 959 } 960 } 961 if (down) { 962 PetscInt numChildren; 963 const PetscInt *children; 964 965 CHKERRQ(DMPlexGetTreeChildren(dm,point,&numChildren,&children)); 966 if (numChildren) { 967 PetscInt i; 968 969 for (i = 0; i < numChildren; i++) { 970 PetscInt cpoint = children[i]; 971 972 CHKERRQ(PetscHSetIAdd(ht, cpoint)); 973 CHKERRQ(DMPlexAddClosure_Tree(dm,ht,cpoint,PETSC_FALSE,PETSC_TRUE)); 974 } 975 } 976 } 977 PetscFunctionReturn(0); 978 } 979 980 static PetscErrorCode DMPlexAddClosureTree_Up_Private(DM dm, PetscHSetI ht, PetscInt point) 981 { 982 PetscInt parent; 983 984 PetscFunctionBeginHot; 985 CHKERRQ(DMPlexGetTreeParent(dm, point, &parent,NULL)); 986 if (point != parent) { 987 const PetscInt *cone; 988 PetscInt coneSize, c; 989 990 CHKERRQ(DMPlexAddClosureTree_Up_Private(dm, ht, parent)); 991 CHKERRQ(DMPlexAddClosure_Private(dm, ht, parent)); 992 CHKERRQ(DMPlexGetCone(dm, parent, &cone)); 993 CHKERRQ(DMPlexGetConeSize(dm, parent, &coneSize)); 994 for (c = 0; c < coneSize; c++) { 995 const PetscInt cp = cone[c]; 996 997 CHKERRQ(DMPlexAddClosureTree_Up_Private(dm, ht, cp)); 998 } 999 } 1000 PetscFunctionReturn(0); 1001 } 1002 1003 static PetscErrorCode DMPlexAddClosureTree_Down_Private(DM dm, PetscHSetI ht, PetscInt point) 1004 { 1005 PetscInt i, numChildren; 
1006 const PetscInt *children; 1007 1008 PetscFunctionBeginHot; 1009 CHKERRQ(DMPlexGetTreeChildren(dm, point, &numChildren, &children)); 1010 for (i = 0; i < numChildren; i++) { 1011 CHKERRQ(PetscHSetIAdd(ht, children[i])); 1012 } 1013 PetscFunctionReturn(0); 1014 } 1015 1016 static PetscErrorCode DMPlexAddClosureTree_Private(DM dm, PetscHSetI ht, PetscInt point) 1017 { 1018 const PetscInt *cone; 1019 PetscInt coneSize, c; 1020 1021 PetscFunctionBeginHot; 1022 CHKERRQ(PetscHSetIAdd(ht, point)); 1023 CHKERRQ(DMPlexAddClosureTree_Up_Private(dm, ht, point)); 1024 CHKERRQ(DMPlexAddClosureTree_Down_Private(dm, ht, point)); 1025 CHKERRQ(DMPlexGetCone(dm, point, &cone)); 1026 CHKERRQ(DMPlexGetConeSize(dm, point, &coneSize)); 1027 for (c = 0; c < coneSize; c++) { 1028 CHKERRQ(DMPlexAddClosureTree_Private(dm, ht, cone[c])); 1029 } 1030 PetscFunctionReturn(0); 1031 } 1032 1033 PetscErrorCode DMPlexClosurePoints_Private(DM dm, PetscInt numPoints, const PetscInt points[], IS *closureIS) 1034 { 1035 DM_Plex *mesh = (DM_Plex *)dm->data; 1036 const PetscBool hasTree = (mesh->parentSection || mesh->childSection) ? 
PETSC_TRUE : PETSC_FALSE; 1037 PetscInt nelems, *elems, off = 0, p; 1038 PetscHSetI ht = NULL; 1039 1040 PetscFunctionBegin; 1041 CHKERRQ(PetscHSetICreate(&ht)); 1042 CHKERRQ(PetscHSetIResize(ht, numPoints*16)); 1043 if (!hasTree) { 1044 for (p = 0; p < numPoints; ++p) { 1045 CHKERRQ(DMPlexAddClosure_Private(dm, ht, points[p])); 1046 } 1047 } else { 1048 #if 1 1049 for (p = 0; p < numPoints; ++p) { 1050 CHKERRQ(DMPlexAddClosureTree_Private(dm, ht, points[p])); 1051 } 1052 #else 1053 PetscInt *closure = NULL, closureSize, c; 1054 for (p = 0; p < numPoints; ++p) { 1055 CHKERRQ(DMPlexGetTransitiveClosure(dm, points[p], PETSC_TRUE, &closureSize, &closure)); 1056 for (c = 0; c < closureSize*2; c += 2) { 1057 CHKERRQ(PetscHSetIAdd(ht, closure[c])); 1058 if (hasTree) CHKERRQ(DMPlexAddClosure_Tree(dm, ht, closure[c], PETSC_TRUE, PETSC_TRUE)); 1059 } 1060 } 1061 if (closure) CHKERRQ(DMPlexRestoreTransitiveClosure(dm, 0, PETSC_TRUE, NULL, &closure)); 1062 #endif 1063 } 1064 CHKERRQ(PetscHSetIGetSize(ht, &nelems)); 1065 CHKERRQ(PetscMalloc1(nelems, &elems)); 1066 CHKERRQ(PetscHSetIGetElems(ht, &off, elems)); 1067 CHKERRQ(PetscHSetIDestroy(&ht)); 1068 CHKERRQ(PetscSortInt(nelems, elems)); 1069 CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF, nelems, elems, PETSC_OWN_POINTER, closureIS)); 1070 PetscFunctionReturn(0); 1071 } 1072 1073 /*@ 1074 DMPlexPartitionLabelClosure - Add the closure of all points to the partition label 1075 1076 Input Parameters: 1077 + dm - The DM 1078 - label - DMLabel assigning ranks to remote roots 1079 1080 Level: developer 1081 1082 .seealso: DMPlexPartitionLabelCreateSF(), DMPlexDistribute(), DMPlexCreateOverlap() 1083 @*/ 1084 PetscErrorCode DMPlexPartitionLabelClosure(DM dm, DMLabel label) 1085 { 1086 IS rankIS, pointIS, closureIS; 1087 const PetscInt *ranks, *points; 1088 PetscInt numRanks, numPoints, r; 1089 1090 PetscFunctionBegin; 1091 CHKERRQ(DMLabelGetValueIS(label, &rankIS)); 1092 CHKERRQ(ISGetLocalSize(rankIS, &numRanks)); 1093 
CHKERRQ(ISGetIndices(rankIS, &ranks)); 1094 for (r = 0; r < numRanks; ++r) { 1095 const PetscInt rank = ranks[r]; 1096 CHKERRQ(DMLabelGetStratumIS(label, rank, &pointIS)); 1097 CHKERRQ(ISGetLocalSize(pointIS, &numPoints)); 1098 CHKERRQ(ISGetIndices(pointIS, &points)); 1099 CHKERRQ(DMPlexClosurePoints_Private(dm, numPoints, points, &closureIS)); 1100 CHKERRQ(ISRestoreIndices(pointIS, &points)); 1101 CHKERRQ(ISDestroy(&pointIS)); 1102 CHKERRQ(DMLabelSetStratumIS(label, rank, closureIS)); 1103 CHKERRQ(ISDestroy(&closureIS)); 1104 } 1105 CHKERRQ(ISRestoreIndices(rankIS, &ranks)); 1106 CHKERRQ(ISDestroy(&rankIS)); 1107 PetscFunctionReturn(0); 1108 } 1109 1110 /*@ 1111 DMPlexPartitionLabelAdjacency - Add one level of adjacent points to the partition label 1112 1113 Input Parameters: 1114 + dm - The DM 1115 - label - DMLabel assigning ranks to remote roots 1116 1117 Level: developer 1118 1119 .seealso: DMPlexPartitionLabelCreateSF(), DMPlexDistribute(), DMPlexCreateOverlap() 1120 @*/ 1121 PetscErrorCode DMPlexPartitionLabelAdjacency(DM dm, DMLabel label) 1122 { 1123 IS rankIS, pointIS; 1124 const PetscInt *ranks, *points; 1125 PetscInt numRanks, numPoints, r, p, a, adjSize; 1126 PetscInt *adj = NULL; 1127 1128 PetscFunctionBegin; 1129 CHKERRQ(DMLabelGetValueIS(label, &rankIS)); 1130 CHKERRQ(ISGetLocalSize(rankIS, &numRanks)); 1131 CHKERRQ(ISGetIndices(rankIS, &ranks)); 1132 for (r = 0; r < numRanks; ++r) { 1133 const PetscInt rank = ranks[r]; 1134 1135 CHKERRQ(DMLabelGetStratumIS(label, rank, &pointIS)); 1136 CHKERRQ(ISGetLocalSize(pointIS, &numPoints)); 1137 CHKERRQ(ISGetIndices(pointIS, &points)); 1138 for (p = 0; p < numPoints; ++p) { 1139 adjSize = PETSC_DETERMINE; 1140 CHKERRQ(DMPlexGetAdjacency(dm, points[p], &adjSize, &adj)); 1141 for (a = 0; a < adjSize; ++a) CHKERRQ(DMLabelSetValue(label, adj[a], rank)); 1142 } 1143 CHKERRQ(ISRestoreIndices(pointIS, &points)); 1144 CHKERRQ(ISDestroy(&pointIS)); 1145 } 1146 CHKERRQ(ISRestoreIndices(rankIS, &ranks)); 1147 
CHKERRQ(ISDestroy(&rankIS)); 1148 CHKERRQ(PetscFree(adj)); 1149 PetscFunctionReturn(0); 1150 } 1151 1152 /*@ 1153 DMPlexPartitionLabelPropagate - Propagate points in a partition label over the point SF 1154 1155 Input Parameters: 1156 + dm - The DM 1157 - label - DMLabel assigning ranks to remote roots 1158 1159 Level: developer 1160 1161 Note: This is required when generating multi-level overlaps to capture 1162 overlap points from non-neighbouring partitions. 1163 1164 .seealso: DMPlexPartitionLabelCreateSF(), DMPlexDistribute(), DMPlexCreateOverlap() 1165 @*/ 1166 PetscErrorCode DMPlexPartitionLabelPropagate(DM dm, DMLabel label) 1167 { 1168 MPI_Comm comm; 1169 PetscMPIInt rank; 1170 PetscSF sfPoint; 1171 DMLabel lblRoots, lblLeaves; 1172 IS rankIS, pointIS; 1173 const PetscInt *ranks; 1174 PetscInt numRanks, r; 1175 1176 PetscFunctionBegin; 1177 CHKERRQ(PetscObjectGetComm((PetscObject) dm, &comm)); 1178 CHKERRMPI(MPI_Comm_rank(comm, &rank)); 1179 CHKERRQ(DMGetPointSF(dm, &sfPoint)); 1180 /* Pull point contributions from remote leaves into local roots */ 1181 CHKERRQ(DMLabelGather(label, sfPoint, &lblLeaves)); 1182 CHKERRQ(DMLabelGetValueIS(lblLeaves, &rankIS)); 1183 CHKERRQ(ISGetLocalSize(rankIS, &numRanks)); 1184 CHKERRQ(ISGetIndices(rankIS, &ranks)); 1185 for (r = 0; r < numRanks; ++r) { 1186 const PetscInt remoteRank = ranks[r]; 1187 if (remoteRank == rank) continue; 1188 CHKERRQ(DMLabelGetStratumIS(lblLeaves, remoteRank, &pointIS)); 1189 CHKERRQ(DMLabelInsertIS(label, pointIS, remoteRank)); 1190 CHKERRQ(ISDestroy(&pointIS)); 1191 } 1192 CHKERRQ(ISRestoreIndices(rankIS, &ranks)); 1193 CHKERRQ(ISDestroy(&rankIS)); 1194 CHKERRQ(DMLabelDestroy(&lblLeaves)); 1195 /* Push point contributions from roots into remote leaves */ 1196 CHKERRQ(DMLabelDistribute(label, sfPoint, &lblRoots)); 1197 CHKERRQ(DMLabelGetValueIS(lblRoots, &rankIS)); 1198 CHKERRQ(ISGetLocalSize(rankIS, &numRanks)); 1199 CHKERRQ(ISGetIndices(rankIS, &ranks)); 1200 for (r = 0; r < numRanks; ++r) { 
1201 const PetscInt remoteRank = ranks[r]; 1202 if (remoteRank == rank) continue; 1203 CHKERRQ(DMLabelGetStratumIS(lblRoots, remoteRank, &pointIS)); 1204 CHKERRQ(DMLabelInsertIS(label, pointIS, remoteRank)); 1205 CHKERRQ(ISDestroy(&pointIS)); 1206 } 1207 CHKERRQ(ISRestoreIndices(rankIS, &ranks)); 1208 CHKERRQ(ISDestroy(&rankIS)); 1209 CHKERRQ(DMLabelDestroy(&lblRoots)); 1210 PetscFunctionReturn(0); 1211 } 1212 1213 /*@ 1214 DMPlexPartitionLabelInvert - Create a partition label of remote roots from a local root label 1215 1216 Input Parameters: 1217 + dm - The DM 1218 . rootLabel - DMLabel assigning ranks to local roots 1219 - processSF - A star forest mapping into the local index on each remote rank 1220 1221 Output Parameter: 1222 . leafLabel - DMLabel assigning ranks to remote roots 1223 1224 Note: The rootLabel defines a send pattern by mapping local points to remote target ranks. The 1225 resulting leafLabel is a receiver mapping of remote roots to their parent rank. 1226 1227 Level: developer 1228 1229 .seealso: DMPlexPartitionLabelCreateSF(), DMPlexDistribute(), DMPlexCreateOverlap() 1230 @*/ 1231 PetscErrorCode DMPlexPartitionLabelInvert(DM dm, DMLabel rootLabel, PetscSF processSF, DMLabel leafLabel) 1232 { 1233 MPI_Comm comm; 1234 PetscMPIInt rank, size, r; 1235 PetscInt p, n, numNeighbors, numPoints, dof, off, rootSize, l, nleaves, leafSize; 1236 PetscSF sfPoint; 1237 PetscSection rootSection; 1238 PetscSFNode *rootPoints, *leafPoints; 1239 const PetscSFNode *remote; 1240 const PetscInt *local, *neighbors; 1241 IS valueIS; 1242 PetscBool mpiOverflow = PETSC_FALSE; 1243 1244 PetscFunctionBegin; 1245 CHKERRQ(PetscLogEventBegin(DMPLEX_PartLabelInvert,dm,0,0,0)); 1246 CHKERRQ(PetscObjectGetComm((PetscObject) dm, &comm)); 1247 CHKERRMPI(MPI_Comm_rank(comm, &rank)); 1248 CHKERRMPI(MPI_Comm_size(comm, &size)); 1249 CHKERRQ(DMGetPointSF(dm, &sfPoint)); 1250 1251 /* Convert to (point, rank) and use actual owners */ 1252 CHKERRQ(PetscSectionCreate(comm, 
&rootSection)); 1253 CHKERRQ(PetscSectionSetChart(rootSection, 0, size)); 1254 CHKERRQ(DMLabelGetValueIS(rootLabel, &valueIS)); 1255 CHKERRQ(ISGetLocalSize(valueIS, &numNeighbors)); 1256 CHKERRQ(ISGetIndices(valueIS, &neighbors)); 1257 for (n = 0; n < numNeighbors; ++n) { 1258 CHKERRQ(DMLabelGetStratumSize(rootLabel, neighbors[n], &numPoints)); 1259 CHKERRQ(PetscSectionAddDof(rootSection, neighbors[n], numPoints)); 1260 } 1261 CHKERRQ(PetscSectionSetUp(rootSection)); 1262 CHKERRQ(PetscSectionGetStorageSize(rootSection, &rootSize)); 1263 CHKERRQ(PetscMalloc1(rootSize, &rootPoints)); 1264 CHKERRQ(PetscSFGetGraph(sfPoint, NULL, &nleaves, &local, &remote)); 1265 for (n = 0; n < numNeighbors; ++n) { 1266 IS pointIS; 1267 const PetscInt *points; 1268 1269 CHKERRQ(PetscSectionGetOffset(rootSection, neighbors[n], &off)); 1270 CHKERRQ(DMLabelGetStratumIS(rootLabel, neighbors[n], &pointIS)); 1271 CHKERRQ(ISGetLocalSize(pointIS, &numPoints)); 1272 CHKERRQ(ISGetIndices(pointIS, &points)); 1273 for (p = 0; p < numPoints; ++p) { 1274 if (local) CHKERRQ(PetscFindInt(points[p], nleaves, local, &l)); 1275 else {l = -1;} 1276 if (l >= 0) {rootPoints[off+p] = remote[l];} 1277 else {rootPoints[off+p].index = points[p]; rootPoints[off+p].rank = rank;} 1278 } 1279 CHKERRQ(ISRestoreIndices(pointIS, &points)); 1280 CHKERRQ(ISDestroy(&pointIS)); 1281 } 1282 1283 /* Try to communicate overlap using All-to-All */ 1284 if (!processSF) { 1285 PetscInt64 counter = 0; 1286 PetscBool locOverflow = PETSC_FALSE; 1287 PetscMPIInt *scounts, *sdispls, *rcounts, *rdispls; 1288 1289 CHKERRQ(PetscCalloc4(size, &scounts, size, &sdispls, size, &rcounts, size, &rdispls)); 1290 for (n = 0; n < numNeighbors; ++n) { 1291 CHKERRQ(PetscSectionGetDof(rootSection, neighbors[n], &dof)); 1292 CHKERRQ(PetscSectionGetOffset(rootSection, neighbors[n], &off)); 1293 #if defined(PETSC_USE_64BIT_INDICES) 1294 if (dof > PETSC_MPI_INT_MAX) {locOverflow = PETSC_TRUE; break;} 1295 if (off > PETSC_MPI_INT_MAX) {locOverflow = 
PETSC_TRUE; break;} 1296 #endif 1297 scounts[neighbors[n]] = (PetscMPIInt) dof; 1298 sdispls[neighbors[n]] = (PetscMPIInt) off; 1299 } 1300 CHKERRMPI(MPI_Alltoall(scounts, 1, MPI_INT, rcounts, 1, MPI_INT, comm)); 1301 for (r = 0; r < size; ++r) { rdispls[r] = (int)counter; counter += rcounts[r]; } 1302 if (counter > PETSC_MPI_INT_MAX) locOverflow = PETSC_TRUE; 1303 CHKERRMPI(MPI_Allreduce(&locOverflow, &mpiOverflow, 1, MPIU_BOOL, MPI_LOR, comm)); 1304 if (!mpiOverflow) { 1305 CHKERRQ(PetscInfo(dm,"Using Alltoallv for mesh distribution\n")); 1306 leafSize = (PetscInt) counter; 1307 CHKERRQ(PetscMalloc1(leafSize, &leafPoints)); 1308 CHKERRMPI(MPI_Alltoallv(rootPoints, scounts, sdispls, MPIU_2INT, leafPoints, rcounts, rdispls, MPIU_2INT, comm)); 1309 } 1310 CHKERRQ(PetscFree4(scounts, sdispls, rcounts, rdispls)); 1311 } 1312 1313 /* Communicate overlap using process star forest */ 1314 if (processSF || mpiOverflow) { 1315 PetscSF procSF; 1316 PetscSection leafSection; 1317 1318 if (processSF) { 1319 CHKERRQ(PetscInfo(dm,"Using processSF for mesh distribution\n")); 1320 CHKERRQ(PetscObjectReference((PetscObject)processSF)); 1321 procSF = processSF; 1322 } else { 1323 CHKERRQ(PetscInfo(dm,"Using processSF for mesh distribution (MPI overflow)\n")); 1324 CHKERRQ(PetscSFCreate(comm,&procSF)); 1325 CHKERRQ(PetscSFSetGraphWithPattern(procSF,NULL,PETSCSF_PATTERN_ALLTOALL)); 1326 } 1327 1328 CHKERRQ(PetscSectionCreate(PetscObjectComm((PetscObject)dm), &leafSection)); 1329 CHKERRQ(DMPlexDistributeData(dm, procSF, rootSection, MPIU_2INT, rootPoints, leafSection, (void**) &leafPoints)); 1330 CHKERRQ(PetscSectionGetStorageSize(leafSection, &leafSize)); 1331 CHKERRQ(PetscSectionDestroy(&leafSection)); 1332 CHKERRQ(PetscSFDestroy(&procSF)); 1333 } 1334 1335 for (p = 0; p < leafSize; p++) { 1336 CHKERRQ(DMLabelSetValue(leafLabel, leafPoints[p].index, leafPoints[p].rank)); 1337 } 1338 1339 CHKERRQ(ISRestoreIndices(valueIS, &neighbors)); 1340 CHKERRQ(ISDestroy(&valueIS)); 1341 
CHKERRQ(PetscSectionDestroy(&rootSection)); 1342 CHKERRQ(PetscFree(rootPoints)); 1343 CHKERRQ(PetscFree(leafPoints)); 1344 CHKERRQ(PetscLogEventEnd(DMPLEX_PartLabelInvert,dm,0,0,0)); 1345 PetscFunctionReturn(0); 1346 } 1347 1348 /*@ 1349 DMPlexPartitionLabelCreateSF - Create a star forest from a label that assigns ranks to points 1350 1351 Input Parameters: 1352 + dm - The DM 1353 - label - DMLabel assigning ranks to remote roots 1354 1355 Output Parameter: 1356 . sf - The star forest communication context encapsulating the defined mapping 1357 1358 Note: The incoming label is a receiver mapping of remote points to their parent rank. 1359 1360 Level: developer 1361 1362 .seealso: DMPlexDistribute(), DMPlexCreateOverlap() 1363 @*/ 1364 PetscErrorCode DMPlexPartitionLabelCreateSF(DM dm, DMLabel label, PetscSF *sf) 1365 { 1366 PetscMPIInt rank; 1367 PetscInt n, numRemote, p, numPoints, pStart, pEnd, idx = 0, nNeighbors; 1368 PetscSFNode *remotePoints; 1369 IS remoteRootIS, neighborsIS; 1370 const PetscInt *remoteRoots, *neighbors; 1371 1372 PetscFunctionBegin; 1373 CHKERRQ(PetscLogEventBegin(DMPLEX_PartLabelCreateSF,dm,0,0,0)); 1374 CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject) dm), &rank)); 1375 1376 CHKERRQ(DMLabelGetValueIS(label, &neighborsIS)); 1377 #if 0 1378 { 1379 IS is; 1380 CHKERRQ(ISDuplicate(neighborsIS, &is)); 1381 CHKERRQ(ISSort(is)); 1382 CHKERRQ(ISDestroy(&neighborsIS)); 1383 neighborsIS = is; 1384 } 1385 #endif 1386 CHKERRQ(ISGetLocalSize(neighborsIS, &nNeighbors)); 1387 CHKERRQ(ISGetIndices(neighborsIS, &neighbors)); 1388 for (numRemote = 0, n = 0; n < nNeighbors; ++n) { 1389 CHKERRQ(DMLabelGetStratumSize(label, neighbors[n], &numPoints)); 1390 numRemote += numPoints; 1391 } 1392 CHKERRQ(PetscMalloc1(numRemote, &remotePoints)); 1393 /* Put owned points first */ 1394 CHKERRQ(DMLabelGetStratumSize(label, rank, &numPoints)); 1395 if (numPoints > 0) { 1396 CHKERRQ(DMLabelGetStratumIS(label, rank, &remoteRootIS)); 1397 
CHKERRQ(ISGetIndices(remoteRootIS, &remoteRoots)); 1398 for (p = 0; p < numPoints; p++) { 1399 remotePoints[idx].index = remoteRoots[p]; 1400 remotePoints[idx].rank = rank; 1401 idx++; 1402 } 1403 CHKERRQ(ISRestoreIndices(remoteRootIS, &remoteRoots)); 1404 CHKERRQ(ISDestroy(&remoteRootIS)); 1405 } 1406 /* Now add remote points */ 1407 for (n = 0; n < nNeighbors; ++n) { 1408 const PetscInt nn = neighbors[n]; 1409 1410 CHKERRQ(DMLabelGetStratumSize(label, nn, &numPoints)); 1411 if (nn == rank || numPoints <= 0) continue; 1412 CHKERRQ(DMLabelGetStratumIS(label, nn, &remoteRootIS)); 1413 CHKERRQ(ISGetIndices(remoteRootIS, &remoteRoots)); 1414 for (p = 0; p < numPoints; p++) { 1415 remotePoints[idx].index = remoteRoots[p]; 1416 remotePoints[idx].rank = nn; 1417 idx++; 1418 } 1419 CHKERRQ(ISRestoreIndices(remoteRootIS, &remoteRoots)); 1420 CHKERRQ(ISDestroy(&remoteRootIS)); 1421 } 1422 CHKERRQ(PetscSFCreate(PetscObjectComm((PetscObject) dm), sf)); 1423 CHKERRQ(DMPlexGetChart(dm, &pStart, &pEnd)); 1424 CHKERRQ(PetscSFSetGraph(*sf, pEnd-pStart, numRemote, NULL, PETSC_OWN_POINTER, remotePoints, PETSC_OWN_POINTER)); 1425 CHKERRQ(PetscSFSetUp(*sf)); 1426 CHKERRQ(ISDestroy(&neighborsIS)); 1427 CHKERRQ(PetscLogEventEnd(DMPLEX_PartLabelCreateSF,dm,0,0,0)); 1428 PetscFunctionReturn(0); 1429 } 1430 1431 #if defined(PETSC_HAVE_PARMETIS) 1432 #include <parmetis.h> 1433 #endif 1434 1435 /* The two functions below are used by DMPlexRebalanceSharedPoints which errors 1436 * when PETSc is built without ParMETIS. To avoid -Wunused-function, we take 1437 * them out in that case. */ 1438 #if defined(PETSC_HAVE_PARMETIS) 1439 /*@C 1440 1441 DMPlexRewriteSF - Rewrites the ownership of the SF of a DM (in place). 1442 1443 Input parameters: 1444 + dm - The DMPlex object. 1445 . n - The number of points. 1446 . pointsToRewrite - The points in the SF whose ownership will change. 1447 . targetOwners - New owner for each element in pointsToRewrite. 
1448 - degrees - Degrees of the points in the SF as obtained by PetscSFComputeDegreeBegin/PetscSFComputeDegreeEnd. 1449 1450 Level: developer 1451 1452 @*/ 1453 static PetscErrorCode DMPlexRewriteSF(DM dm, PetscInt n, PetscInt *pointsToRewrite, PetscInt *targetOwners, const PetscInt *degrees) 1454 { 1455 PetscInt pStart, pEnd, i, j, counter, leafCounter, sumDegrees, nroots, nleafs; 1456 PetscInt *cumSumDegrees, *newOwners, *newNumbers, *rankOnLeafs, *locationsOfLeafs, *remoteLocalPointOfLeafs, *points, *leafsNew; 1457 PetscSFNode *leafLocationsNew; 1458 const PetscSFNode *iremote; 1459 const PetscInt *ilocal; 1460 PetscBool *isLeaf; 1461 PetscSF sf; 1462 MPI_Comm comm; 1463 PetscMPIInt rank, size; 1464 1465 PetscFunctionBegin; 1466 CHKERRQ(PetscObjectGetComm((PetscObject) dm, &comm)); 1467 CHKERRMPI(MPI_Comm_rank(comm, &rank)); 1468 CHKERRMPI(MPI_Comm_size(comm, &size)); 1469 CHKERRQ(DMPlexGetChart(dm, &pStart, &pEnd)); 1470 1471 CHKERRQ(DMGetPointSF(dm, &sf)); 1472 CHKERRQ(PetscSFGetGraph(sf, &nroots, &nleafs, &ilocal, &iremote)); 1473 CHKERRQ(PetscMalloc1(pEnd-pStart, &isLeaf)); 1474 for (i=0; i<pEnd-pStart; i++) { 1475 isLeaf[i] = PETSC_FALSE; 1476 } 1477 for (i=0; i<nleafs; i++) { 1478 isLeaf[ilocal[i]-pStart] = PETSC_TRUE; 1479 } 1480 1481 CHKERRQ(PetscMalloc1(pEnd-pStart+1, &cumSumDegrees)); 1482 cumSumDegrees[0] = 0; 1483 for (i=1; i<=pEnd-pStart; i++) { 1484 cumSumDegrees[i] = cumSumDegrees[i-1] + degrees[i-1]; 1485 } 1486 sumDegrees = cumSumDegrees[pEnd-pStart]; 1487 /* get the location of my leafs (we have sumDegrees many leafs pointing at our roots) */ 1488 1489 CHKERRQ(PetscMalloc1(sumDegrees, &locationsOfLeafs)); 1490 CHKERRQ(PetscMalloc1(pEnd-pStart, &rankOnLeafs)); 1491 for (i=0; i<pEnd-pStart; i++) { 1492 rankOnLeafs[i] = rank; 1493 } 1494 CHKERRQ(PetscSFGatherBegin(sf, MPIU_INT, rankOnLeafs, locationsOfLeafs)); 1495 CHKERRQ(PetscSFGatherEnd(sf, MPIU_INT, rankOnLeafs, locationsOfLeafs)); 1496 CHKERRQ(PetscFree(rankOnLeafs)); 1497 1498 /* get the 
remote local points of my leaves */ 1499 CHKERRQ(PetscMalloc1(sumDegrees, &remoteLocalPointOfLeafs)); 1500 CHKERRQ(PetscMalloc1(pEnd-pStart, &points)); 1501 for (i=0; i<pEnd-pStart; i++) { 1502 points[i] = pStart+i; 1503 } 1504 CHKERRQ(PetscSFGatherBegin(sf, MPIU_INT, points, remoteLocalPointOfLeafs)); 1505 CHKERRQ(PetscSFGatherEnd(sf, MPIU_INT, points, remoteLocalPointOfLeafs)); 1506 CHKERRQ(PetscFree(points)); 1507 /* Figure out the new owners of the vertices that are up for grabs and their numbers on the new owners */ 1508 CHKERRQ(PetscMalloc1(pEnd-pStart, &newOwners)); 1509 CHKERRQ(PetscMalloc1(pEnd-pStart, &newNumbers)); 1510 for (i=0; i<pEnd-pStart; i++) { 1511 newOwners[i] = -1; 1512 newNumbers[i] = -1; 1513 } 1514 { 1515 PetscInt oldNumber, newNumber, oldOwner, newOwner; 1516 for (i=0; i<n; i++) { 1517 oldNumber = pointsToRewrite[i]; 1518 newNumber = -1; 1519 oldOwner = rank; 1520 newOwner = targetOwners[i]; 1521 if (oldOwner == newOwner) { 1522 newNumber = oldNumber; 1523 } else { 1524 for (j=0; j<degrees[oldNumber]; j++) { 1525 if (locationsOfLeafs[cumSumDegrees[oldNumber]+j] == newOwner) { 1526 newNumber = remoteLocalPointOfLeafs[cumSumDegrees[oldNumber]+j]; 1527 break; 1528 } 1529 } 1530 } 1531 PetscCheck(newNumber != -1,PetscObjectComm((PetscObject) dm), PETSC_ERR_SUP, "Couldn't find the new owner of vertex."); 1532 1533 newOwners[oldNumber] = newOwner; 1534 newNumbers[oldNumber] = newNumber; 1535 } 1536 } 1537 CHKERRQ(PetscFree(cumSumDegrees)); 1538 CHKERRQ(PetscFree(locationsOfLeafs)); 1539 CHKERRQ(PetscFree(remoteLocalPointOfLeafs)); 1540 1541 CHKERRQ(PetscSFBcastBegin(sf, MPIU_INT, newOwners, newOwners,MPI_REPLACE)); 1542 CHKERRQ(PetscSFBcastEnd(sf, MPIU_INT, newOwners, newOwners,MPI_REPLACE)); 1543 CHKERRQ(PetscSFBcastBegin(sf, MPIU_INT, newNumbers, newNumbers,MPI_REPLACE)); 1544 CHKERRQ(PetscSFBcastEnd(sf, MPIU_INT, newNumbers, newNumbers,MPI_REPLACE)); 1545 1546 /* Now count how many leafs we have on each processor. 
*/ 1547 leafCounter=0; 1548 for (i=0; i<pEnd-pStart; i++) { 1549 if (newOwners[i] >= 0) { 1550 if (newOwners[i] != rank) { 1551 leafCounter++; 1552 } 1553 } else { 1554 if (isLeaf[i]) { 1555 leafCounter++; 1556 } 1557 } 1558 } 1559 1560 /* Now set up the new sf by creating the leaf arrays */ 1561 CHKERRQ(PetscMalloc1(leafCounter, &leafsNew)); 1562 CHKERRQ(PetscMalloc1(leafCounter, &leafLocationsNew)); 1563 1564 leafCounter = 0; 1565 counter = 0; 1566 for (i=0; i<pEnd-pStart; i++) { 1567 if (newOwners[i] >= 0) { 1568 if (newOwners[i] != rank) { 1569 leafsNew[leafCounter] = i; 1570 leafLocationsNew[leafCounter].rank = newOwners[i]; 1571 leafLocationsNew[leafCounter].index = newNumbers[i]; 1572 leafCounter++; 1573 } 1574 } else { 1575 if (isLeaf[i]) { 1576 leafsNew[leafCounter] = i; 1577 leafLocationsNew[leafCounter].rank = iremote[counter].rank; 1578 leafLocationsNew[leafCounter].index = iremote[counter].index; 1579 leafCounter++; 1580 } 1581 } 1582 if (isLeaf[i]) { 1583 counter++; 1584 } 1585 } 1586 1587 CHKERRQ(PetscSFSetGraph(sf, nroots, leafCounter, leafsNew, PETSC_OWN_POINTER, leafLocationsNew, PETSC_OWN_POINTER)); 1588 CHKERRQ(PetscFree(newOwners)); 1589 CHKERRQ(PetscFree(newNumbers)); 1590 CHKERRQ(PetscFree(isLeaf)); 1591 PetscFunctionReturn(0); 1592 } 1593 1594 static PetscErrorCode DMPlexViewDistribution(MPI_Comm comm, PetscInt n, PetscInt skip, PetscInt *vtxwgt, PetscInt *part, PetscViewer viewer) 1595 { 1596 PetscInt *distribution, min, max, sum; 1597 PetscMPIInt rank, size; 1598 1599 PetscFunctionBegin; 1600 CHKERRMPI(MPI_Comm_size(comm, &size)); 1601 CHKERRMPI(MPI_Comm_rank(comm, &rank)); 1602 CHKERRQ(PetscCalloc1(size, &distribution)); 1603 for (PetscInt i=0; i<n; i++) { 1604 if (part) distribution[part[i]] += vtxwgt[skip*i]; 1605 else distribution[rank] += vtxwgt[skip*i]; 1606 } 1607 CHKERRMPI(MPI_Allreduce(MPI_IN_PLACE, distribution, size, MPIU_INT, MPI_SUM, comm)); 1608 min = distribution[0]; 1609 max = distribution[0]; 1610 sum = distribution[0]; 
1611 for (PetscInt i=1; i<size; i++) { 1612 if (distribution[i]<min) min=distribution[i]; 1613 if (distribution[i]>max) max=distribution[i]; 1614 sum += distribution[i]; 1615 } 1616 CHKERRQ(PetscViewerASCIIPrintf(viewer, "Min: %D, Avg: %D, Max: %D, Balance: %f\n", min, sum/size, max, (max*1.*size)/sum)); 1617 CHKERRQ(PetscFree(distribution)); 1618 PetscFunctionReturn(0); 1619 } 1620 1621 #endif 1622 1623 /*@ 1624 DMPlexRebalanceSharedPoints - Redistribute points in the plex that are shared in order to achieve better balancing. This routine updates the PointSF of the DM inplace. 1625 1626 Input parameters: 1627 + dm - The DMPlex object. 1628 . entityDepth - depth of the entity to balance (0 -> balance vertices). 1629 . useInitialGuess - whether to use the current distribution as initial guess (only used by ParMETIS). 1630 - parallel - whether to use ParMETIS and do the partition in parallel or whether to gather the graph onto a single process and use METIS. 1631 1632 Output parameters: 1633 . success - whether the graph partitioning was successful or not. If not, try useInitialGuess=True and parallel=True. 
  Level: intermediate

@*/
PetscErrorCode DMPlexRebalanceSharedPoints(DM dm, PetscInt entityDepth, PetscBool useInitialGuess, PetscBool parallel, PetscBool *success)
{
#if defined(PETSC_HAVE_PARMETIS)
  PetscSF            sf;
  PetscInt           ierr, i, j, idx, jdx; /* ierr holds (Par)METIS status codes (METIS_OK etc.), not a PetscErrorCode */
  PetscInt           eBegin, eEnd, nroots, nleafs, pStart, pEnd;
  const PetscInt    *degrees, *ilocal;
  const PetscSFNode *iremote;
  PetscBool         *toBalance, *isLeaf, *isExclusivelyOwned, *isNonExclusivelyOwned;
  PetscInt           numExclusivelyOwned, numNonExclusivelyOwned;
  PetscMPIInt        rank, size;
  PetscInt          *globalNumbersOfLocalOwnedVertices, *leafGlobalNumbers;
  const PetscInt    *cumSumVertices;
  PetscInt           offset, counter;
  PetscInt           lenadjncy;
  PetscInt          *xadj, *adjncy, *vtxwgt;
  PetscInt           lenxadj;
  PetscInt          *adjwgt = NULL; /* no edge weights are used */
  PetscInt          *part, *options;
  PetscInt           nparts, wgtflag, numflag, ncon, edgecut;
  real_t            *ubvec;
  PetscInt          *firstVertices, *renumbering;
  PetscInt           failed, failedGlobal;
  MPI_Comm           comm;
  Mat                A, Apre;
  const char        *prefix = NULL;
  PetscViewer        viewer;
  PetscViewerFormat  format;
  PetscLayout        layout;

  PetscFunctionBegin;
  if (success) *success = PETSC_FALSE;
  CHKERRQ(PetscObjectGetComm((PetscObject) dm, &comm));
  CHKERRMPI(MPI_Comm_rank(comm, &rank));
  CHKERRMPI(MPI_Comm_size(comm, &size));
  /* Rebalancing shared points is meaningless on a single process */
  if (size==1) PetscFunctionReturn(0);

  CHKERRQ(PetscLogEventBegin(DMPLEX_RebalanceSharedPoints, dm, 0, 0, 0));

  CHKERRQ(PetscOptionsGetViewer(comm,((PetscObject)dm)->options, prefix,"-dm_rebalance_partition_view",&viewer,&format,NULL));
  if (viewer) {
    CHKERRQ(PetscViewerPushFormat(viewer,format));
  }

  /* Figure out all points in the plex that we are interested in balancing. */
  CHKERRQ(DMPlexGetDepthStratum(dm, entityDepth, &eBegin, &eEnd));
  CHKERRQ(DMPlexGetChart(dm, &pStart, &pEnd));
  CHKERRQ(PetscMalloc1(pEnd-pStart, &toBalance));

  for (i=0; i<pEnd-pStart; i++) {
    /* NOTE(review): i indexes points relative to pStart, so the stratum test
     * arguably should compare i+pStart against [eBegin, eEnd); i-pStart is only
     * equivalent when pStart == 0 (which holds for Plex charts in practice) —
     * TODO confirm against DMPlexGetChart semantics */
    toBalance[i] = (PetscBool)(i-pStart>=eBegin && i-pStart<eEnd);
  }

  /* There are three types of points:
   * exclusivelyOwned: points that are owned by this process and only seen by this process
   * nonExclusivelyOwned: points that are owned by this process but seen by at least another process
   * leaf: a point that is seen by this process but owned by a different process
   */
  CHKERRQ(DMGetPointSF(dm, &sf));
  CHKERRQ(PetscSFGetGraph(sf, &nroots, &nleafs, &ilocal, &iremote));
  CHKERRQ(PetscMalloc1(pEnd-pStart, &isLeaf));
  CHKERRQ(PetscMalloc1(pEnd-pStart, &isNonExclusivelyOwned));
  CHKERRQ(PetscMalloc1(pEnd-pStart, &isExclusivelyOwned));
  for (i=0; i<pEnd-pStart; i++) {
    isNonExclusivelyOwned[i] = PETSC_FALSE;
    isExclusivelyOwned[i]    = PETSC_FALSE;
    isLeaf[i]                = PETSC_FALSE;
  }

  /* start by marking all the leafs */
  for (i=0; i<nleafs; i++) {
    isLeaf[ilocal[i]-pStart] = PETSC_TRUE;
  }

  /* for an owned point, we can figure out whether another processor sees it or
   * not by calculating its degree */
  CHKERRQ(PetscSFComputeDegreeBegin(sf, &degrees));
  CHKERRQ(PetscSFComputeDegreeEnd(sf, &degrees));

  numExclusivelyOwned    = 0;
  numNonExclusivelyOwned = 0;
  for (i=0; i<pEnd-pStart; i++) {
    if (toBalance[i]) {
      if (degrees[i] > 0) {
        /* at least one other rank has a leaf pointing at this root */
        isNonExclusivelyOwned[i] = PETSC_TRUE;
        numNonExclusivelyOwned += 1;
      } else {
        if (!isLeaf[i]) {
          /* neither shared as a root nor received as a leaf: private to this rank */
          isExclusivelyOwned[i] = PETSC_TRUE;
          numExclusivelyOwned += 1;
        }
      }
    }
  }

  /* We are going to build a graph with one vertex per core representing the
   * exclusively owned points and then one vertex per nonExclusively owned
   * point. */

  CHKERRQ(PetscLayoutCreate(comm, &layout));
  CHKERRQ(PetscLayoutSetLocalSize(layout, 1 + numNonExclusivelyOwned));
  CHKERRQ(PetscLayoutSetUp(layout));
  CHKERRQ(PetscLayoutGetRanges(layout, &cumSumVertices));

  /* Global graph-vertex number for each locally owned shared point; pStart-1 is a
   * "not a graph vertex" sentinel (any value below the valid range would do) */
  CHKERRQ(PetscMalloc1(pEnd-pStart, &globalNumbersOfLocalOwnedVertices));
  for (i=0; i<pEnd-pStart; i++) {globalNumbersOfLocalOwnedVertices[i] = pStart - 1;}
  offset  = cumSumVertices[rank];
  counter = 0;
  for (i=0; i<pEnd-pStart; i++) {
    if (toBalance[i]) {
      if (degrees[i] > 0) {
        /* +1 skips over the per-rank "exclusively owned" aggregate vertex at offset */
        globalNumbersOfLocalOwnedVertices[i] = counter + 1 + offset;
        counter++;
      }
    }
  }

  /* send the global numbers of vertices I own to the leafs so that they know to connect to it */
  CHKERRQ(PetscMalloc1(pEnd-pStart, &leafGlobalNumbers));
  CHKERRQ(PetscSFBcastBegin(sf, MPIU_INT, globalNumbersOfLocalOwnedVertices, leafGlobalNumbers,MPI_REPLACE));
  CHKERRQ(PetscSFBcastEnd(sf, MPIU_INT, globalNumbersOfLocalOwnedVertices, leafGlobalNumbers,MPI_REPLACE));

  /* Now start building the data structure for ParMETIS */

  /* First pass with a MATPREALLOCATOR to count nonzeros, then the real assembly below */
  CHKERRQ(MatCreate(comm, &Apre));
  CHKERRQ(MatSetType(Apre, MATPREALLOCATOR));
  CHKERRQ(MatSetSizes(Apre, 1+numNonExclusivelyOwned, 1+numNonExclusivelyOwned, cumSumVertices[size], cumSumVertices[size]));
  CHKERRQ(MatSetUp(Apre));

  for (i=0; i<pEnd-pStart; i++) {
    if (toBalance[i]) {
      idx = cumSumVertices[rank]; /* this rank's aggregate vertex */
      if (isNonExclusivelyOwned[i]) jdx = globalNumbersOfLocalOwnedVertices[i];
      else if (isLeaf[i]) jdx = leafGlobalNumbers[i];
      else continue;
      /* symmetric insertion: the adjacency graph is undirected */
      CHKERRQ(MatSetValue(Apre, idx, jdx, 1, INSERT_VALUES));
      CHKERRQ(MatSetValue(Apre, jdx, idx, 1, INSERT_VALUES));
    }
  }

  CHKERRQ(MatAssemblyBegin(Apre, MAT_FINAL_ASSEMBLY));
  CHKERRQ(MatAssemblyEnd(Apre, MAT_FINAL_ASSEMBLY));

  CHKERRQ(MatCreate(comm, &A));
  CHKERRQ(MatSetType(A, MATMPIAIJ));
  CHKERRQ(MatSetSizes(A, 1+numNonExclusivelyOwned, 1+numNonExclusivelyOwned, cumSumVertices[size], cumSumVertices[size]));
  CHKERRQ(MatPreallocatorPreallocate(Apre, PETSC_FALSE, A));
  CHKERRQ(MatDestroy(&Apre));

  /* Second pass: same insertions into the properly preallocated matrix */
  for (i=0; i<pEnd-pStart; i++) {
    if (toBalance[i]) {
      idx = cumSumVertices[rank];
      if (isNonExclusivelyOwned[i]) jdx = globalNumbersOfLocalOwnedVertices[i];
      else if (isLeaf[i]) jdx = leafGlobalNumbers[i];
      else continue;
      CHKERRQ(MatSetValue(A, idx, jdx, 1, INSERT_VALUES));
      CHKERRQ(MatSetValue(A, jdx, idx, 1, INSERT_VALUES));
    }
  }

  CHKERRQ(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  CHKERRQ(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  CHKERRQ(PetscFree(leafGlobalNumbers));
  CHKERRQ(PetscFree(globalNumbersOfLocalOwnedVertices));

  nparts  = size;
  wgtflag = 2; /* weights on vertices only, no edge weights */
  numflag = 0; /* C-style (0-based) numbering */
  ncon    = 2; /* two balance constraints: owned-point load and vertex count */
  real_t *tpwgts;
  CHKERRQ(PetscMalloc1(ncon * nparts, &tpwgts));
  for (i=0; i<ncon*nparts; i++) {
    tpwgts[i] = 1./(nparts); /* uniform target partition weights */
  }

  CHKERRQ(PetscMalloc1(ncon, &ubvec));
  ubvec[0] = 1.01; /* allow 1% imbalance per constraint */
  ubvec[1] = 1.01;
  /* Extract this rank's rows of A into CSR form (xadj/adjncy) for (Par)METIS */
  lenadjncy = 0;
  for (i=0; i<1+numNonExclusivelyOwned; i++) {
    PetscInt temp=0;
    CHKERRQ(MatGetRow(A, cumSumVertices[rank] + i, &temp, NULL, NULL));
    lenadjncy += temp;
    CHKERRQ(MatRestoreRow(A, cumSumVertices[rank] + i, &temp, NULL, NULL));
  }
  CHKERRQ(PetscMalloc1(lenadjncy, &adjncy));
  lenxadj = 2 + numNonExclusivelyOwned;
  CHKERRQ(PetscMalloc1(lenxadj, &xadj));
  xadj[0] = 0;
  counter = 0;
  for (i=0; i<1+numNonExclusivelyOwned; i++) {
    PetscInt temp=0;
    const PetscInt *cols;
    CHKERRQ(MatGetRow(A, cumSumVertices[rank] + i, &temp, &cols, NULL));
    CHKERRQ(PetscArraycpy(&adjncy[counter], cols, temp));
    counter += temp;
    xadj[i+1] = counter;
    CHKERRQ(MatRestoreRow(A, cumSumVertices[rank] + i, &temp, &cols, NULL));
  }

  CHKERRQ(PetscMalloc1(cumSumVertices[rank+1]-cumSumVertices[rank], &part));
  CHKERRQ(PetscMalloc1(ncon*(1 + numNonExclusivelyOwned), &vtxwgt));
  /* aggregate vertex carries the exclusively owned load; second constraint counts vertices */
  vtxwgt[0] = numExclusivelyOwned;
  if (ncon>1) vtxwgt[1] = 1;
  for (i=0; i<numNonExclusivelyOwned; i++) {
    vtxwgt[ncon*(i+1)] = 1;
    if (ncon>1) vtxwgt[ncon*(i+1)+1] = 0;
  }

  if (viewer) {
    CHKERRQ(PetscViewerASCIIPrintf(viewer, "Attempt rebalancing of shared points of depth %D on interface of mesh distribution.\n", entityDepth));
    CHKERRQ(PetscViewerASCIIPrintf(viewer, "Size of generated auxiliary graph: %D\n", cumSumVertices[size]));
  }
  if (parallel) {
    CHKERRQ(PetscMalloc1(4, &options));
    options[0] = 1;
    options[1] = 0; /* Verbosity */
    options[2] = 0; /* Seed */
    options[3] = PARMETIS_PSR_COUPLED; /* Coupled: sub-domain i is kept on rank i */
    if (viewer) CHKERRQ(PetscViewerASCIIPrintf(viewer, "Using ParMETIS to partition graph.\n"));
    if (useInitialGuess) {
      if (viewer) CHKERRQ(PetscViewerASCIIPrintf(viewer, "Using current distribution of points as initial guess.\n"));
      PetscStackPush("ParMETIS_V3_RefineKway");
      ierr = ParMETIS_V3_RefineKway((PetscInt*)cumSumVertices, xadj, adjncy, vtxwgt, adjwgt, &wgtflag, &numflag, &ncon, &nparts, tpwgts, ubvec, options, &edgecut, part, &comm);
      PetscCheck(ierr == METIS_OK,PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ParMETIS_V3_RefineKway()");
      PetscStackPop;
    } else {
      PetscStackPush("ParMETIS_V3_PartKway");
      ierr = ParMETIS_V3_PartKway((PetscInt*)cumSumVertices, xadj, adjncy, vtxwgt, adjwgt, &wgtflag, &numflag, &ncon, &nparts, tpwgts, ubvec, options, &edgecut, part, &comm);
      PetscStackPop;
      PetscCheck(ierr == METIS_OK,PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ParMETIS_V3_PartKway()");
    }
    CHKERRQ(PetscFree(options));
  } else {
    /* Serial METIS path: gather the whole graph on every rank, partition on rank 0 only */
    if (viewer) CHKERRQ(PetscViewerASCIIPrintf(viewer, "Using METIS to partition graph.\n"));
    Mat       As;
    PetscInt  numRows;
    PetscInt *partGlobal;
    CHKERRQ(MatCreateRedundantMatrix(A, size, MPI_COMM_NULL, MAT_INITIAL_MATRIX, &As));

    PetscInt *numExclusivelyOwnedAll;
    CHKERRQ(PetscMalloc1(size, &numExclusivelyOwnedAll));
    numExclusivelyOwnedAll[rank] = numExclusivelyOwned;
    CHKERRMPI(MPI_Allgather(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,numExclusivelyOwnedAll,1,MPIU_INT,comm));

    CHKERRQ(MatGetSize(As, &numRows, NULL));
    CHKERRQ(PetscMalloc1(numRows, &partGlobal));
    if (rank == 0) {
      PetscInt *adjncy_g, *xadj_g, *vtxwgt_g;
      lenadjncy = 0;

      for (i=0; i<numRows; i++) {
        PetscInt temp=0;
        CHKERRQ(MatGetRow(As, i, &temp, NULL, NULL));
        lenadjncy += temp;
        CHKERRQ(MatRestoreRow(As, i, &temp, NULL, NULL));
      }
      CHKERRQ(PetscMalloc1(lenadjncy, &adjncy_g));
      lenxadj = 1 + numRows;
      CHKERRQ(PetscMalloc1(lenxadj, &xadj_g));
      xadj_g[0] = 0;
      counter = 0;
      for (i=0; i<numRows; i++) {
        PetscInt temp=0;
        const PetscInt *cols;
        CHKERRQ(MatGetRow(As, i, &temp, &cols, NULL));
        CHKERRQ(PetscArraycpy(&adjncy_g[counter], cols, temp));
        counter += temp;
        xadj_g[i+1] = counter;
        CHKERRQ(MatRestoreRow(As, i, &temp, &cols, NULL));
      }
      /* NOTE(review): size 2*numRows and the 2*j+1 index below assume ncon == 2;
       * they agree with the ncon*... indexing only because ncon is fixed to 2 above */
      CHKERRQ(PetscMalloc1(2*numRows, &vtxwgt_g));
      for (i=0; i<size; i++) {
        vtxwgt_g[ncon*cumSumVertices[i]] = numExclusivelyOwnedAll[i];
        if (ncon>1) vtxwgt_g[ncon*cumSumVertices[i]+1] = 1;
        for (j=cumSumVertices[i]+1; j<cumSumVertices[i+1]; j++) {
          vtxwgt_g[ncon*j] = 1;
          if (ncon>1) vtxwgt_g[2*j+1] = 0;
        }
      }
      CHKERRQ(PetscMalloc1(64, &options));
      ierr = METIS_SetDefaultOptions(options); /* initialize all defaults */
      PetscCheck(ierr == METIS_OK,PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in METIS_SetDefaultOptions()");
      options[METIS_OPTION_CONTIG] = 1; /* require contiguous partitions */
      PetscStackPush("METIS_PartGraphKway");
      ierr = METIS_PartGraphKway(&numRows, &ncon, xadj_g, adjncy_g, vtxwgt_g, NULL, NULL, &nparts, tpwgts, ubvec, options, &edgecut, partGlobal);
      PetscStackPop;
      PetscCheck(ierr == METIS_OK,PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in METIS_PartGraphKway()");
      CHKERRQ(PetscFree(options));
      CHKERRQ(PetscFree(xadj_g));
      CHKERRQ(PetscFree(adjncy_g));
      CHKERRQ(PetscFree(vtxwgt_g));
    }
    CHKERRQ(PetscFree(numExclusivelyOwnedAll));

    /* Now scatter the parts array. */
    {
      PetscMPIInt *counts, *mpiCumSumVertices;
      CHKERRQ(PetscMalloc1(size, &counts));
      CHKERRQ(PetscMalloc1(size+1, &mpiCumSumVertices));
      for (i=0; i<size; i++) {
        CHKERRQ(PetscMPIIntCast(cumSumVertices[i+1] - cumSumVertices[i], &(counts[i])));
      }
      for (i=0; i<=size; i++) {
        CHKERRQ(PetscMPIIntCast(cumSumVertices[i], &(mpiCumSumVertices[i])));
      }
      CHKERRMPI(MPI_Scatterv(partGlobal, counts, mpiCumSumVertices, MPIU_INT, part, counts[rank], MPIU_INT, 0, comm));
      CHKERRQ(PetscFree(counts));
      CHKERRQ(PetscFree(mpiCumSumVertices));
    }

    CHKERRQ(PetscFree(partGlobal));
    CHKERRQ(MatDestroy(&As));
  }

  CHKERRQ(MatDestroy(&A));
  CHKERRQ(PetscFree(ubvec));
  CHKERRQ(PetscFree(tpwgts));

  /* Now rename the result so that the vertex resembling the exclusively owned points stays on the same rank */

  CHKERRQ(PetscMalloc1(size, &firstVertices));
  CHKERRQ(PetscMalloc1(size, &renumbering));
  firstVertices[rank] = part[0];
  CHKERRMPI(MPI_Allgather(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,firstVertices,1,MPIU_INT,comm));
  for (i=0; i<size; i++) {
    renumbering[firstVertices[i]] = i;
  }
  for (i=0; i<cumSumVertices[rank+1]-cumSumVertices[rank]; i++) {
    part[i] = renumbering[part[i]];
  }
  /* Check if the renumbering worked (this can fail when ParMETIS gives fewer partitions than there are processes) */
  failed = (PetscInt)(part[0] != rank);
  CHKERRMPI(MPI_Allreduce(&failed, &failedGlobal, 1, MPIU_INT, MPI_SUM, comm));

  CHKERRQ(PetscFree(firstVertices));
  CHKERRQ(PetscFree(renumbering));

  if (failedGlobal > 0) {
    /* Bail out collectively, leaving the SF untouched; *success stays PETSC_FALSE */
    CHKERRQ(PetscLayoutDestroy(&layout));
    CHKERRQ(PetscFree(xadj));
    CHKERRQ(PetscFree(adjncy));
    CHKERRQ(PetscFree(vtxwgt));
    CHKERRQ(PetscFree(toBalance));
    CHKERRQ(PetscFree(isLeaf));
    CHKERRQ(PetscFree(isNonExclusivelyOwned));
    CHKERRQ(PetscFree(isExclusivelyOwned));
    CHKERRQ(PetscFree(part));
    if (viewer) {
      CHKERRQ(PetscViewerPopFormat(viewer));
      CHKERRQ(PetscViewerDestroy(&viewer));
    }
    CHKERRQ(PetscLogEventEnd(DMPLEX_RebalanceSharedPoints, dm, 0, 0, 0));
    PetscFunctionReturn(0);
  }

  /*Let's check how well we did distributing points*/
  if (viewer) {
    CHKERRQ(PetscViewerASCIIPrintf(viewer, "Comparing number of owned entities of depth %D on each process before rebalancing, after rebalancing, and after consistency checks.\n", entityDepth));
    CHKERRQ(PetscViewerASCIIPrintf(viewer, "Initial. "));
    CHKERRQ(DMPlexViewDistribution(comm, cumSumVertices[rank+1]-cumSumVertices[rank], ncon, vtxwgt, NULL, viewer));
    CHKERRQ(PetscViewerASCIIPrintf(viewer, "Rebalanced. "));
    CHKERRQ(DMPlexViewDistribution(comm, cumSumVertices[rank+1]-cumSumVertices[rank], ncon, vtxwgt, part, viewer));
  }

  /* Now check that every vertex is owned by a process that it is actually connected to. */
  for (i=1; i<=numNonExclusivelyOwned; i++) {
    PetscInt loc = 0;
    /* adjncy rows are sorted Mat columns, so a binary search is valid here */
    CHKERRQ(PetscFindInt(cumSumVertices[part[i]], xadj[i+1]-xadj[i], &adjncy[xadj[i]], &loc));
    /* If not, then just set the owner to the original owner (hopefully a rare event, it means that a vertex has been isolated) */
    if (loc<0) {
      part[i] = rank;
    }
  }

  /* Let's see how significant the influences of the previous fixing up step was.*/
  if (viewer) {
    CHKERRQ(PetscViewerASCIIPrintf(viewer, "After. "));
    CHKERRQ(DMPlexViewDistribution(comm, cumSumVertices[rank+1]-cumSumVertices[rank], ncon, vtxwgt, part, viewer));
  }

  CHKERRQ(PetscLayoutDestroy(&layout));
  CHKERRQ(PetscFree(xadj));
  CHKERRQ(PetscFree(adjncy));
  CHKERRQ(PetscFree(vtxwgt));

  /* Almost done, now rewrite the SF to reflect the new ownership. */
  {
    PetscInt *pointsToRewrite;
    CHKERRQ(PetscMalloc1(numNonExclusivelyOwned, &pointsToRewrite));
    counter = 0;
    for (i=0; i<pEnd-pStart; i++) {
      if (toBalance[i]) {
        if (isNonExclusivelyOwned[i]) {
          pointsToRewrite[counter] = i + pStart;
          counter++;
        }
      }
    }
    /* part+1 skips the per-rank aggregate vertex; entries line up with pointsToRewrite */
    CHKERRQ(DMPlexRewriteSF(dm, numNonExclusivelyOwned, pointsToRewrite, part+1, degrees));
    CHKERRQ(PetscFree(pointsToRewrite));
  }

  CHKERRQ(PetscFree(toBalance));
  CHKERRQ(PetscFree(isLeaf));
  CHKERRQ(PetscFree(isNonExclusivelyOwned));
  CHKERRQ(PetscFree(isExclusivelyOwned));
  CHKERRQ(PetscFree(part));
  if (viewer) {
    CHKERRQ(PetscViewerPopFormat(viewer));
    CHKERRQ(PetscViewerDestroy(&viewer));
  }
  if (success) *success = PETSC_TRUE;
  CHKERRQ(PetscLogEventEnd(DMPLEX_RebalanceSharedPoints, dm, 0, 0, 0));
  PetscFunctionReturn(0);
#else
  SETERRQ(PetscObjectComm((PetscObject) dm), PETSC_ERR_SUP, "Mesh partitioning needs external package support.\nPlease reconfigure with --download-parmetis.");
#endif
}