#include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
#include <petsc/private/sectionimpl.h>

/*@
  PetscSFSetGraphLayout - Set a parallel star forest via global indices and a `PetscLayout`

  Collective

  Input Parameters:
+ sf        - star forest
. layout    - `PetscLayout` defining the global space for roots
. nleaves   - number of leaf vertices on the current process, each of these references a root on any process
. ilocal    - locations of leaves in leafdata buffers, pass NULL for contiguous storage
. localmode - copy mode for ilocal
- gremote   - root vertices in global numbering corresponding to leaves in ilocal

  Level: intermediate

  Notes:
  Global indices must lie in [0, N) where N is the global size of layout.

  Leaf indices in ilocal get sorted; this means the user-provided array gets sorted if localmode is `PETSC_OWN_POINTER`.

  Developer Notes:
  Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
  encode contiguous storage. In such a case, if localmode is `PETSC_OWN_POINTER`, the memory is deallocated as it is not
  needed.

.seealso: `PetscSF`, `PetscSFGetGraphLayout()`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFSetGraphLayout(PetscSF sf, PetscLayout layout, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, const PetscInt *gremote)
{
  const PetscInt *range;
  PetscInt        i, nroots, ls = -1, ln = -1;
  PetscMPIInt     lr = -1;
  PetscSFNode    *remote;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout, &nroots));
  PetscCall(PetscLayoutGetRanges(layout, &range));
  PetscCall(PetscMalloc1(nleaves, &remote));
  if (nleaves) ls = gremote[0] + 1;
  for (i = 0; i < nleaves; i++) {
    const PetscInt idx = gremote[i] - ls;
    if (idx < 0 || idx >= ln) { /* short-circuit the search */
      PetscCall(PetscLayoutFindOwnerIndex(layout, gremote[i], &lr, &remote[i].index));
      remote[i].rank = lr;
      ls             = range[lr];
      ln             = range[lr + 1] - ls;
    } else {
      remote[i].rank  = lr;
      remote[i].index = idx;
    }
  }
  PetscCall(PetscSFSetGraph(sf, nroots, nleaves, ilocal, localmode, remote, PETSC_OWN_POINTER));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFGetGraphLayout - Get the global indices and `PetscLayout` that describe this star forest

  Collective

  Input Parameter:
. sf - star forest

  Output Parameters:
+ layout  - `PetscLayout` defining the global space for roots
. nleaves - number of leaf vertices on the current process, each of these references a root on any process
. ilocal  - locations of leaves in leafdata buffers, or `NULL` for contiguous storage
- gremote - root vertices in global numbering corresponding to leaves in ilocal

  Level: intermediate

  Notes:
  The outputs are such that passing them as inputs to `PetscSFSetGraphLayout()` would lead to the same star forest.

  The outputs `layout` and `gremote` are freshly created each time this function is called,
  so they need to be freed (with `PetscLayoutDestroy()` and `PetscFree()`) by the user.
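
  Example Usage:
  A minimal sketch of retrieving and then releasing the outputs (variable names are illustrative):
.vb
  PetscLayout     layout;
  PetscInt        nleaves;
  const PetscInt *ilocal;
  PetscInt       *gremote;

  PetscCall(PetscSFGetGraphLayout(sf, &layout, &nleaves, &ilocal, &gremote));
  /* ... use the graph description; ilocal is borrowed from the PetscSF and must not be freed ... */
  PetscCall(PetscLayoutDestroy(&layout));
  PetscCall(PetscFree(gremote));
.ve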

.seealso: `PetscSF`, `PetscSFSetGraphLayout()`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFGetGraphLayout(PetscSF sf, PetscLayout *layout, PetscInt *nleaves, const PetscInt *ilocal[], PetscInt *gremote[])
{
  PetscInt           nr, nl;
  const PetscSFNode *ir;
  PetscLayout        lt;

  PetscFunctionBegin;
  PetscCall(PetscSFGetGraph(sf, &nr, &nl, ilocal, &ir));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sf), nr, PETSC_DECIDE, 1, &lt));
  if (gremote) {
    PetscInt        i;
    const PetscInt *range;
    PetscInt       *gr;

    PetscCall(PetscLayoutGetRanges(lt, &range));
    PetscCall(PetscMalloc1(nl, &gr));
    for (i = 0; i < nl; i++) gr[i] = range[ir[i].rank] + ir[i].index;
    *gremote = gr;
  }
  if (nleaves) *nleaves = nl;
  if (layout) *layout = lt;
  else PetscCall(PetscLayoutDestroy(&lt));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFSetGraphSection - Sets the `PetscSF` graph encoding the parallel dof overlap based upon the `PetscSection` describing the data layout.

  Input Parameters:
+ sf            - The `PetscSF`
. localSection  - `PetscSection` describing the local data layout
- globalSection - `PetscSection` describing the global data layout

  Level: developer

.seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFSetGraphLayout()`
@*/
PetscErrorCode PetscSFSetGraphSection(PetscSF sf, PetscSection localSection, PetscSection globalSection)
{
  MPI_Comm        comm;
  PetscLayout     layout;
  const PetscInt *ranges;
  PetscInt       *local;
  PetscSFNode    *remote;
  PetscInt        pStart, pEnd, p, nroots, nleaves = 0, l;
  PetscMPIInt     size, rank;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidHeaderSpecific(localSection, PETSC_SECTION_CLASSID, 2);
  PetscValidHeaderSpecific(globalSection, PETSC_SECTION_CLASSID, 3);

  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscSectionGetChart(globalSection, &pStart, &pEnd));
  PetscCall(PetscSectionGetConstrainedStorageSize(globalSection, &nroots));
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, 1));
  PetscCall(PetscLayoutSetLocalSize(layout, nroots));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetRanges(layout, &ranges));
  for (p = pStart; p < pEnd; ++p) {
    PetscInt gdof, gcdof;

    PetscCall(PetscSectionGetDof(globalSection, p, &gdof));
    PetscCall(PetscSectionGetConstraintDof(globalSection, p, &gcdof));
    PetscCheck(gcdof <= (gdof < 0 ? -(gdof + 1) : gdof), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Point %" PetscInt_FMT " has %" PetscInt_FMT " constraints > %" PetscInt_FMT " dof", p, gcdof, gdof < 0 ? -(gdof + 1) : gdof);
    nleaves += gdof < 0 ? -(gdof + 1) - gcdof : gdof - gcdof;
  }
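  /* Second pass: allocate the leaf arrays and, for each point, record the local positions of its
     unconstrained dofs together with the owning (rank, offset) locations in the global section */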
  PetscCall(PetscMalloc1(nleaves, &local));
  PetscCall(PetscMalloc1(nleaves, &remote));
  for (p = pStart, l = 0; p < pEnd; ++p) {
    const PetscInt *cind;
    PetscInt        dof, cdof, off, gdof, gcdof, goff, gsize, d, c;

    PetscCall(PetscSectionGetDof(localSection, p, &dof));
    PetscCall(PetscSectionGetOffset(localSection, p, &off));
    PetscCall(PetscSectionGetConstraintDof(localSection, p, &cdof));
    PetscCall(PetscSectionGetConstraintIndices(localSection, p, &cind));
    PetscCall(PetscSectionGetDof(globalSection, p, &gdof));
    PetscCall(PetscSectionGetConstraintDof(globalSection, p, &gcdof));
    PetscCall(PetscSectionGetOffset(globalSection, p, &goff));
    if (!gdof) continue; /* Censored point */
    gsize = gdof < 0 ? -(gdof + 1) - gcdof : gdof - gcdof;
    if (gsize != dof - cdof) {
      PetscCheck(gsize == dof, comm, PETSC_ERR_ARG_WRONG, "Global dof %" PetscInt_FMT " for point %" PetscInt_FMT " is neither the constrained size %" PetscInt_FMT ", nor the unconstrained %" PetscInt_FMT, gsize, p, dof - cdof, dof);
      cdof = 0; /* Ignore constraints */
    }
    for (d = 0, c = 0; d < dof; ++d) {
      if ((c < cdof) && (cind[c] == d)) {
        ++c;
        continue;
      }
      local[l + d - c] = off + d;
    }
    PetscCheck(d - c == gsize, comm, PETSC_ERR_ARG_WRONG, "Point %" PetscInt_FMT ": Global dof %" PetscInt_FMT " != %" PetscInt_FMT " size - number of constraints", p, gsize, d - c);
    if (gdof < 0) {
      for (d = 0; d < gsize; ++d, ++l) {
        PetscInt    offset = -(goff + 1) + d, ir;
        PetscMPIInt r;

        PetscCall(PetscFindInt(offset, size + 1, ranges, &ir));
        PetscCall(PetscMPIIntCast(ir, &r));
        if (r < 0) r = -(r + 2);
        PetscCheck(!(r < 0) && !(r >= size), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Point %" PetscInt_FMT " mapped to invalid process %d (%" PetscInt_FMT ", %" PetscInt_FMT ")", p, r, gdof, goff);
        remote[l].rank  = r;
        remote[l].index = offset - ranges[r];
      }
    } else {
      for (d = 0; d < gsize; ++d, ++l) {
        remote[l].rank  = rank;
        remote[l].index = goff + d - ranges[rank];
      }
    }
  }
  PetscCheck(l == nleaves, comm, PETSC_ERR_PLIB, "Iteration error, l %" PetscInt_FMT " != nleaves %" PetscInt_FMT, l, nleaves);
  PetscCall(PetscLayoutDestroy(&layout));
  PetscCall(PetscSFSetGraph(sf, nroots, nleaves, local, PETSC_OWN_POINTER, remote, PETSC_OWN_POINTER));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFDistributeSection - Create a new `PetscSection` reorganized, moving from the root to the leaves of the `PetscSF`

  Collective

  Input Parameters:
+ sf          - The `PetscSF`
- rootSection - Section defined on root space

  Output Parameters:
+ remoteOffsets - root offsets in leaf storage, or `NULL`, its length will be the size of the chart of `leafSection`
- leafSection   - Section defined on the leaf space

  Level: advanced

  Note:
  Caller must `PetscFree()` `remoteOffsets` if it was requested
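
  Example Usage:
  A minimal sketch (assuming `sf`, `rootSection`, and the communicator `comm` already exist; names are illustrative):
.vb
  PetscSection leafSection;
  PetscInt    *remoteOffsets;

  PetscCall(PetscSectionCreate(comm, &leafSection));
  PetscCall(PetscSFDistributeSection(sf, rootSection, &remoteOffsets, leafSection));
  /* ... use leafSection, and remoteOffsets e.g. with PetscSFCreateSectionSF() ... */
  PetscCall(PetscFree(remoteOffsets));
  PetscCall(PetscSectionDestroy(&leafSection));
.ve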

  Fortran Note:
  Use `PetscSFDestroyRemoteOffsets()` when `remoteOffsets` is no longer needed.

.seealso: `PetscSF`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFDistributeSection(PetscSF sf, PetscSection rootSection, PetscInt *remoteOffsets[], PetscSection leafSection)
{
  PetscSF         embedSF;
  const PetscInt *indices;
  IS              selected;
  PetscInt        numFields, nroots, rpStart, rpEnd, lpStart = PETSC_INT_MAX, lpEnd = -1, f, c;
  PetscBool      *sub, hasc;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(PETSCSF_DistSect, sf, 0, 0, 0));
  PetscCall(PetscSectionGetNumFields(rootSection, &numFields));
  if (numFields) {
    IS perm;

    /* PetscSectionSetNumFields() calls PetscSectionReset(), which destroys
       leafSection->perm. To keep this permutation set by the user, we grab
       the reference before calling PetscSectionSetNumFields() and set it
       back after. */
    PetscCall(PetscSectionGetPermutation(leafSection, &perm));
    PetscCall(PetscObjectReference((PetscObject)perm));
    PetscCall(PetscSectionSetNumFields(leafSection, numFields));
    PetscCall(PetscSectionSetPermutation(leafSection, perm));
    PetscCall(ISDestroy(&perm));
  }
  PetscCall(PetscMalloc1(numFields + 2, &sub));
  sub[1] = rootSection->bc ? PETSC_TRUE : PETSC_FALSE;
  for (f = 0; f < numFields; ++f) {
    PetscSectionSym sym, dsym = NULL;
    const char     *name    = NULL;
    PetscInt        numComp = 0;

    sub[2 + f] = rootSection->field[f]->bc ? PETSC_TRUE : PETSC_FALSE;
    PetscCall(PetscSectionGetFieldComponents(rootSection, f, &numComp));
    PetscCall(PetscSectionGetFieldName(rootSection, f, &name));
    PetscCall(PetscSectionGetFieldSym(rootSection, f, &sym));
    if (sym) PetscCall(PetscSectionSymDistribute(sym, sf, &dsym));
    PetscCall(PetscSectionSetFieldComponents(leafSection, f, numComp));
    PetscCall(PetscSectionSetFieldName(leafSection, f, name));
    PetscCall(PetscSectionSetFieldSym(leafSection, f, dsym));
    PetscCall(PetscSectionSymDestroy(&dsym));
    for (c = 0; c < rootSection->numFieldComponents[f]; ++c) {
      PetscCall(PetscSectionGetComponentName(rootSection, f, c, &name));
      PetscCall(PetscSectionSetComponentName(leafSection, f, c, name));
    }
  }
  PetscCall(PetscSectionGetChart(rootSection, &rpStart, &rpEnd));
  PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
  rpEnd = PetscMin(rpEnd, nroots);
  rpEnd = PetscMax(rpStart, rpEnd);
  /* see if we can avoid creating the embedded SF, since it can cost more than an allreduce */
  sub[0] = (PetscBool)(nroots != rpEnd - rpStart);
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, sub, 2 + numFields, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)sf)));
  if (sub[0]) {
    PetscCall(ISCreateStride(PETSC_COMM_SELF, rpEnd - rpStart, rpStart, 1, &selected));
    PetscCall(ISGetIndices(selected, &indices));
    PetscCall(PetscSFCreateEmbeddedRootSF(sf, rpEnd - rpStart, indices, &embedSF));
    PetscCall(ISRestoreIndices(selected, &indices));
    PetscCall(ISDestroy(&selected));
  } else {
    PetscCall(PetscObjectReference((PetscObject)sf));
    embedSF = sf;
  }
  PetscCall(PetscSFGetLeafRange(embedSF, &lpStart, &lpEnd));
  lpEnd++;

  PetscCall(PetscSectionSetChart(leafSection, lpStart, lpEnd));

  /* Constrained dof section */
  hasc = sub[1];
  for (f = 0; f < numFields; ++f) hasc = (PetscBool)(hasc || sub[2 + f]);
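
  /* embedSF addresses roots and leaves by point number, while the atlas arrays communicated below
     are stored relative to the chart starts, so their base pointers are shifted by -rpStart / -lpStart */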
  /* Could fuse these at the cost of copies and extra allocation */
  PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->atlasDof, -lpStart), MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->atlasDof, -lpStart), MPI_REPLACE));
  if (sub[1]) {
    PetscCall(PetscSectionCheckConstraints_Private(rootSection));
    PetscCall(PetscSectionCheckConstraints_Private(leafSection));
    PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->bc->atlasDof[-rpStart], &leafSection->bc->atlasDof[-lpStart], MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->bc->atlasDof[-rpStart], &leafSection->bc->atlasDof[-lpStart], MPI_REPLACE));
  }
  for (f = 0; f < numFields; ++f) {
    PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->field[f]->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->field[f]->atlasDof, -lpStart), MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->field[f]->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->field[f]->atlasDof, -lpStart), MPI_REPLACE));
    if (sub[2 + f]) {
      PetscCall(PetscSectionCheckConstraints_Private(rootSection->field[f]));
      PetscCall(PetscSectionCheckConstraints_Private(leafSection->field[f]));
      PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasDof[-rpStart], &leafSection->field[f]->bc->atlasDof[-lpStart], MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasDof[-rpStart], &leafSection->field[f]->bc->atlasDof[-lpStart], MPI_REPLACE));
    }
  }
  if (remoteOffsets) {
    PetscCall(PetscMalloc1(lpEnd - lpStart, remoteOffsets));
    PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE));
  }
  PetscCall(PetscSectionInvalidateMaxDof_Internal(leafSection));
  PetscCall(PetscSectionSetUp(leafSection));
  if (hasc) { /* need to communicate bcIndices */
    PetscSF   bcSF;
    PetscInt *rOffBc;

    PetscCall(PetscMalloc1(lpEnd - lpStart, &rOffBc));
    if (sub[1]) {
      PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
      PetscCall(PetscSFCreateSectionSF(embedSF, rootSection->bc, rOffBc, leafSection->bc, &bcSF));
      PetscCall(PetscSFBcastBegin(bcSF, MPIU_INT, rootSection->bcIndices, leafSection->bcIndices, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(bcSF, MPIU_INT, rootSection->bcIndices, leafSection->bcIndices, MPI_REPLACE));
      PetscCall(PetscSFDestroy(&bcSF));
    }
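    /* Repeat the constraint-index transfer for each field that carries its own constraints */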
    for (f = 0; f < numFields; ++f) {
      if (sub[2 + f]) {
        PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
        PetscCall(PetscSFCreateSectionSF(embedSF, rootSection->field[f]->bc, rOffBc, leafSection->field[f]->bc, &bcSF));
        PetscCall(PetscSFBcastBegin(bcSF, MPIU_INT, rootSection->field[f]->bcIndices, leafSection->field[f]->bcIndices, MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(bcSF, MPIU_INT, rootSection->field[f]->bcIndices, leafSection->field[f]->bcIndices, MPI_REPLACE));
        PetscCall(PetscSFDestroy(&bcSF));
      }
    }
    PetscCall(PetscFree(rOffBc));
  }
  PetscCall(PetscSFDestroy(&embedSF));
  PetscCall(PetscFree(sub));
  PetscCall(PetscLogEventEnd(PETSCSF_DistSect, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFCreateRemoteOffsets - Create offsets for point data on remote processes

  Collective

  Input Parameters:
+ sf          - The `PetscSF`
. rootSection - Data layout of remote points for outgoing data (this is layout for roots)
- leafSection - Data layout of local points for incoming data (this is layout for leaves)

  Output Parameter:
. remoteOffsets - Offsets for point data on remote processes (these are offsets from the root section), or `NULL`

  Level: developer

  Note:
  Caller must `PetscFree()` `remoteOffsets` if it was requested

  Fortran Note:
  Use `PetscSFDestroyRemoteOffsets()` when `remoteOffsets` is no longer needed.

.seealso: `PetscSF`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFCreateRemoteOffsets(PetscSF sf, PetscSection rootSection, PetscSection leafSection, PetscInt *remoteOffsets[])
{
  PetscSF         embedSF;
  const PetscInt *indices;
  IS              selected;
  PetscInt        numRoots, rpStart = 0, rpEnd = 0, lpStart = 0, lpEnd = 0;

  PetscFunctionBegin;
  *remoteOffsets = NULL;
  PetscCall(PetscSFGetGraph(sf, &numRoots, NULL, NULL, NULL));
  if (numRoots < 0) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscLogEventBegin(PETSCSF_RemoteOff, sf, 0, 0, 0));
  PetscCall(PetscSectionGetChart(rootSection, &rpStart, &rpEnd));
  PetscCall(PetscSectionGetChart(leafSection, &lpStart, &lpEnd));
  PetscCall(ISCreateStride(PETSC_COMM_SELF, rpEnd - rpStart, rpStart, 1, &selected));
  PetscCall(ISGetIndices(selected, &indices));
  PetscCall(PetscSFCreateEmbeddedRootSF(sf, rpEnd - rpStart, indices, &embedSF));
  PetscCall(ISRestoreIndices(selected, &indices));
  PetscCall(ISDestroy(&selected));
  PetscCall(PetscCalloc1(lpEnd - lpStart, remoteOffsets));
  PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE));
  PetscCall(PetscSFDestroy(&embedSF));
  PetscCall(PetscLogEventEnd(PETSCSF_RemoteOff, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFCreateSectionSF - Create an expanded `PetscSF` of dofs, assuming the input `PetscSF` relates points

  Collective

  Input Parameters:
+ sf            - The `PetscSF`
. rootSection   - Data layout of remote points for outgoing data (this is usually the serial section)
. remoteOffsets - Offsets for point data on remote processes (these are offsets from the root section), or NULL
- leafSection   - Data layout of local points for incoming data (this is the distributed section)

  Output Parameter:
. sectionSF - The new `PetscSF`

  Level: advanced

  Notes:
  `remoteOffsets` can be `NULL` if `sf` does not reference any points in leafSection
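
  Example Usage:
  A sketch of the common pattern of distributing a section and then building the dof-level `PetscSF` (names are illustrative):
.vb
  PetscSF   sectionSF;
  PetscInt *remoteOffsets;

  PetscCall(PetscSFDistributeSection(pointSF, rootSection, &remoteOffsets, leafSection));
  PetscCall(PetscSFCreateSectionSF(pointSF, rootSection, remoteOffsets, leafSection, &sectionSF));
  PetscCall(PetscFree(remoteOffsets));
.ve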

.seealso: `PetscSF`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFCreateSectionSF(PetscSF sf, PetscSection rootSection, PetscInt remoteOffsets[], PetscSection leafSection, PetscSF *sectionSF)
{
  MPI_Comm           comm;
  const PetscInt    *localPoints;
  const PetscSFNode *remotePoints;
  PetscInt           lpStart, lpEnd;
  PetscInt           numRoots, numSectionRoots, numPoints, numIndices = 0;
  PetscInt          *localIndices;
  PetscSFNode       *remoteIndices;
  PetscInt           i, ind;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscAssertPointer(rootSection, 2);
  /* Cannot check PetscAssertPointer(remoteOffsets,3) because it can be NULL if sf does not reference any points in leafSection */
  PetscAssertPointer(leafSection, 4);
  PetscAssertPointer(sectionSF, 5);
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCall(PetscSFCreate(comm, sectionSF));
  PetscCall(PetscSectionGetChart(leafSection, &lpStart, &lpEnd));
  PetscCall(PetscSectionGetStorageSize(rootSection, &numSectionRoots));
  PetscCall(PetscSFGetGraph(sf, &numRoots, &numPoints, &localPoints, &remotePoints));
  if (numRoots < 0) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscLogEventBegin(PETSCSF_SectSF, sf, 0, 0, 0));
  for (i = 0; i < numPoints; ++i) {
    PetscInt localPoint = localPoints ? localPoints[i] : i;
    PetscInt dof;

    if ((localPoint >= lpStart) && (localPoint < lpEnd)) {
      PetscCall(PetscSectionGetDof(leafSection, localPoint, &dof));
      numIndices += dof < 0 ? 0 : dof;
    }
  }
  PetscCall(PetscMalloc1(numIndices, &localIndices));
  PetscCall(PetscMalloc1(numIndices, &remoteIndices));
  /* Create new index graph */
  for (i = 0, ind = 0; i < numPoints; ++i) {
    PetscInt localPoint = localPoints ? localPoints[i] : i;
    PetscInt rank       = remotePoints[i].rank;

    if ((localPoint >= lpStart) && (localPoint < lpEnd)) {
      PetscInt remoteOffset = remoteOffsets[localPoint - lpStart];
      PetscInt loff, dof, d;

      PetscCall(PetscSectionGetOffset(leafSection, localPoint, &loff));
      PetscCall(PetscSectionGetDof(leafSection, localPoint, &dof));
      for (d = 0; d < dof; ++d, ++ind) {
        localIndices[ind]        = loff + d;
        remoteIndices[ind].rank  = rank;
        remoteIndices[ind].index = remoteOffset + d;
      }
    }
  }
  PetscCheck(numIndices == ind, comm, PETSC_ERR_PLIB, "Inconsistency in indices, %" PetscInt_FMT " should be %" PetscInt_FMT, ind, numIndices);
  PetscCall(PetscSFSetGraph(*sectionSF, numSectionRoots, numIndices, localIndices, PETSC_OWN_POINTER, remoteIndices, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetUp(*sectionSF));
  PetscCall(PetscLogEventEnd(PETSCSF_SectSF, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFCreateFromLayouts - Creates a parallel star forest mapping two `PetscLayout` objects

  Collective

  Input Parameters:
+ rmap - `PetscLayout` defining the global root space
- lmap - `PetscLayout` defining the global leaf space

  Output Parameter:
. sf - The parallel star forest

  Level: intermediate
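
  Example Usage:
  A minimal sketch (assuming `comm`, a global root count `N`, and a local leaf count `nlocal` are defined):
.vb
  PetscLayout rmap, lmap;
  PetscSF     sf;

  PetscCall(PetscLayoutCreateFromSizes(comm, PETSC_DECIDE, N, 1, &rmap));          /* root space: N global entries */
  PetscCall(PetscLayoutCreateFromSizes(comm, nlocal, PETSC_DETERMINE, 1, &lmap));  /* leaf space: nlocal entries per process */
  PetscCall(PetscSFCreateFromLayouts(rmap, lmap, &sf));
  PetscCall(PetscLayoutDestroy(&rmap));
  PetscCall(PetscLayoutDestroy(&lmap));
.ve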

.seealso: `PetscSF`, `PetscSFCreate()`, `PetscLayoutCreate()`, `PetscSFSetGraphLayout()`
@*/
PetscErrorCode PetscSFCreateFromLayouts(PetscLayout rmap, PetscLayout lmap, PetscSF *sf)
{
  PetscInt     i, nroots, nleaves = 0;
  PetscInt     rN, lst, len;
  PetscMPIInt  owner = -1;
  PetscSFNode *remote;
  MPI_Comm     rcomm = rmap->comm;
  MPI_Comm     lcomm = lmap->comm;
  PetscMPIInt  flg;

  PetscFunctionBegin;
  PetscAssertPointer(sf, 3);
  PetscCheck(rmap->setupcalled, rcomm, PETSC_ERR_ARG_WRONGSTATE, "Root layout not setup");
  PetscCheck(lmap->setupcalled, lcomm, PETSC_ERR_ARG_WRONGSTATE, "Leaf layout not setup");
  PetscCallMPI(MPI_Comm_compare(rcomm, lcomm, &flg));
  PetscCheck(flg == MPI_CONGRUENT || flg == MPI_IDENT, rcomm, PETSC_ERR_SUP, "cannot map two layouts with non-matching communicators");
  PetscCall(PetscSFCreate(rcomm, sf));
  PetscCall(PetscLayoutGetLocalSize(rmap, &nroots));
  PetscCall(PetscLayoutGetSize(rmap, &rN));
  PetscCall(PetscLayoutGetRange(lmap, &lst, &len));
  PetscCall(PetscMalloc1(len - lst, &remote));
  for (i = lst; i < len && i < rN; i++) {
    if (owner < -1 || i >= rmap->range[owner + 1]) PetscCall(PetscLayoutFindOwner(rmap, i, &owner));
    remote[nleaves].rank  = owner;
    remote[nleaves].index = i - rmap->range[owner];
    nleaves++;
  }
  PetscCall(PetscSFSetGraph(*sf, nroots, nleaves, NULL, PETSC_OWN_POINTER, remote, PETSC_COPY_VALUES));
  PetscCall(PetscFree(remote));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Helper without a manual page: builds a temporary SF whose leaves are the N input global indices
   and whose roots are the locally owned entries of map, to determine which owned entries are
   referenced by idxs[] on some process. Optionally returns their count (on), their local indices
   (oidxs), and new global numbers obtained by numbering the input lists contiguously across
   processes (ogidxs). */
/* TODO: handle nooffprocentries like MatZeroRowsMapLocal_Private, since this code is the same */
PetscErrorCode PetscLayoutMapLocal(PetscLayout map, PetscInt N, const PetscInt idxs[], PetscInt *on, PetscInt *oidxs[], PetscInt *ogidxs[])
{
  PetscInt    *owners = map->range;
  PetscInt     n      = map->n;
  PetscSF      sf;
  PetscInt    *lidxs, *work = NULL;
  PetscSFNode *ridxs;
  PetscMPIInt  rank, p = 0;
  PetscInt     r, len = 0;

  PetscFunctionBegin;
  if (on) *on = 0; /* squelch -Wmaybe-uninitialized */
  /* Create SF where leaves are input idxs and roots are owned idxs */
  PetscCallMPI(MPI_Comm_rank(map->comm, &rank));
  PetscCall(PetscMalloc1(n, &lidxs));
  for (r = 0; r < n; ++r) lidxs[r] = -1;
  PetscCall(PetscMalloc1(N, &ridxs));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = idxs[r];
    PetscCheck(idx >= 0 && idx < map->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Index %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, map->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this idx too */
      PetscCall(PetscLayoutFindOwner(map, idx, &p));
    }
    ridxs[r].rank  = p;
    ridxs[r].index = idxs[r] - owners[p];
  }
  PetscCall(PetscSFCreate(map->comm, &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, ridxs, PETSC_OWN_POINTER));
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)idxs, lidxs, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)idxs, lidxs, MPI_LOR));
  if (ogidxs) { /* communicate global idxs */
    PetscInt cum = 0, start, *work2;

    PetscCall(PetscMalloc1(n, &work));
    PetscCall(PetscCalloc1(N, &work2));
    for (r = 0; r < N; ++r)
      if (idxs[r] >= 0) cum++;
    PetscCallMPI(MPI_Scan(&cum, &start, 1, MPIU_INT, MPI_SUM, map->comm));
    start -= cum;
    cum = 0;
    for (r = 0; r < N; ++r)
      if (idxs[r] >= 0) work2[r] = start + cum++;
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work2, work, MPI_REPLACE));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work2, work, MPI_REPLACE));
    PetscCall(PetscFree(work2));
  }
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in indices */
  for (r = 0; r < n; ++r)
    if (lidxs[r] >= 0) {
      if (work) work[len] = work[r];
      lidxs[len++] = r;
    }
  if (on) *on = len;
  if (oidxs) *oidxs = lidxs;
  if (ogidxs) *ogidxs = work;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFCreateByMatchingIndices - Create `PetscSF` by matching root and leaf indices

  Collective

  Input Parameters:
+ layout           - `PetscLayout` defining the global index space and the rank that brokers each index
. numRootIndices   - size of rootIndices
. rootIndices      - `PetscInt` array of global indices of which this process requests ownership
. rootLocalIndices - root local index permutation (NULL if no permutation)
. rootLocalOffset  - offset to be added to root local indices
. numLeafIndices   - size of leafIndices
. leafIndices      - `PetscInt` array of global indices with which this process requires data associated
. leafLocalIndices - leaf local index permutation (NULL if no permutation)
- leafLocalOffset  - offset to be added to leaf local indices

  Output Parameters:
+ sfA - star forest representing the communication pattern from the layout space to the leaf space (NULL if not needed)
- sf  - star forest representing the communication pattern from the root space to the leaf space

  Level: advanced

  Example 1:
.vb
  rank            : 0            1            2
  rootIndices     : [1 0 2]      [3]          [3]
  rootLocalOffset : 100          200          300
  layout          : [0 1]        [2]          [3]
  leafIndices     : [0]          [2]          [0 3]
  leafLocalOffset : 400          500          600

  would build the following PetscSF

  [0] 400 <- (0,101)
  [1] 500 <- (0,102)
  [2] 600 <- (0,101)
  [2] 601 <- (2,300)
.ve

  Example 2:
.vb
  rank            : 0               1               2
  rootIndices     : [1 0 2]         [3]             [3]
  rootLocalOffset : 100             200             300
  layout          : [0 1]           [2]             [3]
  leafIndices     : rootIndices     rootIndices     rootIndices
  leafLocalOffset : rootLocalOffset rootLocalOffset rootLocalOffset

  would build the following PetscSF

  [1] 200 <- (2,300)
.ve

  Example 3:
.vb
  No process requests ownership of global index 1, but no process needs it.

  rank            : 0            1            2
  numRootIndices  : 2            1            1
  rootIndices     : [0 2]        [3]          [3]
  rootLocalOffset : 100          200          300
  layout          : [0 1]        [2]          [3]
  numLeafIndices  : 1            1            2
  leafIndices     : [0]          [2]          [0 3]
  leafLocalOffset : 400          500          600

  would build the following PetscSF

  [0] 400 <- (0,100)
  [1] 500 <- (0,101)
  [2] 600 <- (0,100)
  [2] 601 <- (2,300)
.ve

  Notes:
  The layout parameter represents any partitioning of [0, N), where N is the total number of global indices, and its
  local size can be set to `PETSC_DECIDE`.

  If a global index x lies in the partition owned by process i, each process whose rootIndices contains x requests
  ownership of x and sends its own rank and the local index of x to process i.
  If multiple processes request ownership of x, the one with the highest rank is to own x.
  Process i then broadcasts the ownership information, so that each process whose leafIndices contains x knows the
  ownership information of x.
  The output sf is constructed by associating each leaf point to a root point in this way.
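  For instance, in Example 1 above, global index 3 is requested by both rank 1 and rank 2; rank 2 wins as the higher rank, so the leaf at 601 on rank 2 is connected to root (2,300).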

  Suppose there is point data ordered according to the global indices and partitioned according to the given layout.
  The optional output `PetscSF` object sfA can be used to push such data to leaf points.

  All indices in rootIndices and leafIndices must lie in the layout range. The union (over all processes) of rootIndices
  must cover that of leafIndices, but need not cover the entire layout.

  If (leafIndices, leafLocalIndices, leafLocalOffset) == (rootIndices, rootLocalIndices, rootLocalOffset), the output
  star forest is almost the identity, so it will only include the non-trivial part of the map.

  Developer Notes:
  The current approach, in which the process with the highest rank gains the ownership, may cause load imbalance; consider using
  hash(rank, root_local_index) as the bid for the ownership determination.

.seealso: `PetscSF`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFCreateByMatchingIndices(PetscLayout layout, PetscInt numRootIndices, const PetscInt *rootIndices, const PetscInt *rootLocalIndices, PetscInt rootLocalOffset, PetscInt numLeafIndices, const PetscInt *leafIndices, const PetscInt *leafLocalIndices, PetscInt leafLocalOffset, PetscSF *sfA, PetscSF *sf)
{
  MPI_Comm     comm = layout->comm;
  PetscMPIInt  size, rank;
  PetscSF      sf1;
  PetscSFNode *owners, *buffer, *iremote;
  PetscInt    *ilocal, nleaves, N, n, i;
#if defined(PETSC_USE_DEBUG)
  PetscInt N1;
#endif
  PetscBool flag;

  PetscFunctionBegin;
  if (rootIndices) PetscAssertPointer(rootIndices, 3);
  if (rootLocalIndices) PetscAssertPointer(rootLocalIndices, 4);
  if (leafIndices) PetscAssertPointer(leafIndices, 7);
  if (leafLocalIndices) PetscAssertPointer(leafLocalIndices, 8);
  if (sfA) PetscAssertPointer(sfA, 10);
  PetscAssertPointer(sf, 11);
  PetscCheck(numRootIndices >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "numRootIndices (%" PetscInt_FMT ") must be non-negative", numRootIndices);
  PetscCheck(numLeafIndices >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "numLeafIndices (%" PetscInt_FMT ") must be non-negative", numLeafIndices);
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetSize(layout, &N));
  PetscCall(PetscLayoutGetLocalSize(layout, &n));
  flag = (PetscBool)(leafIndices == rootIndices);
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &flag, 1, MPIU_BOOL, MPI_LAND, comm));
  PetscCheck(!flag || numLeafIndices == numRootIndices, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "leafIndices == rootIndices, but numLeafIndices (%" PetscInt_FMT ") != numRootIndices (%" PetscInt_FMT ")", numLeafIndices, numRootIndices);
#if defined(PETSC_USE_DEBUG)
  N1 = PETSC_INT_MIN;
  for (i = 0; i < numRootIndices; i++)
    if (rootIndices[i] > N1) N1 = rootIndices[i];
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &N1, 1, MPIU_INT, MPI_MAX, comm));
  PetscCheck(N1 < N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Max. root index (%" PetscInt_FMT ") out of layout range [0,%" PetscInt_FMT ")", N1, N);
  if (!flag) {
    N1 = PETSC_INT_MIN;
    for (i = 0; i < numLeafIndices; i++)
      if (leafIndices[i] > N1) N1 = leafIndices[i];
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &N1, 1, MPIU_INT, MPI_MAX, comm));
    PetscCheck(N1 < N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Max. leaf index (%" PetscInt_FMT ") out of layout range [0,%" PetscInt_FMT ")", N1, N);
  }
#endif
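  /* Two-phase matching brokered by the layout: each process that claims an index first reduces its
     (rank, local index) pair onto the index's broker, where MPI_MAXLOC keeps the highest-ranked
     claimant; the broker then broadcasts the winning owner to every process that needs the index */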
  /* Reduce: owners -> buffer */
  PetscCall(PetscMalloc1(n, &buffer));
  PetscCall(PetscSFCreate(comm, &sf1));
  PetscCall(PetscSFSetFromOptions(sf1));
  PetscCall(PetscSFSetGraphLayout(sf1, layout, numRootIndices, NULL, PETSC_OWN_POINTER, rootIndices));
  PetscCall(PetscMalloc1(numRootIndices, &owners));
  for (i = 0; i < numRootIndices; ++i) {
    owners[i].rank  = rank;
    owners[i].index = rootLocalOffset + (rootLocalIndices ? rootLocalIndices[i] : i);
  }
  for (i = 0; i < n; ++i) {
    buffer[i].index = -1;
    buffer[i].rank  = -1;
  }
  PetscCall(PetscSFReduceBegin(sf1, MPIU_SF_NODE, owners, buffer, MPI_MAXLOC));
  PetscCall(PetscSFReduceEnd(sf1, MPIU_SF_NODE, owners, buffer, MPI_MAXLOC));
  /* Bcast: buffer -> owners */
  if (!flag) {
    /* leafIndices is different from rootIndices */
    PetscCall(PetscFree(owners));
    PetscCall(PetscSFSetGraphLayout(sf1, layout, numLeafIndices, NULL, PETSC_OWN_POINTER, leafIndices));
    PetscCall(PetscMalloc1(numLeafIndices, &owners));
  }
  PetscCall(PetscSFBcastBegin(sf1, MPIU_SF_NODE, buffer, owners, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf1, MPIU_SF_NODE, buffer, owners, MPI_REPLACE));
  for (i = 0; i < numLeafIndices; ++i) PetscCheck(owners[i].rank >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Global point %" PetscInt_FMT " was unclaimed", leafIndices[i]);
  PetscCall(PetscFree(buffer));
  if (sfA) {
    *sfA = sf1;
  } else PetscCall(PetscSFDestroy(&sf1));
  /* Create sf */
  if (flag && rootLocalIndices == leafLocalIndices && leafLocalOffset == rootLocalOffset) {
    /* leaf space == root space */
    for (i = 0, nleaves = 0; i < numLeafIndices; ++i)
      if (owners[i].rank != rank) ++nleaves;
    PetscCall(PetscMalloc1(nleaves, &ilocal));
    PetscCall(PetscMalloc1(nleaves, &iremote));
    for (i = 0, nleaves = 0; i < numLeafIndices; ++i) {
      if (owners[i].rank != rank) {
        ilocal[nleaves]        = leafLocalOffset + i;
        iremote[nleaves].rank  = owners[i].rank;
        iremote[nleaves].index = owners[i].index;
        ++nleaves;
      }
    }
    PetscCall(PetscFree(owners));
  } else {
    nleaves = numLeafIndices;
    PetscCall(PetscMalloc1(nleaves, &ilocal));
    for (i = 0; i < nleaves; ++i) ilocal[i] = leafLocalOffset + (leafLocalIndices ? leafLocalIndices[i] : i);
    iremote = owners;
  }
  PetscCall(PetscSFCreate(comm, sf));
  PetscCall(PetscSFSetFromOptions(*sf));
  PetscCall(PetscSFSetGraph(*sf, rootLocalOffset + numRootIndices, nleaves, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFMerge - append/merge indices of `sfb` into `sfa`, with preference for `sfb`

  Collective

  Input Parameters:
+ sfa - default `PetscSF`
- sfb - additional edges to add/replace edges in sfa

  Output Parameter:
. merged - new `PetscSF` with combined edges

  Level: intermediate
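
  Example:
  A leaf present in both inputs takes its root from `sfb`; writing roots as (rank, index) pairs:
.vb
  sfa    : leaf 3 <- (0,5),  leaf 7 <- (0,9)
  sfb    : leaf 3 <- (1,2)
  merged : leaf 3 <- (1,2),  leaf 7 <- (0,9)
.ve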

.seealso: `PetscSFCompose()`
@*/
PetscErrorCode PetscSFMerge(PetscSF sfa, PetscSF sfb, PetscSF *merged)
{
  PetscInt maxleaf;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfa, PETSCSF_CLASSID, 1);
  PetscValidHeaderSpecific(sfb, PETSCSF_CLASSID, 2);
  PetscCheckSameComm(sfa, 1, sfb, 2);
  PetscAssertPointer(merged, 3);
  {
    PetscInt aleaf, bleaf;
    PetscCall(PetscSFGetLeafRange(sfa, NULL, &aleaf));
    PetscCall(PetscSFGetLeafRange(sfb, NULL, &bleaf));
    maxleaf = PetscMax(aleaf, bleaf) + 1; // One more than the last index
  }
  PetscInt          *clocal, aroots, aleaves, broots, bleaves;
  PetscSFNode       *cremote;
  const PetscInt    *alocal, *blocal;
  const PetscSFNode *aremote, *bremote;
  PetscCall(PetscMalloc2(maxleaf, &clocal, maxleaf, &cremote));
  for (PetscInt i = 0; i < maxleaf; i++) clocal[i] = -1;
  PetscCall(PetscSFGetGraph(sfa, &aroots, &aleaves, &alocal, &aremote));
  PetscCall(PetscSFGetGraph(sfb, &broots, &bleaves, &blocal, &bremote));
  PetscCheck(aroots == broots, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Both sfa and sfb must have the same root space");
  for (PetscInt i = 0; i < aleaves; i++) {
    PetscInt a = alocal ? alocal[i] : i;
    clocal[a]  = a;
    cremote[a] = aremote[i];
  }
  for (PetscInt i = 0; i < bleaves; i++) {
    PetscInt b = blocal ? blocal[i] : i;
    clocal[b]  = b;
    cremote[b] = bremote[i];
  }
  PetscInt nleaves = 0;
  for (PetscInt i = 0; i < maxleaf; i++) {
    if (clocal[i] < 0) continue;
    clocal[nleaves]  = clocal[i];
    cremote[nleaves] = cremote[i];
    nleaves++;
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfa), merged));
  PetscCall(PetscSFSetGraph(*merged, aroots, nleaves, clocal, PETSC_COPY_VALUES, cremote, PETSC_COPY_VALUES));
  PetscCall(PetscFree2(clocal, cremote));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFCreateStridedSF - Create a `PetscSF` to communicate interleaved blocks of data

  Collective

  Input Parameters:
+ sf  - star forest
. bs  - stride
. ldr - leading dimension of root space
- ldl - leading dimension of leaf space

  Output Parameter:
. vsf - the new `PetscSF`

  Level: intermediate

  Notes:
  This can be useful to perform communications on blocks of right-hand sides. For example, the calling sequence
.vb
  c_datatype *roots, *leaves;
  for i in [0,bs) do
    PetscSFBcastBegin(sf, mpi_datatype, roots + i*ldr, leaves + i*ldl, op)
    PetscSFBcastEnd(sf, mpi_datatype, roots + i*ldr, leaves + i*ldl, op)
.ve
  is equivalent to
.vb
  c_datatype *roots, *leaves;
  PetscSFCreateStridedSF(sf, bs, ldr, ldl, &vsf)
  PetscSFBcastBegin(vsf, mpi_datatype, roots, leaves, op)
  PetscSFBcastEnd(vsf, mpi_datatype, roots, leaves, op)
.ve
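
  Passing `PETSC_DECIDE` for `ldr` uses the number of local roots, and passing `PETSC_DECIDE` for `ldl` uses one more than the largest local leaf index.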

  Developer Notes:
  Should this functionality be handled with a new API instead of creating a new object?

.seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFCreateStridedSF(PetscSF sf, PetscInt bs, PetscInt ldr, PetscInt ldl, PetscSF *vsf)
{
  PetscSF            rankssf;
  const PetscSFNode *iremote, *sfrremote;
  PetscSFNode       *viremote;
  const PetscInt    *ilocal;
  PetscInt          *vilocal = NULL, *ldrs;
  PetscInt           nranks, nr, nl, vnr, vnl, maxl;
  PetscMPIInt        rank;
  MPI_Comm           comm;
  PetscSFType        sftype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveInt(sf, bs, 2);
  PetscAssertPointer(vsf, 5);
  if (bs == 1) {
    PetscCall(PetscObjectReference((PetscObject)sf));
    *vsf = sf;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscSFGetGraph(sf, &nr, &nl, &ilocal, &iremote));
  PetscCall(PetscSFGetLeafRange(sf, NULL, &maxl));
  maxl += 1;
  if (ldl == PETSC_DECIDE) ldl = maxl;
  if (ldr == PETSC_DECIDE) ldr = nr;
  PetscCheck(ldr >= nr, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid leading dimension %" PetscInt_FMT ", it must be at least the number of roots %" PetscInt_FMT, ldr, nr);
  PetscCheck(ldl >= maxl, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid leading dimension %" PetscInt_FMT ", it must be larger than the leaf range %" PetscInt_FMT, ldl, maxl - 1);
  vnr = nr * bs;
  vnl = nl * bs;
  PetscCall(PetscMalloc1(vnl, &viremote));
  PetscCall(PetscMalloc1(vnl, &vilocal));

  /* Communicate root leading dimensions to leaf ranks */
  PetscCall(PetscSFGetRanksSF(sf, &rankssf));
  PetscCall(PetscSFGetGraph(rankssf, NULL, &nranks, NULL, &sfrremote));
  PetscCall(PetscMalloc1(nranks, &ldrs));
  PetscCall(PetscSFBcastBegin(rankssf, MPIU_INT, &ldr, ldrs, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rankssf, MPIU_INT, &ldr, ldrs, MPI_REPLACE));

  for (PetscInt i = 0, rold = -1, lda = -1; i < nl; i++) {
    const PetscInt r  = iremote[i].rank;
    const PetscInt ii = iremote[i].index;

    if (r == rank) lda = ldr;
    else if (rold != r) {
      PetscInt j;

      for (j = 0; j < nranks; j++)
        if (sfrremote[j].rank == r) break;
      PetscCheck(j < nranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to locate neighbor rank %" PetscInt_FMT, r);
      lda = ldrs[j];
    }
    rold = r;
    for (PetscInt v = 0; v < bs; v++) {
      viremote[v * nl + i].rank  = r;
      viremote[v * nl + i].index = v * lda + ii;
      vilocal[v * nl + i]        = v * ldl + (ilocal ? ilocal[i] : i);
    }
  }
  PetscCall(PetscFree(ldrs));
  PetscCall(PetscSFCreate(comm, vsf));
  PetscCall(PetscSFGetType(sf, &sftype));
  PetscCall(PetscSFSetType(*vsf, sftype));
  PetscCall(PetscSFSetGraph(*vsf, vnr, vnl, vilocal, PETSC_OWN_POINTER, viremote, PETSC_OWN_POINTER));
  PetscFunctionReturn(PETSC_SUCCESS);
}