#include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
#include <petsc/private/sectionimpl.h>

/*@
  PetscSFSetGraphLayout - Set a `PetscSF` communication pattern using global indices and a `PetscLayout`

  Collective

  Input Parameters:
+ sf        - star forest
. layout    - `PetscLayout` defining the global space for roots, i.e. which roots are owned by each MPI process
. nleaves   - number of leaf vertices on the current process, each of these references a root on any MPI process
. ilocal    - locations of leaves in leafdata buffers, pass `NULL` for contiguous storage, that is the locations are in [0,`nleaves`)
. localmode - copy mode for `ilocal`
- gremote   - root vertices in global numbering corresponding to the leaves

  Level: intermediate

  Note:
  Global indices must lie in [0, N) where N is the global size of `layout`.
  Leaf indices in `ilocal` get sorted; this means the user-provided array gets sorted if localmode is `PETSC_OWN_POINTER`.

  Developer Notes:
  Local indices which are the identity permutation in the range [0,`nleaves`) are discarded as they
  encode contiguous storage. In such case, if localmode is `PETSC_OWN_POINTER`, the memory is deallocated as it is not
  needed

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFGetGraphLayout()`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFSetGraphLayout(PetscSF sf, PetscLayout layout, PetscInt nleaves, PetscInt ilocal[], PetscCopyMode localmode, const PetscInt gremote[])
{
  const PetscInt *range;
  /* ls/ln cache the start and length of the most recently found owner's range,
     lr caches that owner's rank, so consecutive global indices that fall in the
     same owner's range avoid a repeated binary search */
  PetscInt    i, nroots, ls = -1, ln = -1;
  PetscMPIInt lr = -1;
  PetscSFNode *remote;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscAssertPointer(layout, 2);
  if (nleaves > 0 && ilocal) PetscAssertPointer(ilocal, 4);
  if (nleaves > 0) PetscAssertPointer(gremote, 6);
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout, &nroots));
  PetscCall(PetscLayoutGetRanges(layout, &range));
  PetscCall(PetscMalloc1(nleaves, &remote));
  /* force the cached range to miss on the first iteration so the owner search runs */
  if (nleaves) ls = gremote[0] + 1;
  for (i = 0; i < nleaves; i++) {
    const PetscInt idx = gremote[i] - ls;
    if (idx < 0 || idx >= ln) { /* short-circuit the search */
      PetscCall(PetscLayoutFindOwnerIndex(layout, gremote[i], &lr, &remote[i].index));
      remote[i].rank = lr;
      ls             = range[lr];
      ln             = range[lr + 1] - ls;
    } else {
      /* cache hit: same owner as the previous index */
      remote[i].rank  = lr;
      remote[i].index = idx;
    }
  }
  PetscCall(PetscSFSetGraph(sf, nroots, nleaves, ilocal, localmode, remote, PETSC_OWN_POINTER));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFGetGraphLayout - Get the global indices and `PetscLayout` that describe a `PetscSF`

  Collective

  Input Parameter:
. sf - star forest

  Output Parameters:
+ layout  - `PetscLayout` defining the global space for roots
. nleaves - number of leaf vertices on the current process, each of these references a root on any process
.
ilocal - locations of leaves in leafdata buffers, or `NULL` for contiguous storage 75 - gremote - root vertices in global numbering corresponding to the leaves 76 77 Level: intermediate 78 79 Notes: 80 The outputs are such that passing them as inputs to `PetscSFSetGraphLayout()` would lead to the same star forest. 81 The outputs `layout` and `gremote` are freshly created each time this function is called, 82 so they need to be freed (with `PetscLayoutDestroy()` and `PetscFree()`) by the user. 83 84 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraphLayout()`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 85 @*/ 86 PetscErrorCode PetscSFGetGraphLayout(PetscSF sf, PetscLayout *layout, PetscInt *nleaves, const PetscInt *ilocal[], PetscInt *gremote[]) 87 { 88 PetscInt nr, nl; 89 const PetscSFNode *ir; 90 PetscLayout lt; 91 92 PetscFunctionBegin; 93 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 94 if (layout) PetscAssertPointer(layout, 2); 95 if (nleaves) PetscAssertPointer(nleaves, 3); 96 if (ilocal) PetscAssertPointer(ilocal, 4); 97 if (gremote) PetscAssertPointer(gremote, 5); 98 PetscCall(PetscSFGetGraph(sf, &nr, &nl, ilocal, &ir)); 99 PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sf), nr, PETSC_DECIDE, 1, <)); 100 if (gremote) { 101 PetscInt i; 102 const PetscInt *range; 103 PetscInt *gr; 104 105 PetscCall(PetscLayoutGetRanges(lt, &range)); 106 PetscCall(PetscMalloc1(nl, &gr)); 107 for (i = 0; i < nl; i++) gr[i] = range[ir[i].rank] + ir[i].index; 108 *gremote = gr; 109 } 110 if (nleaves) *nleaves = nl; 111 if (layout) *layout = lt; 112 else PetscCall(PetscLayoutDestroy(<)); 113 PetscFunctionReturn(PETSC_SUCCESS); 114 } 115 116 /*@ 117 PetscSFSetGraphSection - Sets the `PetscSF` graph (communication pattern) encoding the parallel dof overlap based upon the `PetscSection` describing the data layout. 118 119 Input Parameters: 120 + sf - The `PetscSF` 121 . 
localSection  - `PetscSection` describing the local data layout
- globalSection - `PetscSection` describing the global data layout

  Level: developer

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFSetGraphLayout()`
@*/
PetscErrorCode PetscSFSetGraphSection(PetscSF sf, PetscSection localSection, PetscSection globalSection)
{
  MPI_Comm        comm;
  PetscLayout     layout;
  const PetscInt *ranges;
  PetscInt       *local;
  PetscSFNode    *remote;
  PetscInt        pStart, pEnd, p, nroots, nleaves = 0, l;
  PetscMPIInt     size, rank;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidHeaderSpecific(localSection, PETSC_SECTION_CLASSID, 2);
  PetscValidHeaderSpecific(globalSection, PETSC_SECTION_CLASSID, 3);

  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscSectionGetChart(globalSection, &pStart, &pEnd));
  PetscCall(PetscSectionGetConstrainedStorageSize(globalSection, &nroots));
  /* layout of owned (unconstrained) dofs across ranks; its ranges translate global offsets to owner ranks */
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, 1));
  PetscCall(PetscLayoutSetLocalSize(layout, nroots));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetRanges(layout, &ranges));
  /* First pass: count leaves. A negative gdof (-(gdof+1) encoding) marks a point owned by another process. */
  for (p = pStart; p < pEnd; ++p) {
    PetscInt gdof, gcdof;

    PetscCall(PetscSectionGetDof(globalSection, p, &gdof));
    PetscCall(PetscSectionGetConstraintDof(globalSection, p, &gcdof));
    PetscCheck(gcdof <= (gdof < 0 ? -(gdof + 1) : gdof), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Point %" PetscInt_FMT " has %" PetscInt_FMT " constraints > %" PetscInt_FMT " dof", p, gcdof, gdof < 0 ? -(gdof + 1) : gdof);
    nleaves += gdof < 0 ? -(gdof + 1) - gcdof : gdof - gcdof;
  }
  PetscCall(PetscMalloc1(nleaves, &local));
  PetscCall(PetscMalloc1(nleaves, &remote));
  /* Second pass: fill local leaf indices (skipping constrained dofs) and remote (rank, index) targets */
  for (p = pStart, l = 0; p < pEnd; ++p) {
    const PetscInt *cind;
    PetscInt        dof, cdof, off, gdof, gcdof, goff, gsize, d, c;

    PetscCall(PetscSectionGetDof(localSection, p, &dof));
    PetscCall(PetscSectionGetOffset(localSection, p, &off));
    PetscCall(PetscSectionGetConstraintDof(localSection, p, &cdof));
    PetscCall(PetscSectionGetConstraintIndices(localSection, p, &cind));
    PetscCall(PetscSectionGetDof(globalSection, p, &gdof));
    PetscCall(PetscSectionGetConstraintDof(globalSection, p, &gcdof));
    PetscCall(PetscSectionGetOffset(globalSection, p, &goff));
    if (!gdof) continue; /* Censored point */
    gsize = gdof < 0 ? -(gdof + 1) - gcdof : gdof - gcdof;
    if (gsize != dof - cdof) {
      PetscCheck(gsize == dof, comm, PETSC_ERR_ARG_WRONG, "Global dof %" PetscInt_FMT " for point %" PetscInt_FMT " is neither the constrained size %" PetscInt_FMT ", nor the unconstrained %" PetscInt_FMT, gsize, p, dof - cdof, dof);
      cdof = 0; /* Ignore constraints */
    }
    /* c counts constrained dofs skipped so far; local[] holds only unconstrained leaf locations */
    for (d = 0, c = 0; d < dof; ++d) {
      if ((c < cdof) && (cind[c] == d)) {
        ++c;
        continue;
      }
      local[l + d - c] = off + d;
    }
    PetscCheck(d - c == gsize, comm, PETSC_ERR_ARG_WRONG, "Point %" PetscInt_FMT ": Global dof %" PetscInt_FMT " != %" PetscInt_FMT " size - number of constraints", p, gsize, d - c);
    if (gdof < 0) {
      /* Remotely owned point: recover the owner rank from the global offset via the layout ranges */
      for (d = 0; d < gsize; ++d, ++l) {
        PetscInt    offset = -(goff + 1) + d, ir;
        PetscMPIInt r;

        PetscCall(PetscFindInt(offset, size + 1, ranges, &ir));
        PetscCall(PetscMPIIntCast(ir, &r));
        if (r < 0) r = -(r + 2); /* PetscFindInt returns -(insertion point + 1) when not found exactly */
        PetscCheck(!(r < 0) && !(r >= size), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Point %" PetscInt_FMT " mapped to invalid process %d (%" PetscInt_FMT ", %" PetscInt_FMT ")", p, r, gdof, goff);
        remote[l].rank  = r;
        remote[l].index = offset - ranges[r];
      }
    } else {
      /* Locally owned point: the root lives on this rank */
      for (d = 0; d < gsize; ++d, ++l) {
        remote[l].rank  = rank;
        remote[l].index = goff + d - ranges[rank];
      }
    }
  }
  PetscCheck(l == nleaves, comm, PETSC_ERR_PLIB, "Iteration error, l %" PetscInt_FMT " != nleaves %" PetscInt_FMT, l, nleaves);
  PetscCall(PetscLayoutDestroy(&layout));
  PetscCall(PetscSFSetGraph(sf, nroots, nleaves, local, PETSC_OWN_POINTER, remote, PETSC_OWN_POINTER));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFDistributeSection - Create a new `PetscSection` reorganized, moving from the root to the leaves of the `PetscSF`

  Collective

  Input Parameters:
+ sf          - The `PetscSF`
- rootSection - Section defined on root space

  Output Parameters:
+ remoteOffsets - root offsets in leaf storage, or `NULL`, its length will be the size of the chart of `leafSection`
- leafSection   - Section defined on the leaf space

  Level: advanced

  Note:
  Caller must `PetscFree()` `remoteOffsets` if it was requested

  To distribute data from the `rootSection` to the `leafSection`, see `PetscSFCreateSectionSF()` or `PetscSectionMigrateData()`.

  Fortran Note:
  Use `PetscSFDestroyRemoteOffsets()` when `remoteOffsets` is no longer needed.

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`, `PetscSFCreateSectionSF()`
@*/
PetscErrorCode PetscSFDistributeSection(PetscSF sf, PetscSection rootSection, PetscInt *remoteOffsets[], PetscSection leafSection)
{
  PetscSF         embedSF;
  const PetscInt *indices;
  IS              selected;
  PetscInt        numFields, nroots, rpStart, rpEnd, lpStart = PETSC_INT_MAX, lpEnd = -1, f, c;
  /* sub[0]: need embedded SF; sub[1]: root section has constraints; sub[2+f]: field f has constraints */
  PetscBool *sub, hasc;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidHeaderSpecific(rootSection, PETSC_SECTION_CLASSID, 2);
  if (remoteOffsets) PetscAssertPointer(remoteOffsets, 3);
  PetscValidHeaderSpecific(leafSection, PETSC_SECTION_CLASSID, 4);
  PetscCall(PetscLogEventBegin(PETSCSF_DistSect, sf, 0, 0, 0));
  PetscCall(PetscSectionGetNumFields(rootSection, &numFields));
  if (numFields) {
    IS perm;

    /* PetscSectionSetNumFields() calls PetscSectionReset(), which destroys
       leafSection->perm. To keep this permutation set by the user, we grab
       the reference before calling PetscSectionSetNumFields() and set it
       back after. */
    PetscCall(PetscSectionGetPermutation(leafSection, &perm));
    PetscCall(PetscObjectReference((PetscObject)perm));
    PetscCall(PetscSectionSetNumFields(leafSection, numFields));
    PetscCall(PetscSectionSetPermutation(leafSection, perm));
    PetscCall(ISDestroy(&perm));
  }
  PetscCall(PetscMalloc1(numFields + 2, &sub));
  sub[1] = rootSection->bc ? PETSC_TRUE : PETSC_FALSE;
  /* Copy field metadata (components, names, symmetries) from root to leaf section */
  for (f = 0; f < numFields; ++f) {
    PetscSectionSym sym, dsym = NULL;
    const char     *name    = NULL;
    PetscInt        numComp = 0;

    sub[2 + f] = rootSection->field[f]->bc ? PETSC_TRUE : PETSC_FALSE;
    PetscCall(PetscSectionGetFieldComponents(rootSection, f, &numComp));
    PetscCall(PetscSectionGetFieldName(rootSection, f, &name));
    PetscCall(PetscSectionGetFieldSym(rootSection, f, &sym));
    if (sym) PetscCall(PetscSectionSymDistribute(sym, sf, &dsym));
    PetscCall(PetscSectionSetFieldComponents(leafSection, f, numComp));
    PetscCall(PetscSectionSetFieldName(leafSection, f, name));
    PetscCall(PetscSectionSetFieldSym(leafSection, f, dsym));
    PetscCall(PetscSectionSymDestroy(&dsym));
    for (c = 0; c < rootSection->numFieldComponents[f]; ++c) {
      PetscCall(PetscSectionGetComponentName(rootSection, f, c, &name));
      PetscCall(PetscSectionSetComponentName(leafSection, f, c, name));
    }
  }
  PetscCall(PetscSectionGetChart(rootSection, &rpStart, &rpEnd));
  PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
  rpEnd = PetscMin(rpEnd, nroots);
  rpEnd = PetscMax(rpStart, rpEnd);
  /* see if we can avoid creating the embedded SF, since it can cost more than an allreduce */
  sub[0] = (PetscBool)(nroots != rpEnd - rpStart);
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, sub, 2 + numFields, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)sf)));
  if (sub[0]) {
    /* restrict the SF to the roots that lie inside the root section's chart */
    PetscCall(ISCreateStride(PETSC_COMM_SELF, rpEnd - rpStart, rpStart, 1, &selected));
    PetscCall(ISGetIndices(selected, &indices));
    PetscCall(PetscSFCreateEmbeddedRootSF(sf, rpEnd - rpStart, indices, &embedSF));
    PetscCall(ISRestoreIndices(selected, &indices));
    PetscCall(ISDestroy(&selected));
  } else {
    PetscCall(PetscObjectReference((PetscObject)sf));
    embedSF = sf;
  }
  PetscCall(PetscSFGetLeafRange(embedSF, &lpStart, &lpEnd));
  lpEnd++; /* leaf range is inclusive; chart end is exclusive */

  PetscCall(PetscSectionSetChart(leafSection, lpStart, lpEnd));

  /* Constrained dof section */
  hasc = sub[1];
  for (f = 0; f < numFields; ++f) hasc = (PetscBool)(hasc || sub[2 + f]);

  /* Could fuse these at the cost of copies and extra allocation */
  PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->atlasDof, -lpStart), MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->atlasDof, -lpStart), MPI_REPLACE));
  if (sub[1]) {
    PetscCall(PetscSectionCheckConstraints_Private(rootSection));
    PetscCall(PetscSectionCheckConstraints_Private(leafSection));
    PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->bc->atlasDof[-rpStart], &leafSection->bc->atlasDof[-lpStart], MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->bc->atlasDof[-rpStart], &leafSection->bc->atlasDof[-lpStart], MPI_REPLACE));
  }
  for (f = 0; f < numFields; ++f) {
    PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->field[f]->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->field[f]->atlasDof, -lpStart), MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->field[f]->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->field[f]->atlasDof, -lpStart), MPI_REPLACE));
    if (sub[2 + f]) {
      PetscCall(PetscSectionCheckConstraints_Private(rootSection->field[f]));
      PetscCall(PetscSectionCheckConstraints_Private(leafSection->field[f]));
      PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasDof[-rpStart], &leafSection->field[f]->bc->atlasDof[-lpStart], MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasDof[-rpStart], &leafSection->field[f]->bc->atlasDof[-lpStart], MPI_REPLACE));
    }
  }
  if (remoteOffsets) {
    PetscCall(PetscMalloc1(lpEnd - lpStart, remoteOffsets));
    PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE));
  }
  PetscCall(PetscSectionInvalidateMaxDof_Internal(leafSection));
  PetscCall(PetscSectionSetUp(leafSection));
  if (hasc) { /* need to communicate bcIndices */
    PetscSF   bcSF;
    PetscInt *rOffBc;

    PetscCall(PetscMalloc1(lpEnd - lpStart, &rOffBc));
    if (sub[1]) {
      PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
      PetscCall(PetscSFCreateSectionSF(embedSF, rootSection->bc, rOffBc, leafSection->bc, &bcSF));
      PetscCall(PetscSFBcastBegin(bcSF, MPIU_INT, rootSection->bcIndices, leafSection->bcIndices, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(bcSF, MPIU_INT, rootSection->bcIndices, leafSection->bcIndices, MPI_REPLACE));
      PetscCall(PetscSFDestroy(&bcSF));
    }
    for (f = 0; f < numFields; ++f) {
      if (sub[2 + f]) {
        PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
        PetscCall(PetscSFCreateSectionSF(embedSF, rootSection->field[f]->bc, rOffBc, leafSection->field[f]->bc, &bcSF));
        PetscCall(PetscSFBcastBegin(bcSF, MPIU_INT, rootSection->field[f]->bcIndices, leafSection->field[f]->bcIndices, MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(bcSF, MPIU_INT, rootSection->field[f]->bcIndices, leafSection->field[f]->bcIndices, MPI_REPLACE));
        PetscCall(PetscSFDestroy(&bcSF));
      }
    }
    PetscCall(PetscFree(rOffBc));
  }
  PetscCall(PetscSFDestroy(&embedSF));
  PetscCall(PetscFree(sub));
  PetscCall(PetscLogEventEnd(PETSCSF_DistSect, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFCreateRemoteOffsets - Create offsets for point data on remote processes

  Collective

  Input Parameters:
+ sf          - The `PetscSF`
. rootSection - Data layout of remote points for outgoing data (this is layout for roots)
- leafSection - Data layout of local points for incoming data (this is layout for leaves)

  Output Parameter:
. remoteOffsets - Offsets for point data on remote processes (these are offsets from the root section), or `NULL`

  Level: developer

  Note:
  Caller must `PetscFree()` `remoteOffsets` if it was requested

  Fortran Note:
  Use `PetscSFDestroyRemoteOffsets()` when `remoteOffsets` is no longer needed.

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFCreateRemoteOffsets(PetscSF sf, PetscSection rootSection, PetscSection leafSection, PetscInt *remoteOffsets[])
{
  PetscSF         embedSF;
  const PetscInt *indices;
  IS              selected;
  PetscInt        numRoots, rpStart = 0, rpEnd = 0, lpStart = 0, lpEnd = 0;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidHeaderSpecific(rootSection, PETSC_SECTION_CLASSID, 2);
  PetscValidHeaderSpecific(leafSection, PETSC_SECTION_CLASSID, 3);
  PetscAssertPointer(remoteOffsets, 4);
  *remoteOffsets = NULL;
  PetscCall(PetscSFGetGraph(sf, &numRoots, NULL, NULL, NULL));
  if (numRoots < 0) PetscFunctionReturn(PETSC_SUCCESS); /* graph not set yet */
  PetscCall(PetscLogEventBegin(PETSCSF_RemoteOff, sf, 0, 0, 0));
  PetscCall(PetscSectionGetChart(rootSection, &rpStart, &rpEnd));
  PetscCall(PetscSectionGetChart(leafSection, &lpStart, &lpEnd));
  /* restrict the SF to roots inside the root section's chart */
  PetscCall(ISCreateStride(PETSC_COMM_SELF, rpEnd - rpStart, rpStart, 1, &selected));
  PetscCall(ISGetIndices(selected, &indices));
  PetscCall(PetscSFCreateEmbeddedRootSF(sf,
rpEnd - rpStart, indices, &embedSF));
  PetscCall(ISRestoreIndices(selected, &indices));
  PetscCall(ISDestroy(&selected));
  /* calloc so leaf points not reached by the SF read offset 0 */
  PetscCall(PetscCalloc1(lpEnd - lpStart, remoteOffsets));
  PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE));
  PetscCall(PetscSFDestroy(&embedSF));
  PetscCall(PetscLogEventEnd(PETSCSF_RemoteOff, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFCreateSectionSF - Create an expanded `PetscSF` of dofs, assuming the input `PetscSF` relates points

  Collective

  Input Parameters:
+ sf            - The `PetscSF`
. rootSection   - Data layout of remote points for outgoing data (this is usually the serial section)
. remoteOffsets - Offsets for point data on remote processes (these are offsets from the root section), or `NULL`
- leafSection   - Data layout of local points for incoming data (this is the distributed section)

  Output Parameter:
. sectionSF - The new `PetscSF`

  Level: advanced

  Notes:
  `remoteOffsets` can be `NULL` if `sf` does not reference any points in `leafSection`

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`, `PetscSFDistributeSection()`
@*/
PetscErrorCode PetscSFCreateSectionSF(PetscSF sf, PetscSection rootSection, PetscInt remoteOffsets[], PetscSection leafSection, PetscSF *sectionSF)
{
  MPI_Comm           comm;
  const PetscInt    *localPoints;
  const PetscSFNode *remotePoints;
  PetscInt           lpStart, lpEnd;
  PetscInt           numRoots, numSectionRoots, numPoints, numIndices = 0;
  PetscInt          *localIndices;
  PetscSFNode       *remoteIndices;
  PetscInt           i, ind;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscAssertPointer(rootSection, 2);
  /* Cannot check PetscAssertPointer(remoteOffsets,3) because it can be NULL if sf does not reference any points in leafSection */
  PetscAssertPointer(leafSection, 4);
  PetscAssertPointer(sectionSF, 5);
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCall(PetscSFCreate(comm, sectionSF));
  PetscCall(PetscSectionGetChart(leafSection, &lpStart, &lpEnd));
  PetscCall(PetscSectionGetStorageSize(rootSection, &numSectionRoots));
  PetscCall(PetscSFGetGraph(sf, &numRoots, &numPoints, &localPoints, &remotePoints));
  if (numRoots < 0) PetscFunctionReturn(PETSC_SUCCESS); /* graph not set; return empty SF */
  PetscCall(PetscLogEventBegin(PETSCSF_SectSF, sf, 0, 0, 0));
  /* First pass: count dof-level leaves for points inside the leaf section's chart */
  for (i = 0; i < numPoints; ++i) {
    PetscInt localPoint = localPoints ? localPoints[i] : i;
    PetscInt dof;

    if ((localPoint >= lpStart) && (localPoint < lpEnd)) {
      PetscCall(PetscSectionGetDof(leafSection, localPoint, &dof));
      numIndices += dof < 0 ? 0 : dof;
    }
  }
  PetscCall(PetscMalloc1(numIndices, &localIndices));
  PetscCall(PetscMalloc1(numIndices, &remoteIndices));
  /* Create new index graph: one leaf per dof, targeting remoteOffset + d on the owning rank */
  for (i = 0, ind = 0; i < numPoints; ++i) {
    PetscInt localPoint = localPoints ? localPoints[i] : i;
    PetscInt rank       = remotePoints[i].rank;

    if ((localPoint >= lpStart) && (localPoint < lpEnd)) {
      PetscInt remoteOffset = remoteOffsets[localPoint - lpStart];
      PetscInt loff, dof, d;

      PetscCall(PetscSectionGetOffset(leafSection, localPoint, &loff));
      PetscCall(PetscSectionGetDof(leafSection, localPoint, &dof));
      for (d = 0; d < dof; ++d, ++ind) {
        localIndices[ind]        = loff + d;
        remoteIndices[ind].rank  = rank;
        remoteIndices[ind].index = remoteOffset + d;
      }
    }
  }
  PetscCheck(numIndices == ind, comm, PETSC_ERR_PLIB, "Inconsistency in indices, %" PetscInt_FMT " should be %" PetscInt_FMT, ind, numIndices);
  PetscCall(PetscSFSetGraph(*sectionSF, numSectionRoots, numIndices, localIndices, PETSC_OWN_POINTER, remoteIndices, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetUp(*sectionSF));
  PetscCall(PetscLogEventEnd(PETSCSF_SectSF, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFCreateFromLayouts - Creates a parallel star forest mapping between two `PetscLayout` objects

  Collective

  Input Parameters:
+ rmap - `PetscLayout` defining the global root space
- lmap - `PetscLayout` defining the global leaf space

  Output Parameter:
. sf - The parallel star forest

  Level: intermediate

  Notes:
  If the global length of `lmap` differs from the global length of `rmap` then the excess entries are ignored.

  The resulting `sf` used with `PetscSFBcastBegin()` and `PetscSFBcastEnd()` merely copies the array entries of `rootdata` to
  `leafdata`; moving them between MPI processes if needed.
For example,
  if rmap is [0, 3, 5) and lmap is [0, 2, 6) and `rootdata` is (1, 2, 3) on MPI rank 0 and (4, 5) on MPI rank 1 then the
  `leafdata` would become (1, 2) on MPI rank 0 and (3, 4, 5, x) on MPI rank 1.

.seealso: [](sec_petscsf), `PetscSF`, `PetscLayout`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscLayoutCreate()`, `PetscSFSetGraphLayout()`
@*/
PetscErrorCode PetscSFCreateFromLayouts(PetscLayout rmap, PetscLayout lmap, PetscSF *sf)
{
  PetscInt     i, nroots, nleaves = 0;
  PetscInt     rN, lst, len;
  PetscMPIInt  owner = -1;
  PetscSFNode *remote;
  MPI_Comm     rcomm = rmap->comm;
  MPI_Comm     lcomm = lmap->comm;
  PetscMPIInt  flg;

  PetscFunctionBegin;
  PetscAssertPointer(rmap, 1);
  PetscAssertPointer(lmap, 2);
  PetscAssertPointer(sf, 3);
  PetscCheck(rmap->setupcalled, rcomm, PETSC_ERR_ARG_WRONGSTATE, "Root layout not setup");
  PetscCheck(lmap->setupcalled, lcomm, PETSC_ERR_ARG_WRONGSTATE, "Leaf layout not setup");
  PetscCallMPI(MPI_Comm_compare(rcomm, lcomm, &flg));
  PetscCheck(flg == MPI_CONGRUENT || flg == MPI_IDENT, rcomm, PETSC_ERR_SUP, "cannot map two layouts with non-matching communicators");
  PetscCall(PetscSFCreate(rcomm, sf));
  PetscCall(PetscLayoutGetLocalSize(rmap, &nroots));
  PetscCall(PetscLayoutGetSize(rmap, &rN));
  PetscCall(PetscLayoutGetRange(lmap, &lst, &len));
  PetscCall(PetscMalloc1(len - lst, &remote));
  /* each local leaf index i (bounded by the root global size rN) targets its owner in rmap;
     the owner lookup is skipped while i stays inside the previously found owner's range */
  for (i = lst; i < len && i < rN; i++) {
    if (owner < -1 || i >= rmap->range[owner + 1]) PetscCall(PetscLayoutFindOwner(rmap, i, &owner));
    remote[nleaves].rank  = owner;
    remote[nleaves].index = i - rmap->range[owner];
    nleaves++;
  }
  /* nleaves may be smaller than the allocation (len - lst), so copy and free instead of transferring ownership */
  PetscCall(PetscSFSetGraph(*sf, nroots, nleaves, NULL, PETSC_OWN_POINTER, remote, PETSC_COPY_VALUES));
  PetscCall(PetscFree(remote));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* TODO: handle nooffprocentries like MatZeroRowsMapLocal_Private, since this code is the same */
PetscErrorCode PetscLayoutMapLocal(PetscLayout map, PetscInt N, const PetscInt idxs[], PetscInt *on, PetscInt *oidxs[], PetscInt *ogidxs[])
{
  PetscInt    *owners = map->range;
  PetscInt     n      = map->n;
  PetscSF      sf;
  PetscInt    *lidxs, *work = NULL, *ilocal;
  PetscSFNode *ridxs;
  PetscMPIInt  rank, p = 0;
  PetscInt     r, len = 0, nleaves = 0;

  PetscFunctionBegin;
  if (on) *on = 0; /* squelch -Wmaybe-uninitialized */
  /* Create SF where leaves are input idxs and roots are owned idxs */
  PetscCallMPI(MPI_Comm_rank(map->comm, &rank));
  PetscCall(PetscMalloc1(n, &lidxs));
  for (r = 0; r < n; ++r) lidxs[r] = -1; /* -1 marks "not referenced by any rank" until the reduce below */
  PetscCall(PetscMalloc1(N, &ridxs));
  PetscCall(PetscMalloc1(N, &ilocal));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = idxs[r];

    if (idx < 0) continue; /* negative entries are skipped (inactive indices) */
    PetscCheck(idx < map->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Index %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, map->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this idx too */
      PetscCall(PetscLayoutFindOwner(map, idx, &p));
    }
    ridxs[nleaves].rank  = p;
    ridxs[nleaves].index = idxs[r] - owners[p];
    ilocal[nleaves]      = r;
    nleaves++;
  }
  PetscCall(PetscSFCreate(map->comm, &sf));
  PetscCall(PetscSFSetGraph(sf, n, nleaves, ilocal, PETSC_OWN_POINTER, ridxs, PETSC_OWN_POINTER));
  /* MPI_LOR on integers: a root becomes nonnegative iff some rank referenced it */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)idxs, lidxs, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)idxs, lidxs, MPI_LOR));
  if (ogidxs) { /* communicate global idxs */
    PetscInt cum = 0, start, *work2;

    PetscCall(PetscMalloc1(n, &work));
    PetscCall(PetscCalloc1(N, &work2));
    for (r = 0; r < N; ++r)
      if (idxs[r] >= 0) cum++;
    /* exclusive prefix sum of active counts gives this rank's starting global position */
    PetscCallMPI(MPI_Scan(&cum, &start, 1, MPIU_INT, MPI_SUM, map->comm));
    start -= cum;
    cum = 0;
    for (r = 0; r < N; ++r)
      if (idxs[r] >= 0) work2[r] = start + cum++;
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work2, work, MPI_REPLACE));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work2, work, MPI_REPLACE));
    PetscCall(PetscFree(work2));
  }
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in indices */
  for (r = 0; r < n; ++r)
    if (lidxs[r] >= 0) {
      if (work) work[len] = work[r];
      lidxs[len++] = r;
    }
  if (on) *on = len;
  if (oidxs) *oidxs = lidxs;  /* ownership transfers to the caller */
  if (ogidxs) *ogidxs = work; /* ownership transfers to the caller */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFCreateByMatchingIndices - Create `PetscSF` by matching root and leaf indices

  Collective

  Input Parameters:
+ layout           - `PetscLayout` defining the global index space and the MPI rank that brokers each index
. numRootIndices   - size of `rootIndices`
. rootIndices      - array of global indices of which this process requests ownership
. rootLocalIndices - root local index permutation (`NULL` if no permutation)
. rootLocalOffset  - offset to be added to `rootLocalIndices`
. numLeafIndices   - size of `leafIndices`
. leafIndices      - array of global indices with which this process requires data associated
.
leafLocalIndices - leaf local index permutation (`NULL` if no permutation) 642 - leafLocalOffset - offset to be added to `leafLocalIndices` 643 644 Output Parameters: 645 + sfA - star forest representing the communication pattern from the layout space to the leaf space (`NULL` if not needed) 646 - sf - star forest representing the communication pattern from the root space to the leaf space 647 648 Level: advanced 649 650 Example 1: 651 .vb 652 rank : 0 1 2 653 rootIndices : [1 0 2] [3] [3] 654 rootLocalOffset : 100 200 300 655 layout : [0 1] [2] [3] 656 leafIndices : [0] [2] [0 3] 657 leafLocalOffset : 400 500 600 658 659 would build the following PetscSF 660 661 [0] 400 <- (0,101) 662 [1] 500 <- (0,102) 663 [2] 600 <- (0,101) 664 [2] 601 <- (2,300) 665 .ve 666 667 Example 2: 668 .vb 669 rank : 0 1 2 670 rootIndices : [1 0 2] [3] [3] 671 rootLocalOffset : 100 200 300 672 layout : [0 1] [2] [3] 673 leafIndices : rootIndices rootIndices rootIndices 674 leafLocalOffset : rootLocalOffset rootLocalOffset rootLocalOffset 675 676 would build the following PetscSF 677 678 [1] 200 <- (2,300) 679 .ve 680 681 Example 3: 682 .vb 683 No process requests ownership of global index 1, but no process needs it. 684 685 rank : 0 1 2 686 numRootIndices : 2 1 1 687 rootIndices : [0 2] [3] [3] 688 rootLocalOffset : 100 200 300 689 layout : [0 1] [2] [3] 690 numLeafIndices : 1 1 2 691 leafIndices : [0] [2] [0 3] 692 leafLocalOffset : 400 500 600 693 694 would build the following PetscSF 695 696 [0] 400 <- (0,100) 697 [1] 500 <- (0,101) 698 [2] 600 <- (0,100) 699 [2] 601 <- (2,300) 700 .ve 701 702 Notes: 703 `layout` represents any partitioning of [0, N), where N is the total number of global indices, and its 704 local size can be set to `PETSC_DECIDE`. 705 706 If a global index x lies in the partition owned by process i, each process whose `rootIndices` contains x requests 707 ownership of x and sends its own rank and the local index of x to process i. 
708 If multiple processes request ownership of x, the one with the highest rank is to own x. 709 Process i then broadcasts the ownership information, so that each process whose `leafIndices` contains x knows the 710 ownership information of x. 711 The output `sf` is constructed by associating each leaf point to a root point in this way. 712 713 Suppose there is point data ordered according to the global indices and partitioned according to the given layout. 714 The optional output `sfA` can be used to push such data to leaf points. 715 716 All indices in `rootIndices` and `leafIndices` must lie in the layout range. The union (over all processes) of `rootIndices` 717 must cover that of `leafIndices`, but need not cover the entire layout. 718 719 If (leafIndices, leafLocalIndices, leafLocalOffset) == (rootIndices, rootLocalIndices, rootLocalOffset), the output 720 star forest is almost identity, so will only include non-trivial part of the map. 721 722 Developer Notes: 723 Current approach of a process of the highest rank gaining the ownership may cause load imbalance; consider using 724 hash(rank, root_local_index) as the bid for the ownership determination. 
725 726 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()` 727 @*/ 728 PetscErrorCode PetscSFCreateByMatchingIndices(PetscLayout layout, PetscInt numRootIndices, const PetscInt rootIndices[], const PetscInt rootLocalIndices[], PetscInt rootLocalOffset, PetscInt numLeafIndices, const PetscInt leafIndices[], const PetscInt leafLocalIndices[], PetscInt leafLocalOffset, PetscSF *sfA, PetscSF *sf) 729 { 730 MPI_Comm comm = layout->comm; 731 PetscMPIInt rank; 732 PetscSF sf1; 733 PetscSFNode *owners, *buffer, *iremote; 734 PetscInt *ilocal, nleaves, N, n, i; 735 PetscBool areIndicesSame; 736 737 PetscFunctionBegin; 738 PetscAssertPointer(layout, 1); 739 if (rootIndices) PetscAssertPointer(rootIndices, 3); 740 if (rootLocalIndices) PetscAssertPointer(rootLocalIndices, 4); 741 if (leafIndices) PetscAssertPointer(leafIndices, 7); 742 if (leafLocalIndices) PetscAssertPointer(leafLocalIndices, 8); 743 if (sfA) PetscAssertPointer(sfA, 10); 744 PetscAssertPointer(sf, 11); 745 PetscCheck(numRootIndices >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "numRootIndices (%" PetscInt_FMT ") must be non-negative", numRootIndices); 746 PetscCheck(numLeafIndices >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "numLeafIndices (%" PetscInt_FMT ") must be non-negative", numLeafIndices); 747 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 748 PetscCall(PetscLayoutSetUp(layout)); 749 PetscCall(PetscLayoutGetSize(layout, &N)); 750 PetscCall(PetscLayoutGetLocalSize(layout, &n)); 751 areIndicesSame = (PetscBool)(leafIndices == rootIndices); 752 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &areIndicesSame, 1, MPI_C_BOOL, MPI_LAND, comm)); 753 PetscCheck(!areIndicesSame || numLeafIndices == numRootIndices, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "leafIndices == rootIndices, but numLeafIndices (%" PetscInt_FMT ") != numRootIndices(%" PetscInt_FMT ")", numLeafIndices, numRootIndices); 754 if (PetscDefined(USE_DEBUG)) { 755 PetscInt N1 = PETSC_INT_MIN; 756 for (i = 0; i < numRootIndices; i++) 757 if 
(rootIndices[i] > N1) N1 = rootIndices[i]; 758 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &N1, 1, MPIU_INT, MPI_MAX, comm)); 759 PetscCheck(N1 < N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Max. root index (%" PetscInt_FMT ") out of layout range [0,%" PetscInt_FMT ")", N1, N); 760 if (!areIndicesSame) { 761 N1 = PETSC_INT_MIN; 762 for (i = 0; i < numLeafIndices; i++) 763 if (leafIndices[i] > N1) N1 = leafIndices[i]; 764 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &N1, 1, MPIU_INT, MPI_MAX, comm)); 765 PetscCheck(N1 < N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Max. leaf index (%" PetscInt_FMT ") out of layout range [0,%" PetscInt_FMT ")", N1, N); 766 } 767 } 768 769 /* Reduce: owners -> buffer */ 770 PetscCall(PetscMalloc1(n, &buffer)); 771 PetscCall(PetscSFCreate(comm, &sf1)); 772 PetscCall(PetscSFSetFromOptions(sf1)); 773 PetscCall(PetscSFSetGraphLayout(sf1, layout, numRootIndices, NULL, PETSC_OWN_POINTER, rootIndices)); 774 PetscCall(PetscMalloc1(numRootIndices, &owners)); 775 for (i = 0; i < numRootIndices; ++i) { 776 owners[i].rank = rank; 777 owners[i].index = rootLocalOffset + (rootLocalIndices ? 
rootLocalIndices[i] : i); 778 } 779 for (i = 0; i < n; ++i) { 780 buffer[i].index = -1; 781 buffer[i].rank = -1; 782 } 783 PetscCall(PetscSFReduceBegin(sf1, MPIU_SF_NODE, owners, buffer, MPI_MAXLOC)); 784 PetscCall(PetscSFReduceEnd(sf1, MPIU_SF_NODE, owners, buffer, MPI_MAXLOC)); 785 /* Bcast: buffer -> owners */ 786 if (!areIndicesSame) { 787 PetscCall(PetscFree(owners)); 788 PetscCall(PetscSFSetGraphLayout(sf1, layout, numLeafIndices, NULL, PETSC_OWN_POINTER, leafIndices)); 789 PetscCall(PetscMalloc1(numLeafIndices, &owners)); 790 } 791 PetscCall(PetscSFBcastBegin(sf1, MPIU_SF_NODE, buffer, owners, MPI_REPLACE)); 792 PetscCall(PetscSFBcastEnd(sf1, MPIU_SF_NODE, buffer, owners, MPI_REPLACE)); 793 for (i = 0; i < numLeafIndices; ++i) PetscCheck(owners[i].rank >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Global point %" PetscInt_FMT " was unclaimed", leafIndices[i]); 794 PetscCall(PetscFree(buffer)); 795 if (sfA) { 796 *sfA = sf1; 797 } else PetscCall(PetscSFDestroy(&sf1)); 798 /* Create sf */ 799 if (areIndicesSame && rootLocalIndices == leafLocalIndices && leafLocalOffset == rootLocalOffset) { 800 /* leaf space == root space */ 801 for (i = 0, nleaves = 0; i < numLeafIndices; ++i) 802 if (owners[i].rank != rank) ++nleaves; 803 PetscCall(PetscMalloc1(nleaves, &ilocal)); 804 PetscCall(PetscMalloc1(nleaves, &iremote)); 805 for (i = 0, nleaves = 0; i < numLeafIndices; ++i) { 806 if (owners[i].rank != rank) { 807 ilocal[nleaves] = leafLocalOffset + i; 808 iremote[nleaves].rank = owners[i].rank; 809 iremote[nleaves].index = owners[i].index; 810 ++nleaves; 811 } 812 } 813 PetscCall(PetscFree(owners)); 814 } else { 815 nleaves = numLeafIndices; 816 PetscCall(PetscMalloc1(nleaves, &ilocal)); 817 for (i = 0; i < nleaves; ++i) ilocal[i] = leafLocalOffset + (leafLocalIndices ? 
leafLocalIndices[i] : i); 818 iremote = owners; 819 } 820 PetscCall(PetscSFCreate(comm, sf)); 821 PetscCall(PetscSFSetFromOptions(*sf)); 822 PetscCall(PetscSFSetGraph(*sf, rootLocalOffset + numRootIndices, nleaves, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 823 PetscFunctionReturn(PETSC_SUCCESS); 824 } 825 826 /*@ 827 PetscSFMerge - append/merge indices of `sfb` into `sfa`, with preference for `sfb` 828 829 Collective 830 831 Input Parameters: 832 + sfa - default `PetscSF` 833 - sfb - additional edges to add/replace edges in `sfa` 834 835 Output Parameter: 836 . merged - new `PetscSF` with combined edges 837 838 Level: intermediate 839 840 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCompose()` 841 @*/ 842 PetscErrorCode PetscSFMerge(PetscSF sfa, PetscSF sfb, PetscSF *merged) 843 { 844 PetscInt maxleaf; 845 846 PetscFunctionBegin; 847 PetscValidHeaderSpecific(sfa, PETSCSF_CLASSID, 1); 848 PetscValidHeaderSpecific(sfb, PETSCSF_CLASSID, 2); 849 PetscCheckSameComm(sfa, 1, sfb, 2); 850 PetscAssertPointer(merged, 3); 851 { 852 PetscInt aleaf, bleaf; 853 PetscCall(PetscSFGetLeafRange(sfa, NULL, &aleaf)); 854 PetscCall(PetscSFGetLeafRange(sfb, NULL, &bleaf)); 855 maxleaf = PetscMax(aleaf, bleaf) + 1; // One more than the last index 856 } 857 PetscInt *clocal, aroots, aleaves, broots, bleaves; 858 PetscSFNode *cremote; 859 const PetscInt *alocal, *blocal; 860 const PetscSFNode *aremote, *bremote; 861 PetscCall(PetscMalloc2(maxleaf, &clocal, maxleaf, &cremote)); 862 for (PetscInt i = 0; i < maxleaf; i++) clocal[i] = -1; 863 PetscCall(PetscSFGetGraph(sfa, &aroots, &aleaves, &alocal, &aremote)); 864 PetscCall(PetscSFGetGraph(sfb, &broots, &bleaves, &blocal, &bremote)); 865 PetscCheck(aroots == broots, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Both sfa and sfb must have the same root space"); 866 for (PetscInt i = 0; i < aleaves; i++) { 867 PetscInt a = alocal ? 
alocal[i] : i; 868 clocal[a] = a; 869 cremote[a] = aremote[i]; 870 } 871 for (PetscInt i = 0; i < bleaves; i++) { 872 PetscInt b = blocal ? blocal[i] : i; 873 clocal[b] = b; 874 cremote[b] = bremote[i]; 875 } 876 PetscInt nleaves = 0; 877 for (PetscInt i = 0; i < maxleaf; i++) { 878 if (clocal[i] < 0) continue; 879 clocal[nleaves] = clocal[i]; 880 cremote[nleaves] = cremote[i]; 881 nleaves++; 882 } 883 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfa), merged)); 884 PetscCall(PetscSFSetGraph(*merged, aroots, nleaves, clocal, PETSC_COPY_VALUES, cremote, PETSC_COPY_VALUES)); 885 PetscCall(PetscFree2(clocal, cremote)); 886 PetscFunctionReturn(PETSC_SUCCESS); 887 } 888 889 /*@ 890 PetscSFCreateStridedSF - Create an `PetscSF` to communicate interleaved blocks of data 891 892 Collective 893 894 Input Parameters: 895 + sf - star forest 896 . bs - stride 897 . ldr - leading dimension of root space 898 - ldl - leading dimension of leaf space 899 900 Output Parameter: 901 . vsf - the new `PetscSF` 902 903 Level: intermediate 904 905 Notes: 906 This can be useful to perform communications on multiple right-hand sides stored in a Fortran-style two dimensional array. 907 For example, the calling sequence 908 .vb 909 c_datatype *roots, *leaves; 910 for i in [0,bs) do 911 PetscSFBcastBegin(sf, mpi_datatype, roots + i*ldr, leaves + i*ldl, op) 912 PetscSFBcastEnd(sf, mpi_datatype, roots + i*ldr, leaves + i*ldl, op) 913 .ve 914 is equivalent to 915 .vb 916 c_datatype *roots, *leaves; 917 PetscSFCreateStridedSF(sf, bs, ldr, ldl, &vsf) 918 PetscSFBcastBegin(vsf, mpi_datatype, roots, leaves, op) 919 PetscSFBcastEnd(vsf, mpi_datatype, roots, leaves, op) 920 .ve 921 922 Developer Notes: 923 Should this functionality be handled with a new API instead of creating a new object? 
924 925 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()` 926 @*/ 927 PetscErrorCode PetscSFCreateStridedSF(PetscSF sf, PetscInt bs, PetscInt ldr, PetscInt ldl, PetscSF *vsf) 928 { 929 PetscSF rankssf; 930 const PetscSFNode *iremote, *sfrremote; 931 PetscSFNode *viremote; 932 const PetscInt *ilocal; 933 PetscInt *vilocal = NULL, *ldrs; 934 PetscInt nranks, nr, nl, vnr, vnl, maxl; 935 PetscMPIInt rank; 936 MPI_Comm comm; 937 PetscSFType sftype; 938 939 PetscFunctionBegin; 940 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 941 PetscValidLogicalCollectiveInt(sf, bs, 2); 942 PetscAssertPointer(vsf, 5); 943 if (bs == 1) { 944 PetscCall(PetscObjectReference((PetscObject)sf)); 945 *vsf = sf; 946 PetscFunctionReturn(PETSC_SUCCESS); 947 } 948 PetscCall(PetscSFSetUp(sf)); 949 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 950 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 951 PetscCall(PetscSFGetGraph(sf, &nr, &nl, &ilocal, &iremote)); 952 PetscCall(PetscSFGetLeafRange(sf, NULL, &maxl)); 953 maxl += 1; 954 if (ldl == PETSC_DECIDE) ldl = maxl; 955 if (ldr == PETSC_DECIDE) ldr = nr; 956 ldl /= PetscMax(1, sf->vscat.bs); // SFs created from VecScatterCreate() may have a nonzero block size. 
If not 0, we need to scale ldl and ldr 957 ldr /= PetscMax(1, sf->vscat.bs); 958 PetscCheck(ldr >= nr, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid leading dimension %" PetscInt_FMT " must be smaller than number of roots %" PetscInt_FMT, ldr, nr); 959 PetscCheck(ldl >= maxl, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid leading dimension %" PetscInt_FMT " must be larger than leaf range %" PetscInt_FMT, ldl, maxl - 1); 960 vnr = nr * bs; 961 vnl = nl * bs; 962 PetscCall(PetscMalloc1(vnl, &viremote)); 963 PetscCall(PetscMalloc1(vnl, &vilocal)); 964 965 /* Communicate root leading dimensions to leaf ranks */ 966 PetscCall(PetscSFGetRanksSF(sf, &rankssf)); 967 PetscCall(PetscSFGetGraph(rankssf, NULL, &nranks, NULL, &sfrremote)); 968 PetscCall(PetscMalloc1(nranks, &ldrs)); 969 PetscCall(PetscSFBcastBegin(rankssf, MPIU_INT, &ldr, ldrs, MPI_REPLACE)); 970 PetscCall(PetscSFBcastEnd(rankssf, MPIU_INT, &ldr, ldrs, MPI_REPLACE)); 971 972 for (PetscInt i = 0, rold = -1, lda = -1; i < nl; i++) { 973 const PetscInt r = iremote[i].rank; 974 const PetscInt ii = iremote[i].index; 975 976 if (r == rank) lda = ldr; 977 else if (rold != r) { 978 PetscInt j; 979 980 for (j = 0; j < nranks; j++) 981 if (sfrremote[j].rank == r) break; 982 PetscCheck(j < nranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to locate neighbor rank %" PetscInt_FMT, r); 983 lda = ldrs[j]; 984 } 985 rold = r; 986 for (PetscInt v = 0; v < bs; v++) { 987 viremote[v * nl + i].rank = r; 988 viremote[v * nl + i].index = v * lda + ii; 989 vilocal[v * nl + i] = v * ldl + (ilocal ? 
ilocal[i] : i); 990 } 991 } 992 PetscCall(PetscFree(ldrs)); 993 PetscCall(PetscSFCreate(comm, vsf)); 994 if (sf->vscat.bs > 1) { 995 (*vsf)->vscat.bs = sf->vscat.bs; 996 PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &(*vsf)->vscat.unit)); 997 (*vsf)->vscat.to_n = bs * sf->vscat.to_n; 998 (*vsf)->vscat.from_n = bs * sf->vscat.from_n; 999 } 1000 PetscCall(PetscSFGetType(sf, &sftype)); 1001 PetscCall(PetscSFSetType(*vsf, sftype)); 1002 PetscCall(PetscSFSetGraph(*vsf, vnr, vnl, vilocal, PETSC_OWN_POINTER, viremote, PETSC_OWN_POINTER)); 1003 PetscFunctionReturn(PETSC_SUCCESS); 1004 } 1005