#include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
#include <petsc/private/sectionimpl.h>

/*@
  PetscSFSetGraphLayout - Set a parallel star forest via global indices and a `PetscLayout`

  Collective

  Input Parameters:
+ sf - star forest
. layout - `PetscLayout` defining the global space for roots
. nleaves - number of leaf vertices on the current process, each of these references a root on any process
. ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage
. localmode - copy mode for ilocal
- gremote - root vertices in global numbering corresponding to leaves in ilocal

  Level: intermediate

  Note:
  Global indices must lie in [0, N) where N is the global size of layout.
  Leaf indices in ilocal get sorted; this means the user-provided array gets sorted if localmode is `PETSC_OWN_POINTER`.

  Developer Notes:
  Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
  encode contiguous storage. In such case, if localmode is `PETSC_OWN_POINTER`, the memory is deallocated as it is not
  needed

.seealso: `PetscSF`, `PetscSFGetGraphLayout()`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFSetGraphLayout(PetscSF sf, PetscLayout layout, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, const PetscInt *gremote)
{
  const PetscInt *range;
  PetscInt        i, nroots, ls = -1, ln = -1; /* start and length of the cached (most recently found) owner's range */
  PetscMPIInt     lr = -1;                     /* cached owner rank */
  PetscSFNode    *remote;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout, &nroots));
  PetscCall(PetscLayoutGetRanges(layout, &range));
  PetscCall(PetscMalloc1(nleaves, &remote));
  /* seed ls so that the first iteration computes idx = -1 and always performs a real lookup */
  if (nleaves) ls = gremote[0] + 1;
  for (i = 0; i < nleaves; i++) {
    const PetscInt idx = gremote[i] - ls;
    if (idx < 0 || idx >= ln) { /* short-circuit the search */
      PetscCall(PetscLayoutFindOwnerIndex(layout, gremote[i], &lr, &remote[i].index));
      remote[i].rank = lr;
      ls             = range[lr];
      ln             = range[lr + 1] - ls;
    } else {
      /* gremote[i] lies inside the cached rank's range; skip the owner search */
      remote[i].rank  = lr;
      remote[i].index = idx;
    }
  }
  PetscCall(PetscSFSetGraph(sf, nroots, nleaves, ilocal, localmode, remote, PETSC_OWN_POINTER));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFGetGraphLayout - Get the global indices and `PetscLayout` that describe this star forest

  Collective

  Input Parameter:
. sf - star forest

  Output Parameters:
+ layout - `PetscLayout` defining the global space for roots
. nleaves - number of leaf vertices on the current process, each of these references a root on any process
.
ilocal - locations of leaves in leafdata buffers, or `NULL` for contiguous storage 72 - gremote - root vertices in global numbering corresponding to leaves in ilocal 73 74 Level: intermediate 75 76 Notes: 77 The outputs are such that passing them as inputs to `PetscSFSetGraphLayout()` would lead to the same star forest. 78 The outputs `layout` and `gremote` are freshly created each time this function is called, 79 so they need to be freed (with `PetscLayoutDestroy()` and `PetscFree()`) by the user. 80 81 .seealso: `PetscSF`, `PetscSFSetGraphLayout()`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 82 @*/ 83 PetscErrorCode PetscSFGetGraphLayout(PetscSF sf, PetscLayout *layout, PetscInt *nleaves, const PetscInt *ilocal[], PetscInt *gremote[]) 84 { 85 PetscInt nr, nl; 86 const PetscSFNode *ir; 87 PetscLayout lt; 88 89 PetscFunctionBegin; 90 PetscCall(PetscSFGetGraph(sf, &nr, &nl, ilocal, &ir)); 91 PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sf), nr, PETSC_DECIDE, 1, <)); 92 if (gremote) { 93 PetscInt i; 94 const PetscInt *range; 95 PetscInt *gr; 96 97 PetscCall(PetscLayoutGetRanges(lt, &range)); 98 PetscCall(PetscMalloc1(nl, &gr)); 99 for (i = 0; i < nl; i++) gr[i] = range[ir[i].rank] + ir[i].index; 100 *gremote = gr; 101 } 102 if (nleaves) *nleaves = nl; 103 if (layout) *layout = lt; 104 else PetscCall(PetscLayoutDestroy(<)); 105 PetscFunctionReturn(PETSC_SUCCESS); 106 } 107 108 /*@ 109 PetscSFSetGraphSection - Sets the `PetscSF` graph encoding the parallel dof overlap based upon the `PetscSection` describing the data layout. 110 111 Input Parameters: 112 + sf - The `PetscSF` 113 . 
 localSection - `PetscSection` describing the local data layout
- globalSection - `PetscSection` describing the global data layout

  Level: developer

.seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFSetGraphLayout()`
@*/
PetscErrorCode PetscSFSetGraphSection(PetscSF sf, PetscSection localSection, PetscSection globalSection)
{
  MPI_Comm        comm;
  PetscLayout     layout;
  const PetscInt *ranges;
  PetscInt       *local;
  PetscSFNode    *remote;
  PetscInt        pStart, pEnd, p, nroots, nleaves = 0, l;
  PetscMPIInt     size, rank;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidHeaderSpecific(localSection, PETSC_SECTION_CLASSID, 2);
  PetscValidHeaderSpecific(globalSection, PETSC_SECTION_CLASSID, 3);

  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscSectionGetChart(globalSection, &pStart, &pEnd));
  PetscCall(PetscSectionGetConstrainedStorageSize(globalSection, &nroots));
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, 1));
  PetscCall(PetscLayoutSetLocalSize(layout, nroots));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetRanges(layout, &ranges));
  /* Count leaves: each unconstrained global dof on each point is one leaf.
     A negative gdof encodes a remotely owned point carrying -(gdof+1) dofs. */
  for (p = pStart; p < pEnd; ++p) {
    PetscInt gdof, gcdof;

    PetscCall(PetscSectionGetDof(globalSection, p, &gdof));
    PetscCall(PetscSectionGetConstraintDof(globalSection, p, &gcdof));
    PetscCheck(gcdof <= (gdof < 0 ? -(gdof + 1) : gdof), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Point %" PetscInt_FMT " has %" PetscInt_FMT " constraints > %" PetscInt_FMT " dof", p, gcdof, gdof < 0 ? -(gdof + 1) : gdof);
    nleaves += gdof < 0 ? -(gdof + 1) - gcdof : gdof - gcdof;
  }
  PetscCall(PetscMalloc1(nleaves, &local));
  PetscCall(PetscMalloc1(nleaves, &remote));
  for (p = pStart, l = 0; p < pEnd; ++p) {
    const PetscInt *cind;
    PetscInt        dof, cdof, off, gdof, gcdof, goff, gsize, d, c;

    PetscCall(PetscSectionGetDof(localSection, p, &dof));
    PetscCall(PetscSectionGetOffset(localSection, p, &off));
    PetscCall(PetscSectionGetConstraintDof(localSection, p, &cdof));
    PetscCall(PetscSectionGetConstraintIndices(localSection, p, &cind));
    PetscCall(PetscSectionGetDof(globalSection, p, &gdof));
    PetscCall(PetscSectionGetConstraintDof(globalSection, p, &gcdof));
    PetscCall(PetscSectionGetOffset(globalSection, p, &goff));
    if (!gdof) continue; /* Censored point */
    gsize = gdof < 0 ? -(gdof + 1) - gcdof : gdof - gcdof;
    if (gsize != dof - cdof) {
      PetscCheck(gsize == dof, comm, PETSC_ERR_ARG_WRONG, "Global dof %" PetscInt_FMT " for point %" PetscInt_FMT " is neither the constrained size %" PetscInt_FMT ", nor the unconstrained %" PetscInt_FMT, gsize, p, dof - cdof, dof);
      cdof = 0; /* Ignore constraints */
    }
    /* Record the local storage location of each unconstrained dof, skipping constrained ones */
    for (d = 0, c = 0; d < dof; ++d) {
      if ((c < cdof) && (cind[c] == d)) {
        ++c;
        continue;
      }
      local[l + d - c] = off + d;
    }
    PetscCheck(d - c == gsize, comm, PETSC_ERR_ARG_WRONG, "Point %" PetscInt_FMT ": Global dof %" PetscInt_FMT " != %" PetscInt_FMT " size - number of constraints", p, gsize, d - c);
    if (gdof < 0) {
      /* Remotely owned point: the owner is found from the global offset, encoded as -(goff+1) */
      for (d = 0; d < gsize; ++d, ++l) {
        PetscInt    offset = -(goff + 1) + d, ir;
        PetscMPIInt r;

        PetscCall(PetscFindInt(offset, size + 1, ranges, &ir));
        PetscCall(PetscMPIIntCast(ir, &r));
        if (r < 0) r = -(r + 2); /* offset not exactly a range boundary: recover the owning rank from the insertion point */
        PetscCheck(!(r < 0) && !(r >= size), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Point %" PetscInt_FMT " mapped to invalid process %d (%" PetscInt_FMT ", %" PetscInt_FMT ")", p, r, gdof, goff);
        remote[l].rank  = r;
        remote[l].index = offset - ranges[r];
      }
    } else {
      /* Locally owned point: leaves map to this rank's own roots */
      for (d = 0; d < gsize; ++d, ++l) {
        remote[l].rank  = rank;
        remote[l].index = goff + d - ranges[rank];
      }
    }
  }
  PetscCheck(l == nleaves, comm, PETSC_ERR_PLIB, "Iteration error, l %" PetscInt_FMT " != nleaves %" PetscInt_FMT, l, nleaves);
  PetscCall(PetscLayoutDestroy(&layout));
  PetscCall(PetscSFSetGraph(sf, nroots, nleaves, local, PETSC_OWN_POINTER, remote, PETSC_OWN_POINTER));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFDistributeSection - Create a new `PetscSection` reorganized, moving from the root to the leaves of the `PetscSF`

  Collective

  Input Parameters:
+ sf - The `PetscSF`
- rootSection - Section defined on root space

  Output Parameters:
+ remoteOffsets - root offsets in leaf storage, or `NULL`, its length will be the size of the chart of `leafSection`
- leafSection - Section defined on the leaf space

  Level: advanced

  Note:
  Caller must `PetscFree()` `remoteOffsets` if it was requested

  Fortran Note:
  Use `PetscSFDestroyRemoteOffsets()` when `remoteOffsets` is no longer needed.

.seealso: `PetscSF`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFDistributeSection(PetscSF sf, PetscSection rootSection, PetscInt *remoteOffsets[], PetscSection leafSection)
{
  PetscSF         embedSF;
  const PetscInt *indices;
  IS              selected;
  PetscInt        numFields, nroots, rpStart, rpEnd, lpStart = PETSC_INT_MAX, lpEnd = -1, f, c;
  PetscBool      *sub, hasc;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(PETSCSF_DistSect, sf, 0, 0, 0));
  PetscCall(PetscSectionGetNumFields(rootSection, &numFields));
  if (numFields) {
    IS perm;

    /* PetscSectionSetNumFields() calls PetscSectionReset(), which destroys
       leafSection->perm. To keep this permutation set by the user, we grab
       the reference before calling PetscSectionSetNumFields() and set it
       back after.
    */
    PetscCall(PetscSectionGetPermutation(leafSection, &perm));
    PetscCall(PetscObjectReference((PetscObject)perm));
    PetscCall(PetscSectionSetNumFields(leafSection, numFields));
    PetscCall(PetscSectionSetPermutation(leafSection, perm));
    PetscCall(ISDestroy(&perm));
  }
  /* sub[0]: embedded SF needed; sub[1]: root section has constraints; sub[2+f]: field f has constraints */
  PetscCall(PetscMalloc1(numFields + 2, &sub));
  sub[1] = rootSection->bc ? PETSC_TRUE : PETSC_FALSE;
  for (f = 0; f < numFields; ++f) {
    PetscSectionSym sym, dsym = NULL;
    const char     *name    = NULL;
    PetscInt        numComp = 0;

    sub[2 + f] = rootSection->field[f]->bc ? PETSC_TRUE : PETSC_FALSE;
    PetscCall(PetscSectionGetFieldComponents(rootSection, f, &numComp));
    PetscCall(PetscSectionGetFieldName(rootSection, f, &name));
    PetscCall(PetscSectionGetFieldSym(rootSection, f, &sym));
    if (sym) PetscCall(PetscSectionSymDistribute(sym, sf, &dsym));
    PetscCall(PetscSectionSetFieldComponents(leafSection, f, numComp));
    PetscCall(PetscSectionSetFieldName(leafSection, f, name));
    PetscCall(PetscSectionSetFieldSym(leafSection, f, dsym));
    PetscCall(PetscSectionSymDestroy(&dsym));
    for (c = 0; c < rootSection->numFieldComponents[f]; ++c) {
      PetscCall(PetscSectionGetComponentName(rootSection, f, c, &name));
      PetscCall(PetscSectionSetComponentName(leafSection, f, c, name));
    }
  }
  PetscCall(PetscSectionGetChart(rootSection, &rpStart, &rpEnd));
  PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
  /* clamp the root chart to the SF's root space; keep rpStart <= rpEnd */
  rpEnd = PetscMin(rpEnd, nroots);
  rpEnd = PetscMax(rpStart, rpEnd);
  /* see if we can avoid creating the embedded SF, since it can cost more than an allreduce */
  sub[0] = (PetscBool)(nroots != rpEnd - rpStart);
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, sub, 2 + numFields, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)sf)));
  if (sub[0]) {
    /* restrict the SF to the roots that lie in the (clamped) root chart */
    PetscCall(ISCreateStride(PETSC_COMM_SELF, rpEnd - rpStart, rpStart, 1, &selected));
    PetscCall(ISGetIndices(selected, &indices));
    PetscCall(PetscSFCreateEmbeddedRootSF(sf, rpEnd - rpStart, indices, &embedSF));
    PetscCall(ISRestoreIndices(selected, &indices));
    PetscCall(ISDestroy(&selected));
  } else {
    /* no restriction needed; take a reference so the unconditional destroy below is correct */
    PetscCall(PetscObjectReference((PetscObject)sf));
    embedSF = sf;
  }
  PetscCall(PetscSFGetLeafRange(embedSF, &lpStart, &lpEnd));
  lpEnd++;

  PetscCall(PetscSectionSetChart(leafSection, lpStart, lpEnd));

  /* Constrained dof section */
  hasc = sub[1];
  for (f = 0; f < numFields; ++f) hasc = (PetscBool)(hasc || sub[2 + f]);

  /* Could fuse these at the cost of copies and extra allocation */
  PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->atlasDof, -lpStart), MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->atlasDof, -lpStart), MPI_REPLACE));
  if (sub[1]) {
    PetscCall(PetscSectionCheckConstraints_Private(rootSection));
    PetscCall(PetscSectionCheckConstraints_Private(leafSection));
    PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->bc->atlasDof[-rpStart], &leafSection->bc->atlasDof[-lpStart], MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->bc->atlasDof[-rpStart], &leafSection->bc->atlasDof[-lpStart], MPI_REPLACE));
  }
  for (f = 0; f < numFields; ++f) {
    PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->field[f]->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->field[f]->atlasDof, -lpStart), MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->field[f]->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->field[f]->atlasDof, -lpStart), MPI_REPLACE));
    if (sub[2 + f]) {
      PetscCall(PetscSectionCheckConstraints_Private(rootSection->field[f]));
      PetscCall(PetscSectionCheckConstraints_Private(leafSection->field[f]));
      PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasDof[-rpStart], &leafSection->field[f]->bc->atlasDof[-lpStart], MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasDof[-rpStart], &leafSection->field[f]->bc->atlasDof[-lpStart], MPI_REPLACE));
    }
  }
  if (remoteOffsets) {
    PetscCall(PetscMalloc1(lpEnd - lpStart, remoteOffsets));
    PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE));
  }
  PetscCall(PetscSectionInvalidateMaxDof_Internal(leafSection));
  PetscCall(PetscSectionSetUp(leafSection));
  if (hasc) { /* need to communicate bcIndices */
    PetscSF   bcSF;
    PetscInt *rOffBc;

    PetscCall(PetscMalloc1(lpEnd - lpStart, &rOffBc));
    if (sub[1]) {
      PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
      PetscCall(PetscSFCreateSectionSF(embedSF, rootSection->bc, rOffBc, leafSection->bc, &bcSF));
      PetscCall(PetscSFBcastBegin(bcSF, MPIU_INT, rootSection->bcIndices, leafSection->bcIndices, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(bcSF, MPIU_INT, rootSection->bcIndices, leafSection->bcIndices, MPI_REPLACE));
      PetscCall(PetscSFDestroy(&bcSF));
    }
    for (f = 0; f < numFields; ++f) {
      if (sub[2 + f]) {
        PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
        PetscCall(PetscSFCreateSectionSF(embedSF, rootSection->field[f]->bc, rOffBc, leafSection->field[f]->bc, &bcSF));
        PetscCall(PetscSFBcastBegin(bcSF, MPIU_INT, rootSection->field[f]->bcIndices, leafSection->field[f]->bcIndices, MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(bcSF, MPIU_INT, rootSection->field[f]->bcIndices, leafSection->field[f]->bcIndices, MPI_REPLACE));
        PetscCall(PetscSFDestroy(&bcSF));
      }
    }
    PetscCall(PetscFree(rOffBc));
  }
  PetscCall(PetscSFDestroy(&embedSF));
  PetscCall(PetscFree(sub));
  PetscCall(PetscLogEventEnd(PETSCSF_DistSect, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFCreateRemoteOffsets - Create offsets for point data on remote processes

  Collective

  Input Parameters:
+ sf - The `PetscSF`
. rootSection - Data layout of remote points for outgoing data (this is layout for roots)
- leafSection - Data layout of local points for incoming data (this is layout for leaves)

  Output Parameter:
. remoteOffsets - Offsets for point data on remote processes (these are offsets from the root section), or `NULL`

  Level: developer

  Note:
  Caller must `PetscFree()` `remoteOffsets` if it was requested

  Fortran Note:
  Use `PetscSFDestroyRemoteOffsets()` when `remoteOffsets` is no longer needed.
376 377 .seealso: `PetscSF`, `PetscSFCreate()` 378 @*/ 379 PetscErrorCode PetscSFCreateRemoteOffsets(PetscSF sf, PetscSection rootSection, PetscSection leafSection, PetscInt *remoteOffsets[]) 380 { 381 PetscSF embedSF; 382 const PetscInt *indices; 383 IS selected; 384 PetscInt numRoots, rpStart = 0, rpEnd = 0, lpStart = 0, lpEnd = 0; 385 386 PetscFunctionBegin; 387 *remoteOffsets = NULL; 388 PetscCall(PetscSFGetGraph(sf, &numRoots, NULL, NULL, NULL)); 389 if (numRoots < 0) PetscFunctionReturn(PETSC_SUCCESS); 390 PetscCall(PetscLogEventBegin(PETSCSF_RemoteOff, sf, 0, 0, 0)); 391 PetscCall(PetscSectionGetChart(rootSection, &rpStart, &rpEnd)); 392 PetscCall(PetscSectionGetChart(leafSection, &lpStart, &lpEnd)); 393 PetscCall(ISCreateStride(PETSC_COMM_SELF, rpEnd - rpStart, rpStart, 1, &selected)); 394 PetscCall(ISGetIndices(selected, &indices)); 395 PetscCall(PetscSFCreateEmbeddedRootSF(sf, rpEnd - rpStart, indices, &embedSF)); 396 PetscCall(ISRestoreIndices(selected, &indices)); 397 PetscCall(ISDestroy(&selected)); 398 PetscCall(PetscCalloc1(lpEnd - lpStart, remoteOffsets)); 399 PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE)); 400 PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE)); 401 PetscCall(PetscSFDestroy(&embedSF)); 402 PetscCall(PetscLogEventEnd(PETSCSF_RemoteOff, sf, 0, 0, 0)); 403 PetscFunctionReturn(PETSC_SUCCESS); 404 } 405 406 /*@ 407 PetscSFCreateSectionSF - Create an expanded `PetscSF` of dofs, assuming the input `PetscSF` relates points 408 409 Collective 410 411 Input Parameters: 412 + sf - The `PetscSF` 413 . rootSection - Data layout of remote points for outgoing data (this is usually the serial section) 414 . 
remoteOffsets - Offsets for point data on remote processes (these are offsets from the root section), or NULL 415 - leafSection - Data layout of local points for incoming data (this is the distributed section) 416 417 Output Parameter: 418 . sectionSF - The new `PetscSF` 419 420 Level: advanced 421 422 Notes: 423 `remoteOffsets` can be `NULL` if `sf` does not reference any points in leafSection 424 425 .seealso: `PetscSF`, `PetscSFCreate()` 426 @*/ 427 PetscErrorCode PetscSFCreateSectionSF(PetscSF sf, PetscSection rootSection, PetscInt remoteOffsets[], PetscSection leafSection, PetscSF *sectionSF) 428 { 429 MPI_Comm comm; 430 const PetscInt *localPoints; 431 const PetscSFNode *remotePoints; 432 PetscInt lpStart, lpEnd; 433 PetscInt numRoots, numSectionRoots, numPoints, numIndices = 0; 434 PetscInt *localIndices; 435 PetscSFNode *remoteIndices; 436 PetscInt i, ind; 437 438 PetscFunctionBegin; 439 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 440 PetscAssertPointer(rootSection, 2); 441 /* Cannot check PetscAssertPointer(remoteOffsets,3) because it can be NULL if sf does not reference any points in leafSection */ 442 PetscAssertPointer(leafSection, 4); 443 PetscAssertPointer(sectionSF, 5); 444 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 445 PetscCall(PetscSFCreate(comm, sectionSF)); 446 PetscCall(PetscSectionGetChart(leafSection, &lpStart, &lpEnd)); 447 PetscCall(PetscSectionGetStorageSize(rootSection, &numSectionRoots)); 448 PetscCall(PetscSFGetGraph(sf, &numRoots, &numPoints, &localPoints, &remotePoints)); 449 if (numRoots < 0) PetscFunctionReturn(PETSC_SUCCESS); 450 PetscCall(PetscLogEventBegin(PETSCSF_SectSF, sf, 0, 0, 0)); 451 for (i = 0; i < numPoints; ++i) { 452 PetscInt localPoint = localPoints ? localPoints[i] : i; 453 PetscInt dof; 454 455 if ((localPoint >= lpStart) && (localPoint < lpEnd)) { 456 PetscCall(PetscSectionGetDof(leafSection, localPoint, &dof)); 457 numIndices += dof < 0 ? 
0 : dof; 458 } 459 } 460 PetscCall(PetscMalloc1(numIndices, &localIndices)); 461 PetscCall(PetscMalloc1(numIndices, &remoteIndices)); 462 /* Create new index graph */ 463 for (i = 0, ind = 0; i < numPoints; ++i) { 464 PetscInt localPoint = localPoints ? localPoints[i] : i; 465 PetscInt rank = remotePoints[i].rank; 466 467 if ((localPoint >= lpStart) && (localPoint < lpEnd)) { 468 PetscInt remoteOffset = remoteOffsets[localPoint - lpStart]; 469 PetscInt loff, dof, d; 470 471 PetscCall(PetscSectionGetOffset(leafSection, localPoint, &loff)); 472 PetscCall(PetscSectionGetDof(leafSection, localPoint, &dof)); 473 for (d = 0; d < dof; ++d, ++ind) { 474 localIndices[ind] = loff + d; 475 remoteIndices[ind].rank = rank; 476 remoteIndices[ind].index = remoteOffset + d; 477 } 478 } 479 } 480 PetscCheck(numIndices == ind, comm, PETSC_ERR_PLIB, "Inconsistency in indices, %" PetscInt_FMT " should be %" PetscInt_FMT, ind, numIndices); 481 PetscCall(PetscSFSetGraph(*sectionSF, numSectionRoots, numIndices, localIndices, PETSC_OWN_POINTER, remoteIndices, PETSC_OWN_POINTER)); 482 PetscCall(PetscSFSetUp(*sectionSF)); 483 PetscCall(PetscLogEventEnd(PETSCSF_SectSF, sf, 0, 0, 0)); 484 PetscFunctionReturn(PETSC_SUCCESS); 485 } 486 487 /*@C 488 PetscSFCreateFromLayouts - Creates a parallel star forest mapping two `PetscLayout` objects 489 490 Collective 491 492 Input Parameters: 493 + rmap - `PetscLayout` defining the global root space 494 - lmap - `PetscLayout` defining the global leaf space 495 496 Output Parameter: 497 . 
 sf - The parallel star forest

  Level: intermediate

.seealso: `PetscSF`, `PetscSFCreate()`, `PetscLayoutCreate()`, `PetscSFSetGraphLayout()`
@*/
PetscErrorCode PetscSFCreateFromLayouts(PetscLayout rmap, PetscLayout lmap, PetscSF *sf)
{
  PetscInt     i, nroots, nleaves = 0;
  PetscInt     rN, lst, len;
  PetscMPIInt  owner = -1; /* cached owner from the previous iteration; avoids a search per index */
  PetscSFNode *remote;
  MPI_Comm     rcomm = rmap->comm;
  MPI_Comm     lcomm = lmap->comm;
  PetscMPIInt  flg;

  PetscFunctionBegin;
  PetscAssertPointer(sf, 3);
  PetscCheck(rmap->setupcalled, rcomm, PETSC_ERR_ARG_WRONGSTATE, "Root layout not setup");
  PetscCheck(lmap->setupcalled, lcomm, PETSC_ERR_ARG_WRONGSTATE, "Leaf layout not setup");
  PetscCallMPI(MPI_Comm_compare(rcomm, lcomm, &flg));
  PetscCheck(flg == MPI_CONGRUENT || flg == MPI_IDENT, rcomm, PETSC_ERR_SUP, "cannot map two layouts with non-matching communicators");
  PetscCall(PetscSFCreate(rcomm, sf));
  PetscCall(PetscLayoutGetLocalSize(rmap, &nroots));
  PetscCall(PetscLayoutGetSize(rmap, &rN));
  PetscCall(PetscLayoutGetRange(lmap, &lst, &len));
  PetscCall(PetscMalloc1(len - lst, &remote));
  /* leaves are this rank's part [lst,len) of the leaf layout, clipped to the root layout's global size rN */
  for (i = lst; i < len && i < rN; i++) {
    if (owner < -1 || i >= rmap->range[owner + 1]) PetscCall(PetscLayoutFindOwner(rmap, i, &owner));
    remote[nleaves].rank  = owner;
    remote[nleaves].index = i - rmap->range[owner];
    nleaves++;
  }
  /* remote is passed with PETSC_COPY_VALUES, so our buffer is freed right after */
  PetscCall(PetscSFSetGraph(*sf, nroots, nleaves, NULL, PETSC_OWN_POINTER, remote, PETSC_COPY_VALUES));
  PetscCall(PetscFree(remote));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* TODO: handle nooffprocentries like MatZeroRowsMapLocal_Private, since this code is the same */
PetscErrorCode PetscLayoutMapLocal(PetscLayout map, PetscInt N, const PetscInt idxs[], PetscInt *on, PetscInt *oidxs[], PetscInt *ogidxs[])
{
  PetscInt    *owners = map->range;
  PetscInt     n      = map->n;
  PetscSF      sf;
  PetscInt    *lidxs, *work = NULL, *ilocal;
  PetscSFNode *ridxs;
  PetscMPIInt  rank, p = 0;
  PetscInt     r, len = 0, nleaves = 0;

  PetscFunctionBegin;
  if (on) *on = 0; /* squelch -Wmaybe-uninitialized */
  /* Create SF where leaves are input idxs and roots are owned idxs */
  PetscCallMPI(MPI_Comm_rank(map->comm, &rank));
  PetscCall(PetscMalloc1(n, &lidxs));
  for (r = 0; r < n; ++r) lidxs[r] = -1; /* -1 marks owned indices not referenced by anyone */
  PetscCall(PetscMalloc1(N, &ridxs));
  PetscCall(PetscMalloc1(N, &ilocal));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = idxs[r];

    if (idx < 0) continue; /* negative input entries are skipped */
    PetscCheck(idx < map->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Index %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, map->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this idx too */
      PetscCall(PetscLayoutFindOwner(map, idx, &p));
    }
    ridxs[nleaves].rank  = p;
    ridxs[nleaves].index = idxs[r] - owners[p];
    ilocal[nleaves]      = r;
    nleaves++;
  }
  PetscCall(PetscSFCreate(map->comm, &sf));
  PetscCall(PetscSFSetGraph(sf, n, nleaves, ilocal, PETSC_OWN_POINTER, ridxs, PETSC_OWN_POINTER));
  /* after the reduce, owned indices referenced by some leaf end up >= 0; untouched ones keep -1 */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)idxs, lidxs, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)idxs, lidxs, MPI_LOR));
  if (ogidxs) { /* communicate global idxs */
    PetscInt cum = 0, start, *work2;

    PetscCall(PetscMalloc1(n, &work));
    PetscCall(PetscCalloc1(N, &work2));
    /* number the valid input entries consecutively across ranks via an exclusive prefix sum */
    for (r = 0; r < N; ++r)
      if (idxs[r] >= 0) cum++;
    PetscCallMPI(MPI_Scan(&cum, &start, 1, MPIU_INT, MPI_SUM, map->comm));
    start -= cum;
    cum = 0;
    for (r = 0; r < N; ++r)
      if (idxs[r] >= 0) work2[r] = start + cum++;
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work2, work, MPI_REPLACE));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work2, work, MPI_REPLACE));
    PetscCall(PetscFree(work2));
  }
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in indices */
  for (r = 0; r < n; ++r)
    if (lidxs[r] >=
0) { 591 if (work) work[len] = work[r]; 592 lidxs[len++] = r; 593 } 594 if (on) *on = len; 595 if (oidxs) *oidxs = lidxs; 596 if (ogidxs) *ogidxs = work; 597 PetscFunctionReturn(PETSC_SUCCESS); 598 } 599 600 /*@ 601 PetscSFCreateByMatchingIndices - Create `PetscSF` by matching root and leaf indices 602 603 Collective 604 605 Input Parameters: 606 + layout - `PetscLayout` defining the global index space and the rank that brokers each index 607 . numRootIndices - size of rootIndices 608 . rootIndices - `PetscInt` array of global indices of which this process requests ownership 609 . rootLocalIndices - root local index permutation (NULL if no permutation) 610 . rootLocalOffset - offset to be added to root local indices 611 . numLeafIndices - size of leafIndices 612 . leafIndices - `PetscInt` array of global indices with which this process requires data associated 613 . leafLocalIndices - leaf local index permutation (NULL if no permutation) 614 - leafLocalOffset - offset to be added to leaf local indices 615 616 Output Parameters: 617 + sfA - star forest representing the communication pattern from the layout space to the leaf space (NULL if not needed) 618 - sf - star forest representing the communication pattern from the root space to the leaf space 619 620 Level: advanced 621 622 Example 1: 623 .vb 624 rank : 0 1 2 625 rootIndices : [1 0 2] [3] [3] 626 rootLocalOffset : 100 200 300 627 layout : [0 1] [2] [3] 628 leafIndices : [0] [2] [0 3] 629 leafLocalOffset : 400 500 600 630 631 would build the following PetscSF 632 633 [0] 400 <- (0,101) 634 [1] 500 <- (0,102) 635 [2] 600 <- (0,101) 636 [2] 601 <- (2,300) 637 .ve 638 639 Example 2: 640 .vb 641 rank : 0 1 2 642 rootIndices : [1 0 2] [3] [3] 643 rootLocalOffset : 100 200 300 644 layout : [0 1] [2] [3] 645 leafIndices : rootIndices rootIndices rootIndices 646 leafLocalOffset : rootLocalOffset rootLocalOffset rootLocalOffset 647 648 would build the following PetscSF 649 650 [1] 200 <- (2,300) 651 .ve 652 653 Example 
3: 654 .vb 655 No process requests ownership of global index 1, but no process needs it. 656 657 rank : 0 1 2 658 numRootIndices : 2 1 1 659 rootIndices : [0 2] [3] [3] 660 rootLocalOffset : 100 200 300 661 layout : [0 1] [2] [3] 662 numLeafIndices : 1 1 2 663 leafIndices : [0] [2] [0 3] 664 leafLocalOffset : 400 500 600 665 666 would build the following PetscSF 667 668 [0] 400 <- (0,100) 669 [1] 500 <- (0,101) 670 [2] 600 <- (0,100) 671 [2] 601 <- (2,300) 672 .ve 673 674 Notes: 675 The layout parameter represents any partitioning of [0, N), where N is the total number of global indices, and its 676 local size can be set to `PETSC_DECIDE`. 677 678 If a global index x lies in the partition owned by process i, each process whose rootIndices contains x requests 679 ownership of x and sends its own rank and the local index of x to process i. 680 If multiple processes request ownership of x, the one with the highest rank is to own x. 681 Process i then broadcasts the ownership information, so that each process whose leafIndices contains x knows the 682 ownership information of x. 683 The output sf is constructed by associating each leaf point to a root point in this way. 684 685 Suppose there is point data ordered according to the global indices and partitioned according to the given layout. 686 The optional output `PetscSF` object sfA can be used to push such data to leaf points. 687 688 All indices in rootIndices and leafIndices must lie in the layout range. The union (over all processes) of rootIndices 689 must cover that of leafIndices, but need not cover the entire layout. 690 691 If (leafIndices, leafLocalIndices, leafLocalOffset) == (rootIndices, rootLocalIndices, rootLocalOffset), the output 692 star forest is almost identity, so will only include non-trivial part of the map. 
693 694 Developer Notes: 695 Current approach of a process of the highest rank gaining the ownership may cause load imbalance; consider using 696 hash(rank, root_local_index) as the bid for the ownership determination. 697 698 .seealso: `PetscSF`, `PetscSFCreate()` 699 @*/ 700 PetscErrorCode PetscSFCreateByMatchingIndices(PetscLayout layout, PetscInt numRootIndices, const PetscInt *rootIndices, const PetscInt *rootLocalIndices, PetscInt rootLocalOffset, PetscInt numLeafIndices, const PetscInt *leafIndices, const PetscInt *leafLocalIndices, PetscInt leafLocalOffset, PetscSF *sfA, PetscSF *sf) 701 { 702 MPI_Comm comm = layout->comm; 703 PetscMPIInt size, rank; 704 PetscSF sf1; 705 PetscSFNode *owners, *buffer, *iremote; 706 PetscInt *ilocal, nleaves, N, n, i; 707 #if defined(PETSC_USE_DEBUG) 708 PetscInt N1; 709 #endif 710 PetscBool flag; 711 712 PetscFunctionBegin; 713 if (rootIndices) PetscAssertPointer(rootIndices, 3); 714 if (rootLocalIndices) PetscAssertPointer(rootLocalIndices, 4); 715 if (leafIndices) PetscAssertPointer(leafIndices, 7); 716 if (leafLocalIndices) PetscAssertPointer(leafLocalIndices, 8); 717 if (sfA) PetscAssertPointer(sfA, 10); 718 PetscAssertPointer(sf, 11); 719 PetscCheck(numRootIndices >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "numRootIndices (%" PetscInt_FMT ") must be non-negative", numRootIndices); 720 PetscCheck(numLeafIndices >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "numLeafIndices (%" PetscInt_FMT ") must be non-negative", numLeafIndices); 721 PetscCallMPI(MPI_Comm_size(comm, &size)); 722 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 723 PetscCall(PetscLayoutSetUp(layout)); 724 PetscCall(PetscLayoutGetSize(layout, &N)); 725 PetscCall(PetscLayoutGetLocalSize(layout, &n)); 726 flag = (PetscBool)(leafIndices == rootIndices); 727 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &flag, 1, MPI_C_BOOL, MPI_LAND, comm)); 728 PetscCheck(!flag || numLeafIndices == numRootIndices, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "leafIndices == 
rootIndices, but numLeafIndices (%" PetscInt_FMT ") != numRootIndices(%" PetscInt_FMT ")", numLeafIndices, numRootIndices); 729 #if defined(PETSC_USE_DEBUG) 730 N1 = PETSC_INT_MIN; 731 for (i = 0; i < numRootIndices; i++) 732 if (rootIndices[i] > N1) N1 = rootIndices[i]; 733 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &N1, 1, MPIU_INT, MPI_MAX, comm)); 734 PetscCheck(N1 < N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Max. root index (%" PetscInt_FMT ") out of layout range [0,%" PetscInt_FMT ")", N1, N); 735 if (!flag) { 736 N1 = PETSC_INT_MIN; 737 for (i = 0; i < numLeafIndices; i++) 738 if (leafIndices[i] > N1) N1 = leafIndices[i]; 739 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &N1, 1, MPIU_INT, MPI_MAX, comm)); 740 PetscCheck(N1 < N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Max. leaf index (%" PetscInt_FMT ") out of layout range [0,%" PetscInt_FMT ")", N1, N); 741 } 742 #endif 743 /* Reduce: owners -> buffer */ 744 PetscCall(PetscMalloc1(n, &buffer)); 745 PetscCall(PetscSFCreate(comm, &sf1)); 746 PetscCall(PetscSFSetFromOptions(sf1)); 747 PetscCall(PetscSFSetGraphLayout(sf1, layout, numRootIndices, NULL, PETSC_OWN_POINTER, rootIndices)); 748 PetscCall(PetscMalloc1(numRootIndices, &owners)); 749 for (i = 0; i < numRootIndices; ++i) { 750 owners[i].rank = rank; 751 owners[i].index = rootLocalOffset + (rootLocalIndices ? 
rootLocalIndices[i] : i); 752 } 753 for (i = 0; i < n; ++i) { 754 buffer[i].index = -1; 755 buffer[i].rank = -1; 756 } 757 PetscCall(PetscSFReduceBegin(sf1, MPIU_SF_NODE, owners, buffer, MPI_MAXLOC)); 758 PetscCall(PetscSFReduceEnd(sf1, MPIU_SF_NODE, owners, buffer, MPI_MAXLOC)); 759 /* Bcast: buffer -> owners */ 760 if (!flag) { 761 /* leafIndices is different from rootIndices */ 762 PetscCall(PetscFree(owners)); 763 PetscCall(PetscSFSetGraphLayout(sf1, layout, numLeafIndices, NULL, PETSC_OWN_POINTER, leafIndices)); 764 PetscCall(PetscMalloc1(numLeafIndices, &owners)); 765 } 766 PetscCall(PetscSFBcastBegin(sf1, MPIU_SF_NODE, buffer, owners, MPI_REPLACE)); 767 PetscCall(PetscSFBcastEnd(sf1, MPIU_SF_NODE, buffer, owners, MPI_REPLACE)); 768 for (i = 0; i < numLeafIndices; ++i) PetscCheck(owners[i].rank >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Global point %" PetscInt_FMT " was unclaimed", leafIndices[i]); 769 PetscCall(PetscFree(buffer)); 770 if (sfA) { 771 *sfA = sf1; 772 } else PetscCall(PetscSFDestroy(&sf1)); 773 /* Create sf */ 774 if (flag && rootLocalIndices == leafLocalIndices && leafLocalOffset == rootLocalOffset) { 775 /* leaf space == root space */ 776 for (i = 0, nleaves = 0; i < numLeafIndices; ++i) 777 if (owners[i].rank != rank) ++nleaves; 778 PetscCall(PetscMalloc1(nleaves, &ilocal)); 779 PetscCall(PetscMalloc1(nleaves, &iremote)); 780 for (i = 0, nleaves = 0; i < numLeafIndices; ++i) { 781 if (owners[i].rank != rank) { 782 ilocal[nleaves] = leafLocalOffset + i; 783 iremote[nleaves].rank = owners[i].rank; 784 iremote[nleaves].index = owners[i].index; 785 ++nleaves; 786 } 787 } 788 PetscCall(PetscFree(owners)); 789 } else { 790 nleaves = numLeafIndices; 791 PetscCall(PetscMalloc1(nleaves, &ilocal)); 792 for (i = 0; i < nleaves; ++i) ilocal[i] = leafLocalOffset + (leafLocalIndices ? 
leafLocalIndices[i] : i); 793 iremote = owners; 794 } 795 PetscCall(PetscSFCreate(comm, sf)); 796 PetscCall(PetscSFSetFromOptions(*sf)); 797 PetscCall(PetscSFSetGraph(*sf, rootLocalOffset + numRootIndices, nleaves, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 798 PetscFunctionReturn(PETSC_SUCCESS); 799 } 800 801 /*@ 802 PetscSFMerge - append/merge indices of `sfb` into `sfa`, with preference for `sfb` 803 804 Collective 805 806 Input Parameters: 807 + sfa - default `PetscSF` 808 - sfb - additional edges to add/replace edges in sfa 809 810 Output Parameter: 811 . merged - new `PetscSF` with combined edges 812 813 Level: intermediate 814 815 .seealso: `PetscSFCompose()` 816 @*/ 817 PetscErrorCode PetscSFMerge(PetscSF sfa, PetscSF sfb, PetscSF *merged) 818 { 819 PetscInt maxleaf; 820 821 PetscFunctionBegin; 822 PetscValidHeaderSpecific(sfa, PETSCSF_CLASSID, 1); 823 PetscValidHeaderSpecific(sfb, PETSCSF_CLASSID, 2); 824 PetscCheckSameComm(sfa, 1, sfb, 2); 825 PetscAssertPointer(merged, 3); 826 { 827 PetscInt aleaf, bleaf; 828 PetscCall(PetscSFGetLeafRange(sfa, NULL, &aleaf)); 829 PetscCall(PetscSFGetLeafRange(sfb, NULL, &bleaf)); 830 maxleaf = PetscMax(aleaf, bleaf) + 1; // One more than the last index 831 } 832 PetscInt *clocal, aroots, aleaves, broots, bleaves; 833 PetscSFNode *cremote; 834 const PetscInt *alocal, *blocal; 835 const PetscSFNode *aremote, *bremote; 836 PetscCall(PetscMalloc2(maxleaf, &clocal, maxleaf, &cremote)); 837 for (PetscInt i = 0; i < maxleaf; i++) clocal[i] = -1; 838 PetscCall(PetscSFGetGraph(sfa, &aroots, &aleaves, &alocal, &aremote)); 839 PetscCall(PetscSFGetGraph(sfb, &broots, &bleaves, &blocal, &bremote)); 840 PetscCheck(aroots == broots, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Both sfa and sfb must have the same root space"); 841 for (PetscInt i = 0; i < aleaves; i++) { 842 PetscInt a = alocal ? 
alocal[i] : i; 843 clocal[a] = a; 844 cremote[a] = aremote[i]; 845 } 846 for (PetscInt i = 0; i < bleaves; i++) { 847 PetscInt b = blocal ? blocal[i] : i; 848 clocal[b] = b; 849 cremote[b] = bremote[i]; 850 } 851 PetscInt nleaves = 0; 852 for (PetscInt i = 0; i < maxleaf; i++) { 853 if (clocal[i] < 0) continue; 854 clocal[nleaves] = clocal[i]; 855 cremote[nleaves] = cremote[i]; 856 nleaves++; 857 } 858 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfa), merged)); 859 PetscCall(PetscSFSetGraph(*merged, aroots, nleaves, clocal, PETSC_COPY_VALUES, cremote, PETSC_COPY_VALUES)); 860 PetscCall(PetscFree2(clocal, cremote)); 861 PetscFunctionReturn(PETSC_SUCCESS); 862 } 863 864 /*@ 865 PetscSFCreateStridedSF - Create an `PetscSF` to communicate interleaved blocks of data 866 867 Collective 868 869 Input Parameters: 870 + sf - star forest 871 . bs - stride 872 . ldr - leading dimension of root space 873 - ldl - leading dimension of leaf space 874 875 Output Parameter: 876 . vsf - the new `PetscSF` 877 878 Level: intermediate 879 880 Notes: 881 This can be useful to perform communications on blocks of right-hand sides. For example, the calling sequence 882 .vb 883 c_datatype *roots, *leaves; 884 for i in [0,bs) do 885 PetscSFBcastBegin(sf, mpi_datatype, roots + i*ldr, leaves + i*ldl, op) 886 PetscSFBcastEnd(sf, mpi_datatype, roots + i*ldr, leaves + i*ldl, op) 887 .ve 888 is equivalent to 889 .vb 890 c_datatype *roots, *leaves; 891 PetscSFCreateStridedSF(sf, bs, ldr, ldl, &vsf) 892 PetscSFBcastBegin(vsf, mpi_datatype, roots, leaves, op) 893 PetscSFBcastEnd(vsf, mpi_datatype, roots, leaves, op) 894 .ve 895 896 Developer Notes: 897 Should this functionality be handled with a new API instead of creating a new object? 
898 899 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()` 900 @*/ 901 PetscErrorCode PetscSFCreateStridedSF(PetscSF sf, PetscInt bs, PetscInt ldr, PetscInt ldl, PetscSF *vsf) 902 { 903 PetscSF rankssf; 904 const PetscSFNode *iremote, *sfrremote; 905 PetscSFNode *viremote; 906 const PetscInt *ilocal; 907 PetscInt *vilocal = NULL, *ldrs; 908 PetscInt nranks, nr, nl, vnr, vnl, maxl; 909 PetscMPIInt rank; 910 MPI_Comm comm; 911 PetscSFType sftype; 912 913 PetscFunctionBegin; 914 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 915 PetscValidLogicalCollectiveInt(sf, bs, 2); 916 PetscAssertPointer(vsf, 5); 917 if (bs == 1) { 918 PetscCall(PetscObjectReference((PetscObject)sf)); 919 *vsf = sf; 920 PetscFunctionReturn(PETSC_SUCCESS); 921 } 922 PetscCall(PetscSFSetUp(sf)); 923 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 924 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 925 PetscCall(PetscSFGetGraph(sf, &nr, &nl, &ilocal, &iremote)); 926 PetscCall(PetscSFGetLeafRange(sf, NULL, &maxl)); 927 maxl += 1; 928 if (ldl == PETSC_DECIDE) ldl = maxl; 929 if (ldr == PETSC_DECIDE) ldr = nr; 930 PetscCheck(ldr >= nr, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid leading dimension %" PetscInt_FMT " must be smaller than number of roots %" PetscInt_FMT, ldr, nr); 931 PetscCheck(ldl >= maxl, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid leading dimension %" PetscInt_FMT " must be larger than leaf range %" PetscInt_FMT, ldl, maxl - 1); 932 vnr = nr * bs; 933 vnl = nl * bs; 934 PetscCall(PetscMalloc1(vnl, &viremote)); 935 PetscCall(PetscMalloc1(vnl, &vilocal)); 936 937 /* Communicate root leading dimensions to leaf ranks */ 938 PetscCall(PetscSFGetRanksSF(sf, &rankssf)); 939 PetscCall(PetscSFGetGraph(rankssf, NULL, &nranks, NULL, &sfrremote)); 940 PetscCall(PetscMalloc1(nranks, &ldrs)); 941 PetscCall(PetscSFBcastBegin(rankssf, MPIU_INT, &ldr, ldrs, MPI_REPLACE)); 942 PetscCall(PetscSFBcastEnd(rankssf, MPIU_INT, &ldr, ldrs, MPI_REPLACE)); 943 944 for (PetscInt i = 0, rold = 
-1, lda = -1; i < nl; i++) { 945 const PetscInt r = iremote[i].rank; 946 const PetscInt ii = iremote[i].index; 947 948 if (r == rank) lda = ldr; 949 else if (rold != r) { 950 PetscInt j; 951 952 for (j = 0; j < nranks; j++) 953 if (sfrremote[j].rank == r) break; 954 PetscCheck(j < nranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to locate neighbor rank %" PetscInt_FMT, r); 955 lda = ldrs[j]; 956 } 957 rold = r; 958 for (PetscInt v = 0; v < bs; v++) { 959 viremote[v * nl + i].rank = r; 960 viremote[v * nl + i].index = v * lda + ii; 961 vilocal[v * nl + i] = v * ldl + (ilocal ? ilocal[i] : i); 962 } 963 } 964 PetscCall(PetscFree(ldrs)); 965 PetscCall(PetscSFCreate(comm, vsf)); 966 PetscCall(PetscSFGetType(sf, &sftype)); 967 PetscCall(PetscSFSetType(*vsf, sftype)); 968 PetscCall(PetscSFSetGraph(*vsf, vnr, vnl, vilocal, PETSC_OWN_POINTER, viremote, PETSC_OWN_POINTER)); 969 PetscFunctionReturn(PETSC_SUCCESS); 970 } 971