#include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
#include <petsc/private/sectionimpl.h>

/*@
  PetscSFSetGraphLayout - Set a parallel star forest via global indices and a `PetscLayout`

  Collective

  Input Parameters:
+ sf        - star forest
. layout    - `PetscLayout` defining the global space for roots
. nleaves   - number of leaf vertices on the current process, each of these references a root on any process
. ilocal    - locations of leaves in leafdata buffers, pass NULL for contiguous storage
. localmode - copy mode for ilocal
- gremote   - root vertices in global numbering corresponding to leaves in ilocal

  Level: intermediate

  Note:
  Global indices must lie in [0, N) where N is the global size of layout.
  Leaf indices in ilocal get sorted; this means the user-provided array gets sorted if localmode is `PETSC_OWN_POINTER`.

  Developer Notes:
  Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
  encode contiguous storage. In that case, if localmode is `PETSC_OWN_POINTER`, the memory is deallocated as it is not
  needed.

.seealso: `PetscSF`, `PetscSFGetGraphLayout()`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFSetGraphLayout(PetscSF sf, PetscLayout layout, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, const PetscInt *gremote)
{
  const PetscInt *range;
  PetscInt        i, nroots, ls = -1, ln = -1;
  PetscMPIInt     lr = -1;
  PetscSFNode    *remote;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout, &nroots));
  PetscCall(PetscLayoutGetRanges(layout, &range));
  PetscCall(PetscMalloc1(nleaves, &remote));
  if (nleaves) ls = gremote[0] + 1;
  for (i = 0; i < nleaves; i++) {
    const PetscInt idx = gremote[i] - ls;
    if (idx < 0 || idx >= ln) { /* short-circuit the search */
      PetscCall(PetscLayoutFindOwnerIndex(layout, gremote[i], &lr, &remote[i].index));
      remote[i].rank = lr;
      ls             = range[lr];
      ln             = range[lr + 1] - ls;
    } else {
      remote[i].rank  = lr;
      remote[i].index = idx;
    }
  }
  PetscCall(PetscSFSetGraph(sf, nroots, nleaves, ilocal, localmode, remote, PETSC_OWN_POINTER));
  PetscFunctionReturn(PETSC_SUCCESS);
}
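
/*
  A minimal usage sketch (illustrative only, not compiled here) for PetscSFSetGraphLayout():
  each process owns two roots in the layout and pulls roots 0 and 1 into a contiguous leaf
  buffer. The array contents and the use of PETSC_COMM_WORLD are assumptions for the sketch.

    PetscLayout layout;
    PetscSF     sf;
    PetscInt    gremote[2]  = {0, 1};   // global root indices requested as leaves
    PetscInt    rootdata[2] = {1, 2}, leafdata[2];

    PetscCall(PetscLayoutCreate(PETSC_COMM_WORLD, &layout));
    PetscCall(PetscLayoutSetLocalSize(layout, 2)); // two roots owned by this process
    PetscCall(PetscLayoutSetUp(layout));
    PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
    PetscCall(PetscSFSetGraphLayout(sf, layout, 2, NULL, PETSC_COPY_VALUES, gremote));
    PetscCall(PetscSFBcastBegin(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));
    PetscCall(PetscSFDestroy(&sf));
    PetscCall(PetscLayoutDestroy(&layout));
*/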

/*@C
  PetscSFGetGraphLayout - Get the global indices and `PetscLayout` that describe this star forest

  Collective

  Input Parameter:
. sf - star forest

  Output Parameters:
+ layout  - `PetscLayout` defining the global space for roots
. nleaves - number of leaf vertices on the current process, each of these references a root on any process
. ilocal  - locations of leaves in leafdata buffers, or `NULL` for contiguous storage
- gremote - root vertices in global numbering corresponding to leaves in ilocal

  Level: intermediate

  Notes:
  The outputs are such that passing them as inputs to `PetscSFSetGraphLayout()` would lead to the same star forest.
  The outputs `layout` and `gremote` are freshly created each time this function is called,
  so they need to be freed (with `PetscLayoutDestroy()` and `PetscFree()`) by the user.

.seealso: `PetscSF`, `PetscSFSetGraphLayout()`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFGetGraphLayout(PetscSF sf, PetscLayout *layout, PetscInt *nleaves, const PetscInt *ilocal[], PetscInt *gremote[])
{
  PetscInt           nr, nl;
  const PetscSFNode *ir;
  PetscLayout        lt;

  PetscFunctionBegin;
  PetscCall(PetscSFGetGraph(sf, &nr, &nl, ilocal, &ir));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sf), nr, PETSC_DECIDE, 1, &lt));
  if (gremote) {
    PetscInt        i;
    const PetscInt *range;
    PetscInt       *gr;

    PetscCall(PetscLayoutGetRanges(lt, &range));
    PetscCall(PetscMalloc1(nl, &gr));
    for (i = 0; i < nl; i++) gr[i] = range[ir[i].rank] + ir[i].index;
    *gremote = gr;
  }
  if (nleaves) *nleaves = nl;
  if (layout) *layout = lt;
  else PetscCall(PetscLayoutDestroy(&lt));
  PetscFunctionReturn(PETSC_SUCCESS);
}
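
/*
  A minimal sketch (illustrative only) of retrieving and releasing the layout description of
  an existing star forest `sf` (assumed already set up): `layout` and `gremote` are created
  fresh by the call and belong to the caller, while `ilocal` is borrowed from `sf`.

    PetscLayout     layout;
    PetscInt        nleaves;
    const PetscInt *ilocal;
    PetscInt       *gremote;

    PetscCall(PetscSFGetGraphLayout(sf, &layout, &nleaves, &ilocal, &gremote));
    // ... use nleaves, ilocal, gremote ...
    PetscCall(PetscFree(gremote));
    PetscCall(PetscLayoutDestroy(&layout));
*/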

/*@
  PetscSFSetGraphSection - Sets the `PetscSF` graph encoding the parallel dof overlap based upon the `PetscSection` describing the data layout.

  Input Parameters:
+ sf            - The `PetscSF`
. localSection  - `PetscSection` describing the local data layout
- globalSection - `PetscSection` describing the global data layout

  Level: developer

.seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFSetGraphLayout()`
@*/
PetscErrorCode PetscSFSetGraphSection(PetscSF sf, PetscSection localSection, PetscSection globalSection)
{
  MPI_Comm        comm;
  PetscLayout     layout;
  const PetscInt *ranges;
  PetscInt       *local;
  PetscSFNode    *remote;
  PetscInt        pStart, pEnd, p, nroots, nleaves = 0, l;
  PetscMPIInt     size, rank;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidHeaderSpecific(localSection, PETSC_SECTION_CLASSID, 2);
  PetscValidHeaderSpecific(globalSection, PETSC_SECTION_CLASSID, 3);

  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscSectionGetChart(globalSection, &pStart, &pEnd));
  PetscCall(PetscSectionGetConstrainedStorageSize(globalSection, &nroots));
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, 1));
  PetscCall(PetscLayoutSetLocalSize(layout, nroots));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetRanges(layout, &ranges));
  for (p = pStart; p < pEnd; ++p) {
    PetscInt gdof, gcdof;

    PetscCall(PetscSectionGetDof(globalSection, p, &gdof));
    PetscCall(PetscSectionGetConstraintDof(globalSection, p, &gcdof));
    PetscCheck(gcdof <= (gdof < 0 ? -(gdof + 1) : gdof), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Point %" PetscInt_FMT " has %" PetscInt_FMT " constraints > %" PetscInt_FMT " dof", p, gcdof, gdof < 0 ? -(gdof + 1) : gdof);
    nleaves += gdof < 0 ? -(gdof + 1) - gcdof : gdof - gcdof;
  }
  PetscCall(PetscMalloc1(nleaves, &local));
  PetscCall(PetscMalloc1(nleaves, &remote));
  for (p = pStart, l = 0; p < pEnd; ++p) {
    const PetscInt *cind;
    PetscInt        dof, cdof, off, gdof, gcdof, goff, gsize, d, c;

    PetscCall(PetscSectionGetDof(localSection, p, &dof));
    PetscCall(PetscSectionGetOffset(localSection, p, &off));
    PetscCall(PetscSectionGetConstraintDof(localSection, p, &cdof));
    PetscCall(PetscSectionGetConstraintIndices(localSection, p, &cind));
    PetscCall(PetscSectionGetDof(globalSection, p, &gdof));
    PetscCall(PetscSectionGetConstraintDof(globalSection, p, &gcdof));
    PetscCall(PetscSectionGetOffset(globalSection, p, &goff));
    if (!gdof) continue; /* Censored point */
    gsize = gdof < 0 ? -(gdof + 1) - gcdof : gdof - gcdof;
    if (gsize != dof - cdof) {
      PetscCheck(gsize == dof, comm, PETSC_ERR_ARG_WRONG, "Global dof %" PetscInt_FMT " for point %" PetscInt_FMT " is neither the constrained size %" PetscInt_FMT ", nor the unconstrained %" PetscInt_FMT, gsize, p, dof - cdof, dof);
      cdof = 0; /* Ignore constraints */
    }
    for (d = 0, c = 0; d < dof; ++d) {
      if ((c < cdof) && (cind[c] == d)) {
        ++c;
        continue;
      }
      local[l + d - c] = off + d;
    }
    PetscCheck(d - c == gsize, comm, PETSC_ERR_ARG_WRONG, "Point %" PetscInt_FMT ": Global dof %" PetscInt_FMT " != %" PetscInt_FMT " size - number of constraints", p, gsize, d - c);
    if (gdof < 0) {
      for (d = 0; d < gsize; ++d, ++l) {
        PetscInt    offset = -(goff + 1) + d, ir;
        PetscMPIInt r;

        PetscCall(PetscFindInt(offset, size + 1, ranges, &ir));
        PetscCall(PetscMPIIntCast(ir, &r));
        if (r < 0) r = -(r + 2);
        PetscCheck(!(r < 0) && !(r >= size), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Point %" PetscInt_FMT " mapped to invalid process %d (%" PetscInt_FMT ", %" PetscInt_FMT ")", p, r, gdof, goff);
        remote[l].rank  = r;
        remote[l].index = offset - ranges[r];
      }
    } else {
      for (d = 0; d < gsize; ++d, ++l) {
        remote[l].rank  = rank;
        remote[l].index = goff + d - ranges[rank];
      }
    }
  }
  PetscCheck(l == nleaves, comm, PETSC_ERR_PLIB, "Iteration error, l %" PetscInt_FMT " != nleaves %" PetscInt_FMT, l, nleaves);
  PetscCall(PetscLayoutDestroy(&layout));
  PetscCall(PetscSFSetGraph(sf, nroots, nleaves, local, PETSC_OWN_POINTER, remote, PETSC_OWN_POINTER));
  PetscFunctionReturn(PETSC_SUCCESS);
}
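
/*
  A minimal sketch (illustrative only), assuming `localSection` and `globalSection` describe
  the same set of mesh points (for example obtained from DMGetLocalSection() and
  DMGetGlobalSection()); the graph of the dof-level SF is derived entirely from the two sections.

    PetscSF dofSF;

    PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &dofSF));
    PetscCall(PetscSFSetGraphSection(dofSF, localSection, globalSection));
    // ... use dofSF to update ghosted dofs from their owning processes ...
    PetscCall(PetscSFDestroy(&dofSF));
*/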

/*@C
  PetscSFDistributeSection - Create a new `PetscSection` reorganized, moving from the root to the leaves of the `PetscSF`

  Collective

  Input Parameters:
+ sf          - The `PetscSF`
- rootSection - Section defined on root space

  Output Parameters:
+ remoteOffsets - root offsets in leaf storage, or `NULL`
- leafSection   - Section defined on the leaf space

  Level: advanced

  Fortran Notes:
  In Fortran, use PetscSFDistributeSectionF90()

.seealso: `PetscSF`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFDistributeSection(PetscSF sf, PetscSection rootSection, PetscInt **remoteOffsets, PetscSection leafSection)
{
  PetscSF         embedSF;
  const PetscInt *indices;
  IS              selected;
  PetscInt        numFields, nroots, rpStart, rpEnd, lpStart = PETSC_INT_MAX, lpEnd = -1, f, c;
  PetscBool      *sub, hasc;
  PetscMPIInt     msize;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(PETSCSF_DistSect, sf, 0, 0, 0));
  PetscCall(PetscSectionGetNumFields(rootSection, &numFields));
  if (numFields) {
    IS perm;

    /* PetscSectionSetNumFields() calls PetscSectionReset(), which destroys
       leafSection->perm. To keep this permutation set by the user, we grab
       the reference before calling PetscSectionSetNumFields() and set it
       back after. */
    PetscCall(PetscSectionGetPermutation(leafSection, &perm));
    PetscCall(PetscObjectReference((PetscObject)perm));
    PetscCall(PetscSectionSetNumFields(leafSection, numFields));
    PetscCall(PetscSectionSetPermutation(leafSection, perm));
    PetscCall(ISDestroy(&perm));
  }
  PetscCall(PetscMalloc1(numFields + 2, &sub));
  sub[1] = rootSection->bc ? PETSC_TRUE : PETSC_FALSE;
  for (f = 0; f < numFields; ++f) {
    PetscSectionSym sym, dsym = NULL;
    const char     *name    = NULL;
    PetscInt        numComp = 0;

    sub[2 + f] = rootSection->field[f]->bc ? PETSC_TRUE : PETSC_FALSE;
    PetscCall(PetscSectionGetFieldComponents(rootSection, f, &numComp));
    PetscCall(PetscSectionGetFieldName(rootSection, f, &name));
    PetscCall(PetscSectionGetFieldSym(rootSection, f, &sym));
    if (sym) PetscCall(PetscSectionSymDistribute(sym, sf, &dsym));
    PetscCall(PetscSectionSetFieldComponents(leafSection, f, numComp));
    PetscCall(PetscSectionSetFieldName(leafSection, f, name));
    PetscCall(PetscSectionSetFieldSym(leafSection, f, dsym));
    PetscCall(PetscSectionSymDestroy(&dsym));
    for (c = 0; c < rootSection->numFieldComponents[f]; ++c) {
      PetscCall(PetscSectionGetComponentName(rootSection, f, c, &name));
      PetscCall(PetscSectionSetComponentName(leafSection, f, c, name));
    }
  }
  PetscCall(PetscSectionGetChart(rootSection, &rpStart, &rpEnd));
  PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
  rpEnd = PetscMin(rpEnd, nroots);
  rpEnd = PetscMax(rpStart, rpEnd);
  /* see if we can avoid creating the embedded SF, since it can cost more than an allreduce */
  sub[0] = (PetscBool)(nroots != rpEnd - rpStart);
  PetscCall(PetscMPIIntCast(2 + numFields, &msize));
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, sub, msize, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)sf)));
  if (sub[0]) {
    PetscCall(ISCreateStride(PETSC_COMM_SELF, rpEnd - rpStart, rpStart, 1, &selected));
    PetscCall(ISGetIndices(selected, &indices));
    PetscCall(PetscSFCreateEmbeddedRootSF(sf, rpEnd - rpStart, indices, &embedSF));
    PetscCall(ISRestoreIndices(selected, &indices));
    PetscCall(ISDestroy(&selected));
  } else {
    PetscCall(PetscObjectReference((PetscObject)sf));
    embedSF = sf;
  }
  PetscCall(PetscSFGetLeafRange(embedSF, &lpStart, &lpEnd));
  lpEnd++;

  PetscCall(PetscSectionSetChart(leafSection, lpStart, lpEnd));

  /* Constrained dof section */
  hasc = sub[1];
  for (f = 0; f < numFields; ++f) hasc = (PetscBool)(hasc || sub[2 + f]);

  /* Could fuse these at the cost of copies and extra allocation */
  PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->atlasDof, -lpStart), MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->atlasDof, -lpStart), MPI_REPLACE));
  if (sub[1]) {
    PetscCall(PetscSectionCheckConstraints_Private(rootSection));
    PetscCall(PetscSectionCheckConstraints_Private(leafSection));
    PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->bc->atlasDof[-rpStart], &leafSection->bc->atlasDof[-lpStart], MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->bc->atlasDof[-rpStart], &leafSection->bc->atlasDof[-lpStart], MPI_REPLACE));
  }
  for (f = 0; f < numFields; ++f) {
    PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->field[f]->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->field[f]->atlasDof, -lpStart), MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->field[f]->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->field[f]->atlasDof, -lpStart), MPI_REPLACE));
    if (sub[2 + f]) {
      PetscCall(PetscSectionCheckConstraints_Private(rootSection->field[f]));
      PetscCall(PetscSectionCheckConstraints_Private(leafSection->field[f]));
      PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasDof[-rpStart], &leafSection->field[f]->bc->atlasDof[-lpStart], MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasDof[-rpStart], &leafSection->field[f]->bc->atlasDof[-lpStart], MPI_REPLACE));
    }
  }
  if (remoteOffsets) {
    PetscCall(PetscMalloc1(lpEnd - lpStart, remoteOffsets));
    PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE));
  }
  PetscCall(PetscSectionInvalidateMaxDof_Internal(leafSection));
  PetscCall(PetscSectionSetUp(leafSection));
  if (hasc) { /* need to communicate bcIndices */
    PetscSF   bcSF;
    PetscInt *rOffBc;

    PetscCall(PetscMalloc1(lpEnd - lpStart, &rOffBc));
    if (sub[1]) {
      PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
      PetscCall(PetscSFCreateSectionSF(embedSF, rootSection->bc, rOffBc, leafSection->bc, &bcSF));
      PetscCall(PetscSFBcastBegin(bcSF, MPIU_INT, rootSection->bcIndices, leafSection->bcIndices, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(bcSF, MPIU_INT, rootSection->bcIndices, leafSection->bcIndices, MPI_REPLACE));
      PetscCall(PetscSFDestroy(&bcSF));
    }
    for (f = 0; f < numFields; ++f) {
      if (sub[2 + f]) {
        PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
        PetscCall(PetscSFCreateSectionSF(embedSF, rootSection->field[f]->bc, rOffBc, leafSection->field[f]->bc, &bcSF));
        PetscCall(PetscSFBcastBegin(bcSF, MPIU_INT, rootSection->field[f]->bcIndices, leafSection->field[f]->bcIndices, MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(bcSF, MPIU_INT, rootSection->field[f]->bcIndices, leafSection->field[f]->bcIndices, MPI_REPLACE));
        PetscCall(PetscSFDestroy(&bcSF));
      }
    }
    PetscCall(PetscFree(rOffBc));
  }
  PetscCall(PetscSFDestroy(&embedSF));
  PetscCall(PetscFree(sub));
  PetscCall(PetscLogEventEnd(PETSCSF_DistSect, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
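
/*
  A minimal sketch (illustrative only) of distributing a section along a point SF. The names
  `pointSF` (an SF over mesh points) and `rootSection` (an existing section on the root side)
  are assumptions; `remoteOffsets` is optional and is what PetscSFCreateSectionSF() consumes.

    PetscSection leafSection;
    PetscInt    *remoteOffsets = NULL;

    PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject)pointSF), &leafSection));
    PetscCall(PetscSFDistributeSection(pointSF, rootSection, &remoteOffsets, leafSection));
    // ... leafSection now describes the leaf-side data layout ...
    PetscCall(PetscFree(remoteOffsets));
    PetscCall(PetscSectionDestroy(&leafSection));
*/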

/*@C
  PetscSFCreateRemoteOffsets - Create offsets for point data on remote processes

  Collective

  Input Parameters:
+ sf          - The `PetscSF`
. rootSection - Data layout of remote points for outgoing data (this is layout for SF roots)
- leafSection - Data layout of local points for incoming data (this is layout for SF leaves)

  Output Parameter:
. remoteOffsets - Offsets for point data on remote processes (these are offsets from the root section), or NULL

  Level: developer

  Fortran Notes:
  In Fortran, use PetscSFCreateRemoteOffsetsF90()

.seealso: `PetscSF`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFCreateRemoteOffsets(PetscSF sf, PetscSection rootSection, PetscSection leafSection, PetscInt **remoteOffsets)
{
  PetscSF         embedSF;
  const PetscInt *indices;
  IS              selected;
  PetscInt        numRoots, rpStart = 0, rpEnd = 0, lpStart = 0, lpEnd = 0;

  PetscFunctionBegin;
  *remoteOffsets = NULL;
  PetscCall(PetscSFGetGraph(sf, &numRoots, NULL, NULL, NULL));
  if (numRoots < 0) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscLogEventBegin(PETSCSF_RemoteOff, sf, 0, 0, 0));
  PetscCall(PetscSectionGetChart(rootSection, &rpStart, &rpEnd));
  PetscCall(PetscSectionGetChart(leafSection, &lpStart, &lpEnd));
  PetscCall(ISCreateStride(PETSC_COMM_SELF, rpEnd - rpStart, rpStart, 1, &selected));
  PetscCall(ISGetIndices(selected, &indices));
  PetscCall(PetscSFCreateEmbeddedRootSF(sf, rpEnd - rpStart, indices, &embedSF));
  PetscCall(ISRestoreIndices(selected, &indices));
  PetscCall(ISDestroy(&selected));
  PetscCall(PetscCalloc1(lpEnd - lpStart, remoteOffsets));
  PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE));
  PetscCall(PetscSFDestroy(&embedSF));
  PetscCall(PetscLogEventEnd(PETSCSF_RemoteOff, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFCreateSectionSF - Create an expanded `PetscSF` of dofs, assuming the input `PetscSF` relates points

  Collective

  Input Parameters:
+ sf            - The `PetscSF`
. rootSection   - Data layout of remote points for outgoing data (this is usually the serial section)
. remoteOffsets - Offsets for point data on remote processes (these are offsets from the root section), or NULL
- leafSection   - Data layout of local points for incoming data (this is the distributed section)

  Output Parameter:
. sectionSF - The new `PetscSF`

  Level: advanced

  Notes:
  `remoteOffsets` can be NULL if `sf` does not reference any points in leafSection

  Fortran Notes:
  In Fortran, use PetscSFCreateSectionSFF90()

.seealso: `PetscSF`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFCreateSectionSF(PetscSF sf, PetscSection rootSection, PetscInt remoteOffsets[], PetscSection leafSection, PetscSF *sectionSF)
{
  MPI_Comm           comm;
  const PetscInt    *localPoints;
  const PetscSFNode *remotePoints;
  PetscInt           lpStart, lpEnd;
  PetscInt           numRoots, numSectionRoots, numPoints, numIndices = 0;
  PetscInt          *localIndices;
  PetscSFNode       *remoteIndices;
  PetscInt           i, ind;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscAssertPointer(rootSection, 2);
  /* Cannot check PetscAssertPointer(remoteOffsets,3) because it can be NULL if sf does not reference any points in leafSection */
  PetscAssertPointer(leafSection, 4);
  PetscAssertPointer(sectionSF, 5);
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCall(PetscSFCreate(comm, sectionSF));
  PetscCall(PetscSectionGetChart(leafSection, &lpStart, &lpEnd));
  PetscCall(PetscSectionGetStorageSize(rootSection, &numSectionRoots));
  PetscCall(PetscSFGetGraph(sf, &numRoots, &numPoints, &localPoints, &remotePoints));
  if (numRoots < 0) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscLogEventBegin(PETSCSF_SectSF, sf, 0, 0, 0));
  for (i = 0; i < numPoints; ++i) {
    PetscInt localPoint = localPoints ? localPoints[i] : i;
    PetscInt dof;

    if ((localPoint >= lpStart) && (localPoint < lpEnd)) {
      PetscCall(PetscSectionGetDof(leafSection, localPoint, &dof));
      numIndices += dof < 0 ? 0 : dof;
    }
  }
  PetscCall(PetscMalloc1(numIndices, &localIndices));
  PetscCall(PetscMalloc1(numIndices, &remoteIndices));
  /* Create new index graph */
  for (i = 0, ind = 0; i < numPoints; ++i) {
    PetscInt localPoint = localPoints ? localPoints[i] : i;
    PetscInt rank       = remotePoints[i].rank;

    if ((localPoint >= lpStart) && (localPoint < lpEnd)) {
      PetscInt remoteOffset = remoteOffsets[localPoint - lpStart];
      PetscInt loff, dof, d;

      PetscCall(PetscSectionGetOffset(leafSection, localPoint, &loff));
      PetscCall(PetscSectionGetDof(leafSection, localPoint, &dof));
      for (d = 0; d < dof; ++d, ++ind) {
        localIndices[ind]        = loff + d;
        remoteIndices[ind].rank  = rank;
        remoteIndices[ind].index = remoteOffset + d;
      }
    }
  }
  PetscCheck(numIndices == ind, comm, PETSC_ERR_PLIB, "Inconsistency in indices, %" PetscInt_FMT " should be %" PetscInt_FMT, ind, numIndices);
  PetscCall(PetscSFSetGraph(*sectionSF, numSectionRoots, numIndices, localIndices, PETSC_OWN_POINTER, remoteIndices, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetUp(*sectionSF));
  PetscCall(PetscLogEventEnd(PETSCSF_SectSF, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
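
/*
  A minimal sketch (illustrative only) of the common pattern pairing PetscSFCreateRemoteOffsets()
  with PetscSFCreateSectionSF(): starting from an assumed point SF `pointSF` and an already
  distributed `rootSection`/`leafSection` pair, build the dof-level SF and use it to move
  PetscScalar point data stored in the assumed arrays `rootArray` and `leafArray`.

    PetscInt *remoteOffsets = NULL;
    PetscSF   sectionSF;

    PetscCall(PetscSFCreateRemoteOffsets(pointSF, rootSection, leafSection, &remoteOffsets));
    PetscCall(PetscSFCreateSectionSF(pointSF, rootSection, remoteOffsets, leafSection, &sectionSF));
    PetscCall(PetscFree(remoteOffsets));
    PetscCall(PetscSFBcastBegin(sectionSF, MPIU_SCALAR, rootArray, leafArray, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sectionSF, MPIU_SCALAR, rootArray, leafArray, MPI_REPLACE));
    PetscCall(PetscSFDestroy(&sectionSF));
*/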

/*@C
  PetscSFCreateFromLayouts - Creates a parallel star forest mapping two `PetscLayout` objects

  Collective

  Input Parameters:
+ rmap - `PetscLayout` defining the global root space
- lmap - `PetscLayout` defining the global leaf space

  Output Parameter:
. sf - The parallel star forest

  Level: intermediate

.seealso: `PetscSF`, `PetscSFCreate()`, `PetscLayoutCreate()`, `PetscSFSetGraphLayout()`
@*/
PetscErrorCode PetscSFCreateFromLayouts(PetscLayout rmap, PetscLayout lmap, PetscSF *sf)
{
  PetscInt     i, nroots, nleaves = 0;
  PetscInt     rN, lst, len;
  PetscMPIInt  owner = -1;
  PetscSFNode *remote;
  MPI_Comm     rcomm = rmap->comm;
  MPI_Comm     lcomm = lmap->comm;
  PetscMPIInt  flg;

  PetscFunctionBegin;
  PetscAssertPointer(sf, 3);
  PetscCheck(rmap->setupcalled, rcomm, PETSC_ERR_ARG_WRONGSTATE, "Root layout not setup");
  PetscCheck(lmap->setupcalled, lcomm, PETSC_ERR_ARG_WRONGSTATE, "Leaf layout not setup");
  PetscCallMPI(MPI_Comm_compare(rcomm, lcomm, &flg));
  PetscCheck(flg == MPI_CONGRUENT || flg == MPI_IDENT, rcomm, PETSC_ERR_SUP, "cannot map two layouts with non-matching communicators");
  PetscCall(PetscSFCreate(rcomm, sf));
  PetscCall(PetscLayoutGetLocalSize(rmap, &nroots));
  PetscCall(PetscLayoutGetSize(rmap, &rN));
  PetscCall(PetscLayoutGetRange(lmap, &lst, &len));
  PetscCall(PetscMalloc1(len - lst, &remote));
  for (i = lst; i < len && i < rN; i++) {
    if (owner < -1 || i >= rmap->range[owner + 1]) PetscCall(PetscLayoutFindOwner(rmap, i, &owner));
    remote[nleaves].rank  = owner;
    remote[nleaves].index = i - rmap->range[owner];
    nleaves++;
  }
  PetscCall(PetscSFSetGraph(*sf, nroots, nleaves, NULL, PETSC_OWN_POINTER, remote, PETSC_COPY_VALUES));
  PetscCall(PetscFree(remote));
  PetscFunctionReturn(PETSC_SUCCESS);
}
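
/*
  A minimal sketch (illustrative only) mapping two layouts of the same global size: leaves
  are the locally owned entries of `lmap`, roots are the entries of `rmap`. The global size
  of 100 and the use of PETSC_COMM_WORLD are assumptions for the sketch.

    PetscLayout rmap, lmap;
    PetscSF     sf;

    PetscCall(PetscLayoutCreateFromSizes(PETSC_COMM_WORLD, PETSC_DECIDE, 100, 1, &rmap));
    PetscCall(PetscLayoutCreateFromSizes(PETSC_COMM_WORLD, PETSC_DECIDE, 100, 1, &lmap));
    PetscCall(PetscSFCreateFromLayouts(rmap, lmap, &sf));
    // ... sf now pulls root entries of rmap into the local range of lmap ...
    PetscCall(PetscSFDestroy(&sf));
    PetscCall(PetscLayoutDestroy(&lmap));
    PetscCall(PetscLayoutDestroy(&rmap));
*/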

/* TODO: handle nooffprocentries like MatZeroRowsMapLocal_Private, since this code is the same */
PetscErrorCode PetscLayoutMapLocal(PetscLayout map, PetscInt N, const PetscInt idxs[], PetscInt *on, PetscInt **oidxs, PetscInt **ogidxs)
{
  PetscInt    *owners = map->range;
  PetscInt     n      = map->n;
  PetscSF      sf;
  PetscInt    *lidxs, *work = NULL;
  PetscSFNode *ridxs;
  PetscMPIInt  rank, p = 0;
  PetscInt     r, len = 0;

  PetscFunctionBegin;
  if (on) *on = 0; /* squelch -Wmaybe-uninitialized */
  /* Create SF where leaves are input idxs and roots are owned idxs */
  PetscCallMPI(MPI_Comm_rank(map->comm, &rank));
  PetscCall(PetscMalloc1(n, &lidxs));
  for (r = 0; r < n; ++r) lidxs[r] = -1;
  PetscCall(PetscMalloc1(N, &ridxs));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = idxs[r];
    PetscCheck(idx >= 0 && idx < map->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Index %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, map->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this idx too */
      PetscCall(PetscLayoutFindOwner(map, idx, &p));
    }
    ridxs[r].rank  = p;
    ridxs[r].index = idxs[r] - owners[p];
  }
  PetscCall(PetscSFCreate(map->comm, &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, ridxs, PETSC_OWN_POINTER));
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)idxs, lidxs, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)idxs, lidxs, MPI_LOR));
  if (ogidxs) { /* communicate global idxs */
    PetscInt cum = 0, start, *work2;

    PetscCall(PetscMalloc1(n, &work));
    PetscCall(PetscCalloc1(N, &work2));
    for (r = 0; r < N; ++r)
      if (idxs[r] >= 0) cum++;
    PetscCallMPI(MPI_Scan(&cum, &start, 1, MPIU_INT, MPI_SUM, map->comm));
    start -= cum;
    cum = 0;
    for (r = 0; r < N; ++r)
      if (idxs[r] >= 0) work2[r] = start + cum++;
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work2, work, MPI_REPLACE));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work2, work, MPI_REPLACE));
    PetscCall(PetscFree(work2));
  }
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in indices */
  for (r = 0; r < n; ++r)
    if (lidxs[r] >= 0) {
      if (work) work[len] = work[r];
      lidxs[len++] = r;
    }
  if (on) *on = len;
  if (oidxs) *oidxs = lidxs;
  if (ogidxs) *ogidxs = work;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFCreateByMatchingIndices - Create `PetscSF` by matching root and leaf indices

  Collective

  Input Parameters:
+ layout           - `PetscLayout` defining the global index space and the rank that brokers each index
. numRootIndices   - size of rootIndices
. rootIndices      - `PetscInt` array of global indices of which this process requests ownership
. rootLocalIndices - root local index permutation (NULL if no permutation)
. rootLocalOffset  - offset to be added to root local indices
. numLeafIndices   - size of leafIndices
. leafIndices      - `PetscInt` array of global indices with which this process requires data associated
. leafLocalIndices - leaf local index permutation (NULL if no permutation)
- leafLocalOffset  - offset to be added to leaf local indices

  Output Parameters:
+ sfA - star forest representing the communication pattern from the layout space to the leaf space (NULL if not needed)
- sf  - star forest representing the communication pattern from the root space to the leaf space

  Level: advanced

  Example 1:
.vb
  rank            : 0        1     2
  rootIndices     : [1 0 2]  [3]   [3]
  rootLocalOffset : 100      200   300
  layout          : [0 1]    [2]   [3]
  leafIndices     : [0]      [2]   [0 3]
  leafLocalOffset : 400      500   600

would build the following PetscSF

  [0] 400 <- (0,101)
  [1] 500 <- (0,102)
  [2] 600 <- (0,101)
  [2] 601 <- (2,300)
.ve

  Example 2:
.vb
  rank            : 0               1               2
  rootIndices     : [1 0 2]         [3]             [3]
  rootLocalOffset : 100             200             300
  layout          : [0 1]           [2]             [3]
  leafIndices     : rootIndices     rootIndices     rootIndices
  leafLocalOffset : rootLocalOffset rootLocalOffset rootLocalOffset

would build the following PetscSF

  [1] 200 <- (2,300)
.ve

  Example 3:
.vb
  No process requests ownership of global index 1, but no process needs it.

  rank            : 0      1     2
  numRootIndices  : 2      1     1
  rootIndices     : [0 2]  [3]   [3]
  rootLocalOffset : 100    200   300
  layout          : [0 1]  [2]   [3]
  numLeafIndices  : 1      1     2
  leafIndices     : [0]    [2]   [0 3]
  leafLocalOffset : 400    500   600

would build the following PetscSF

  [0] 400 <- (0,100)
  [1] 500 <- (0,101)
  [2] 600 <- (0,100)
  [2] 601 <- (2,300)
.ve

  Notes:
  The layout parameter represents any partitioning of [0, N), where N is the total number of global indices, and its
  local size can be set to `PETSC_DECIDE`.

  If a global index x lies in the partition owned by process i, each process whose rootIndices contains x requests
  ownership of x and sends its own rank and the local index of x to process i.
  If multiple processes request ownership of x, the one with the highest rank owns x.
  Process i then broadcasts the ownership information, so that each process whose leafIndices contains x knows the
  ownership information of x.
  The output sf is constructed by associating each leaf point to a root point in this way.

  Suppose there is point data ordered according to the global indices and partitioned according to the given layout.
  The optional output `PetscSF` object sfA can be used to push such data to leaf points.

  All indices in rootIndices and leafIndices must lie in the layout range. The union (over all processes) of rootIndices
  must cover that of leafIndices, but need not cover the entire layout.

  If (leafIndices, leafLocalIndices, leafLocalOffset) == (rootIndices, rootLocalIndices, rootLocalOffset), the output
  star forest is almost the identity, so it will only include the non-trivial part of the map.

  Developer Notes:
  The current approach, in which the process with the highest rank gains ownership, may cause load imbalance; consider using
  hash(rank, root_local_index) as the bid for the ownership determination.

.seealso: `PetscSF`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFCreateByMatchingIndices(PetscLayout layout, PetscInt numRootIndices, const PetscInt *rootIndices, const PetscInt *rootLocalIndices, PetscInt rootLocalOffset, PetscInt numLeafIndices, const PetscInt *leafIndices, const PetscInt *leafLocalIndices, PetscInt leafLocalOffset, PetscSF *sfA, PetscSF *sf)
{
  MPI_Comm     comm = layout->comm;
  PetscMPIInt  size, rank;
  PetscSF      sf1;
  PetscSFNode *owners, *buffer, *iremote;
  PetscInt    *ilocal, nleaves, N, n, i;
#if defined(PETSC_USE_DEBUG)
  PetscInt N1;
#endif
  PetscBool flag;

  PetscFunctionBegin;
  if (rootIndices) PetscAssertPointer(rootIndices, 3);
  if (rootLocalIndices) PetscAssertPointer(rootLocalIndices, 4);
  if (leafIndices) PetscAssertPointer(leafIndices, 7);
  if (leafLocalIndices) PetscAssertPointer(leafLocalIndices, 8);
  if (sfA) PetscAssertPointer(sfA, 10);
  PetscAssertPointer(sf, 11);
  PetscCheck(numRootIndices >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "numRootIndices (%" PetscInt_FMT ") must be non-negative", numRootIndices);
  PetscCheck(numLeafIndices >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "numLeafIndices (%" PetscInt_FMT ") must be non-negative", numLeafIndices);
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetSize(layout, &N));
  PetscCall(PetscLayoutGetLocalSize(layout, &n));
  flag = (PetscBool)(leafIndices == rootIndices);
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &flag, 1, MPIU_BOOL, MPI_LAND, comm));
  PetscCheck(!flag || numLeafIndices == numRootIndices, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "leafIndices == rootIndices, but numLeafIndices (%" PetscInt_FMT ") != numRootIndices(%" PetscInt_FMT ")", numLeafIndices, numRootIndices);
#if defined(PETSC_USE_DEBUG)
  N1 = PETSC_INT_MIN;
  for (i = 0; i < numRootIndices; i++)
    if (rootIndices[i] > N1) N1 = rootIndices[i];
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &N1, 1, MPIU_INT, MPI_MAX, comm));
  PetscCheck(N1 < N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Max. root index (%" PetscInt_FMT ") out of layout range [0,%" PetscInt_FMT ")", N1, N);
  if (!flag) {
    N1 = PETSC_INT_MIN;
    for (i = 0; i < numLeafIndices; i++)
      if (leafIndices[i] > N1) N1 = leafIndices[i];
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &N1, 1, MPIU_INT, MPI_MAX, comm));
    PetscCheck(N1 < N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Max. leaf index (%" PetscInt_FMT ") out of layout range [0,%" PetscInt_FMT ")", N1, N);
  }
#endif
  /* Reduce: owners -> buffer */
  PetscCall(PetscMalloc1(n, &buffer));
  PetscCall(PetscSFCreate(comm, &sf1));
  PetscCall(PetscSFSetFromOptions(sf1));
  PetscCall(PetscSFSetGraphLayout(sf1, layout, numRootIndices, NULL, PETSC_OWN_POINTER, rootIndices));
  PetscCall(PetscMalloc1(numRootIndices, &owners));
  for (i = 0; i < numRootIndices; ++i) {
    owners[i].rank  = rank;
    owners[i].index = rootLocalOffset + (rootLocalIndices ? rootLocalIndices[i] : i);
  }
  for (i = 0; i < n; ++i) {
    buffer[i].index = -1;
    buffer[i].rank  = -1;
  }
  PetscCall(PetscSFReduceBegin(sf1, MPIU_SF_NODE, owners, buffer, MPI_MAXLOC));
  PetscCall(PetscSFReduceEnd(sf1, MPIU_SF_NODE, owners, buffer, MPI_MAXLOC));
  /* Bcast: buffer -> owners */
  if (!flag) {
    /* leafIndices is different from rootIndices */
    PetscCall(PetscFree(owners));
    PetscCall(PetscSFSetGraphLayout(sf1, layout, numLeafIndices, NULL, PETSC_OWN_POINTER, leafIndices));
    PetscCall(PetscMalloc1(numLeafIndices, &owners));
  }
  PetscCall(PetscSFBcastBegin(sf1, MPIU_SF_NODE, buffer, owners, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf1, MPIU_SF_NODE, buffer, owners, MPI_REPLACE));
  for (i = 0; i < numLeafIndices; ++i) PetscCheck(owners[i].rank >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Global point %" PetscInt_FMT " was unclaimed", leafIndices[i]);
  PetscCall(PetscFree(buffer));
  if (sfA) {
    *sfA = sf1;
  } else PetscCall(PetscSFDestroy(&sf1));
  /* Create sf */
  if (flag && rootLocalIndices == leafLocalIndices && leafLocalOffset == rootLocalOffset) {
    /* leaf space == root space */
    for (i = 0, nleaves = 0; i < numLeafIndices; ++i)
      if (owners[i].rank != rank) ++nleaves;
    PetscCall(PetscMalloc1(nleaves, &ilocal));
    PetscCall(PetscMalloc1(nleaves, &iremote));
    for (i = 0, nleaves = 0; i < numLeafIndices; ++i) {
      if (owners[i].rank != rank) {
        ilocal[nleaves]        = leafLocalOffset + i;
        iremote[nleaves].rank  = owners[i].rank;
        iremote[nleaves].index = owners[i].index;
        ++nleaves;
      }
    }
    PetscCall(PetscFree(owners));
  } else {
    nleaves = numLeafIndices;
    PetscCall(PetscMalloc1(nleaves, &ilocal));
    for (i = 0; i < nleaves; ++i) ilocal[i] = leafLocalOffset + (leafLocalIndices ? leafLocalIndices[i] : i);
    iremote = owners;
  }
  PetscCall(PetscSFCreate(comm, sf));
  PetscCall(PetscSFSetFromOptions(*sf));
  PetscCall(PetscSFSetGraph(*sf, rootLocalOffset + numRootIndices, nleaves, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscFunctionReturn(PETSC_SUCCESS);
}
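
/*
  A minimal call sketch (illustrative only), assuming each process has filled `rootIndices`
  and `leafIndices` with global indices as in the examples above and that `layout` has the
  matching global size; both permutation arrays are NULL and both offsets are 0 here.

    PetscSF sfA, sf;

    PetscCall(PetscSFCreateByMatchingIndices(layout, numRootIndices, rootIndices, NULL, 0, numLeafIndices, leafIndices, NULL, 0, &sfA, &sf));
    // ... sfA pushes layout-ordered data to leaves, sf maps roots to leaves ...
    PetscCall(PetscSFDestroy(&sfA));
    PetscCall(PetscSFDestroy(&sf));
*/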

/*@
  PetscSFMerge - append/merge indices of `sfb` into `sfa`, with preference for `sfb`

  Collective

  Input Parameters:
+ sfa - default `PetscSF`
- sfb - additional edges to add/replace edges in sfa

  Output Parameter:
. merged - new `PetscSF` with combined edges

  Level: intermediate

.seealso: `PetscSFCompose()`
@*/
PetscErrorCode PetscSFMerge(PetscSF sfa, PetscSF sfb, PetscSF *merged)
{
  PetscInt maxleaf;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfa, PETSCSF_CLASSID, 1);
  PetscValidHeaderSpecific(sfb, PETSCSF_CLASSID, 2);
  PetscCheckSameComm(sfa, 1, sfb, 2);
  PetscAssertPointer(merged, 3);
  {
    PetscInt aleaf, bleaf;
    PetscCall(PetscSFGetLeafRange(sfa, NULL, &aleaf));
    PetscCall(PetscSFGetLeafRange(sfb, NULL, &bleaf));
    maxleaf = PetscMax(aleaf, bleaf) + 1; // One more than the last index
  }
  PetscInt          *clocal, aroots, aleaves, broots, bleaves;
  PetscSFNode       *cremote;
  const PetscInt    *alocal, *blocal;
  const PetscSFNode *aremote, *bremote;
  PetscCall(PetscMalloc2(maxleaf, &clocal, maxleaf, &cremote));
  for (PetscInt i = 0; i < maxleaf; i++) clocal[i] = -1;
  PetscCall(PetscSFGetGraph(sfa, &aroots, &aleaves, &alocal, &aremote));
  PetscCall(PetscSFGetGraph(sfb, &broots, &bleaves, &blocal, &bremote));
  PetscCheck(aroots == broots, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Both sfa and sfb must have the same root space");
  for (PetscInt i = 0; i < aleaves; i++) {
    PetscInt a = alocal ? alocal[i] : i;
    clocal[a]  = a;
    cremote[a] = aremote[i];
  }
  for (PetscInt i = 0; i < bleaves; i++) {
    PetscInt b = blocal ? blocal[i] : i;
    clocal[b]  = b;
    cremote[b] = bremote[i];
  }
  PetscInt nleaves = 0;
  for (PetscInt i = 0; i < maxleaf; i++) {
    if (clocal[i] < 0) continue;
    clocal[nleaves]  = clocal[i];
    cremote[nleaves] = cremote[i];
    nleaves++;
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfa), merged));
  PetscCall(PetscSFSetGraph(*merged, aroots, nleaves, clocal, PETSC_COPY_VALUES, cremote, PETSC_COPY_VALUES));
  PetscCall(PetscFree2(clocal, cremote));
  PetscFunctionReturn(PETSC_SUCCESS);
}
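
/*
  A minimal sketch (illustrative only), assuming `sfa` and `sfb` are existing star forests
  with the same root space: edges of `sfb` overwrite edges of `sfa` at coinciding leaves and
  the result is a new SF owned by the caller.

    PetscSF merged;

    PetscCall(PetscSFMerge(sfa, sfb, &merged));
    // ... merged now contains the union of edges, preferring sfb ...
    PetscCall(PetscSFDestroy(&merged));
*/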

/*@
  PetscSFCreateStridedSF - Create a `PetscSF` to communicate interleaved blocks of data

  Collective

  Input Parameters:
+ sf  - star forest
. bs  - stride
. ldr - leading dimension of root space
- ldl - leading dimension of leaf space

  Output Parameter:
. vsf - the new `PetscSF`

  Level: intermediate

  Notes:
  This can be useful to perform communications on blocks of right-hand sides. For example, the calling sequence
.vb
  c_datatype *roots, *leaves;
  for i in [0,bs) do
    PetscSFBcastBegin(sf, mpi_datatype, roots + i*ldr, leaves + i*ldl, op)
    PetscSFBcastEnd(sf, mpi_datatype, roots + i*ldr, leaves + i*ldl, op)
.ve
  is equivalent to
.vb
  c_datatype *roots, *leaves;
  PetscSFCreateStridedSF(sf, bs, ldr, ldl, &vsf)
  PetscSFBcastBegin(vsf, mpi_datatype, roots, leaves, op)
  PetscSFBcastEnd(vsf, mpi_datatype, roots, leaves, op)
.ve

  Developer Notes:
  Should this functionality be handled with a new API instead of creating a new object?

.seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFCreateStridedSF(PetscSF sf, PetscInt bs, PetscInt ldr, PetscInt ldl, PetscSF *vsf)
{
  PetscSF            rankssf;
  const PetscSFNode *iremote, *sfrremote;
  PetscSFNode       *viremote;
  const PetscInt    *ilocal;
  PetscInt          *vilocal = NULL, *ldrs;
  PetscInt           nranks, nr, nl, vnr, vnl, maxl;
  PetscMPIInt        rank;
  MPI_Comm           comm;
  PetscSFType        sftype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveInt(sf, bs, 2);
  PetscAssertPointer(vsf, 5);
  if (bs == 1) {
    PetscCall(PetscObjectReference((PetscObject)sf));
    *vsf = sf;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscSFGetGraph(sf, &nr, &nl, &ilocal, &iremote));
  PetscCall(PetscSFGetLeafRange(sf, NULL, &maxl));
  maxl += 1;
  if (ldl == PETSC_DECIDE) ldl = maxl;
  if (ldr == PETSC_DECIDE) ldr = nr;
  PetscCheck(ldr >= nr, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid leading dimension %" PetscInt_FMT ", must not be smaller than the number of roots %" PetscInt_FMT, ldr, nr);
  PetscCheck(ldl >= maxl, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid leading dimension %" PetscInt_FMT ", must be larger than the leaf range %" PetscInt_FMT, ldl, maxl - 1);
  vnr = nr * bs;
  vnl = nl * bs;
  PetscCall(PetscMalloc1(vnl, &viremote));
  PetscCall(PetscMalloc1(vnl, &vilocal));

  /* Communicate root leading dimensions to leaf ranks */
  PetscCall(PetscSFGetRanksSF(sf, &rankssf));
  PetscCall(PetscSFGetGraph(rankssf, NULL, &nranks, NULL, &sfrremote));
  PetscCall(PetscMalloc1(nranks, &ldrs));
  PetscCall(PetscSFBcastBegin(rankssf, MPIU_INT, &ldr, ldrs, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rankssf, MPIU_INT, &ldr, ldrs, MPI_REPLACE));

  for (PetscInt i = 0, rold = -1, lda = -1; i < nl; i++) {
    const PetscInt r  = iremote[i].rank;
    const PetscInt ii = iremote[i].index;

    if (r == rank) lda = ldr;
    else if (rold != r) {
      PetscInt j;

      for (j = 0; j < nranks; j++)
        if (sfrremote[j].rank == r) break;
      PetscCheck(j < nranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to locate neighbor rank %" PetscInt_FMT, r);
      lda = ldrs[j];
    }
    rold = r;
    for (PetscInt v = 0; v < bs; v++) {
      viremote[v * nl + i].rank  = r;
      viremote[v * nl + i].index = v * lda + ii;
      vilocal[v * nl + i]        = v * ldl + (ilocal ? ilocal[i] : i);
    }
  }
  PetscCall(PetscFree(ldrs));
  PetscCall(PetscSFCreate(comm, vsf));
  PetscCall(PetscSFGetType(sf, &sftype));
  PetscCall(PetscSFSetType(*vsf, sftype));
  PetscCall(PetscSFSetGraph(*vsf, vnr, vnl, vilocal, PETSC_OWN_POINTER, viremote, PETSC_OWN_POINTER));
  PetscFunctionReturn(PETSC_SUCCESS);
}
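
/*
  A minimal sketch (illustrative only), assuming `sf` communicates one block of PetscScalar
  data and `nrhs` right-hand sides are stored column by column in the assumed arrays
  `rootArray` (leading dimension `ldr`) and `leafArray` (leading dimension `ldl`): a single
  Bcast on the strided SF replaces the loop over columns shown in the docstring above.

    PetscSF vsf;

    PetscCall(PetscSFCreateStridedSF(sf, nrhs, ldr, ldl, &vsf));
    PetscCall(PetscSFBcastBegin(vsf, MPIU_SCALAR, rootArray, leafArray, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(vsf, MPIU_SCALAR, rootArray, leafArray, MPI_REPLACE));
    PetscCall(PetscSFDestroy(&vsf));
*/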