1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 2 #include <petsc/private/sectionimpl.h> 3 4 /*@ 5 PetscSFSetGraphLayout - Set a parallel star forest via global indices and a `PetscLayout` 6 7 Collective 8 9 Input Parameters: 10 + sf - star forest 11 . layout - `PetscLayout` defining the global space for roots 12 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 13 . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage 14 . localmode - copy mode for ilocal 15 - gremote - root vertices in global numbering corresponding to leaves in ilocal 16 17 Level: intermediate 18 19 Note: 20 Global indices must lie in [0, N) where N is the global size of layout. 21 Leaf indices in ilocal get sorted; this means the user-provided array gets sorted if localmode is `PETSC_OWN_POINTER`. 22 23 Developer Notes: 24 Local indices which are the identity permutation in the range [0,nleaves) are discarded as they 25 encode contiguous storage. In such case, if localmode is `PETSC_OWN_POINTER`, the memory is deallocated as it is not 26 needed 27 28 .seealso: `PetscSF`, `PetscSFGetGraphLayout()`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 29 @*/ 30 PetscErrorCode PetscSFSetGraphLayout(PetscSF sf, PetscLayout layout, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, const PetscInt *gremote) 31 { 32 const PetscInt *range; 33 PetscInt i, nroots, ls = -1, ln = -1; 34 PetscMPIInt lr = -1; 35 PetscSFNode *remote; 36 37 PetscFunctionBegin; 38 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 39 PetscCall(PetscLayoutSetUp(layout)); 40 PetscCall(PetscLayoutGetLocalSize(layout, &nroots)); 41 PetscCall(PetscLayoutGetRanges(layout, &range)); 42 PetscCall(PetscMalloc1(nleaves, &remote)); 43 if (nleaves) ls = gremote[0] + 1; 44 for (i = 0; i < nleaves; i++) { 45 const PetscInt idx = gremote[i] - ls; 46 if (idx < 0 || idx >= ln) { /* short-circuit the search */ 47 PetscCall(PetscLayoutFindOwnerIndex(layout, gremote[i], &lr, &remote[i].index)); 48 remote[i].rank = lr; 49 ls = range[lr]; 50 ln = range[lr + 1] - ls; 51 } else { 52 remote[i].rank = lr; 53 remote[i].index = idx; 54 } 55 } 56 PetscCall(PetscSFSetGraph(sf, nroots, nleaves, ilocal, localmode, remote, PETSC_OWN_POINTER)); 57 PetscFunctionReturn(PETSC_SUCCESS); 58 } 59 60 /*@C 61 PetscSFGetGraphLayout - Get the global indices and `PetscLayout` that describe this star forest 62 63 Collective 64 65 Input Parameter: 66 . sf - star forest 67 68 Output Parameters: 69 + layout - `PetscLayout` defining the global space for roots 70 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 71 . ilocal - locations of leaves in leafdata buffers, or NULL for contiguous storage 72 - gremote - root vertices in global numbering corresponding to leaves in ilocal 73 74 Level: intermediate 75 76 Notes: 77 The outputs are such that passing them as inputs to `PetscSFSetGraphLayout()` would lead to the same star forest. 78 The outputs layout and gremote are freshly created each time this function is called, 79 so they need to be freed by user and cannot be qualified as const. 80 81 .seealso: `PetscSF`, `PetscSFSetGraphLayout()`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 82 @*/ 83 PetscErrorCode PetscSFGetGraphLayout(PetscSF sf, PetscLayout *layout, PetscInt *nleaves, const PetscInt *ilocal[], PetscInt *gremote[]) 84 { 85 PetscInt nr, nl; 86 const PetscSFNode *ir; 87 PetscLayout lt; 88 89 PetscFunctionBegin; 90 PetscCall(PetscSFGetGraph(sf, &nr, &nl, ilocal, &ir)); 91 PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sf), nr, PETSC_DECIDE, 1, <)); 92 if (gremote) { 93 PetscInt i; 94 const PetscInt *range; 95 PetscInt *gr; 96 97 PetscCall(PetscLayoutGetRanges(lt, &range)); 98 PetscCall(PetscMalloc1(nl, &gr)); 99 for (i = 0; i < nl; i++) gr[i] = range[ir[i].rank] + ir[i].index; 100 *gremote = gr; 101 } 102 if (nleaves) *nleaves = nl; 103 if (layout) *layout = lt; 104 else PetscCall(PetscLayoutDestroy(<)); 105 PetscFunctionReturn(PETSC_SUCCESS); 106 } 107 108 /*@ 109 PetscSFSetGraphSection - Sets the `PetscSF` graph encoding the parallel dof overlap based upon the `PetscSection` describing the data layout. 110 111 Input Parameters: 112 + sf - The `PetscSF` 113 . localSection - `PetscSection` describing the local data layout 114 - globalSection - `PetscSection` describing the global data layout 115 116 Level: developer 117 118 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFSetGraphLayout()` 119 @*/ 120 PetscErrorCode PetscSFSetGraphSection(PetscSF sf, PetscSection localSection, PetscSection globalSection) 121 { 122 MPI_Comm comm; 123 PetscLayout layout; 124 const PetscInt *ranges; 125 PetscInt *local; 126 PetscSFNode *remote; 127 PetscInt pStart, pEnd, p, nroots, nleaves = 0, l; 128 PetscMPIInt size, rank; 129 130 PetscFunctionBegin; 131 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 132 PetscValidHeaderSpecific(localSection, PETSC_SECTION_CLASSID, 2); 133 PetscValidHeaderSpecific(globalSection, PETSC_SECTION_CLASSID, 3); 134 135 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 136 PetscCallMPI(MPI_Comm_size(comm, &size)); 137 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 138 PetscCall(PetscSectionGetChart(globalSection, &pStart, &pEnd)); 139 PetscCall(PetscSectionGetConstrainedStorageSize(globalSection, &nroots)); 140 PetscCall(PetscLayoutCreate(comm, &layout)); 141 PetscCall(PetscLayoutSetBlockSize(layout, 1)); 142 PetscCall(PetscLayoutSetLocalSize(layout, nroots)); 143 PetscCall(PetscLayoutSetUp(layout)); 144 PetscCall(PetscLayoutGetRanges(layout, &ranges)); 145 for (p = pStart; p < pEnd; ++p) { 146 PetscInt gdof, gcdof; 147 148 PetscCall(PetscSectionGetDof(globalSection, p, &gdof)); 149 PetscCall(PetscSectionGetConstraintDof(globalSection, p, &gcdof)); 150 PetscCheck(gcdof <= (gdof < 0 ? -(gdof + 1) : gdof), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Point %" PetscInt_FMT " has %" PetscInt_FMT " constraints > %" PetscInt_FMT " dof", p, gcdof, (gdof < 0 ? -(gdof + 1) : gdof)); 151 nleaves += gdof < 0 ? -(gdof + 1) - gcdof : gdof - gcdof; 152 } 153 PetscCall(PetscMalloc1(nleaves, &local)); 154 PetscCall(PetscMalloc1(nleaves, &remote)); 155 for (p = pStart, l = 0; p < pEnd; ++p) { 156 const PetscInt *cind; 157 PetscInt dof, cdof, off, gdof, gcdof, goff, gsize, d, c; 158 159 PetscCall(PetscSectionGetDof(localSection, p, &dof)); 160 PetscCall(PetscSectionGetOffset(localSection, p, &off)); 161 PetscCall(PetscSectionGetConstraintDof(localSection, p, &cdof)); 162 PetscCall(PetscSectionGetConstraintIndices(localSection, p, &cind)); 163 PetscCall(PetscSectionGetDof(globalSection, p, &gdof)); 164 PetscCall(PetscSectionGetConstraintDof(globalSection, p, &gcdof)); 165 PetscCall(PetscSectionGetOffset(globalSection, p, &goff)); 166 if (!gdof) continue; /* Censored point */ 167 gsize = gdof < 0 ? -(gdof + 1) - gcdof : gdof - gcdof; 168 if (gsize != dof - cdof) { 169 PetscCheck(gsize == dof, comm, PETSC_ERR_ARG_WRONG, "Global dof %" PetscInt_FMT " for point %" PetscInt_FMT " is neither the constrained size %" PetscInt_FMT ", nor the unconstrained %" PetscInt_FMT, gsize, p, dof - cdof, dof); 170 cdof = 0; /* Ignore constraints */ 171 } 172 for (d = 0, c = 0; d < dof; ++d) { 173 if ((c < cdof) && (cind[c] == d)) { 174 ++c; 175 continue; 176 } 177 local[l + d - c] = off + d; 178 } 179 PetscCheck(d - c == gsize, comm, PETSC_ERR_ARG_WRONG, "Point %" PetscInt_FMT ": Global dof %" PetscInt_FMT " != %" PetscInt_FMT " size - number of constraints", p, gsize, d - c); 180 if (gdof < 0) { 181 for (d = 0; d < gsize; ++d, ++l) { 182 PetscInt offset = -(goff + 1) + d, r; 183 184 PetscCall(PetscFindInt(offset, size + 1, ranges, &r)); 185 if (r < 0) r = -(r + 2); 186 PetscCheck(!(r < 0) && !(r >= size), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Point %" PetscInt_FMT " mapped to invalid process %" PetscInt_FMT " (%" PetscInt_FMT ", %" PetscInt_FMT ")", p, r, gdof, goff); 187 remote[l].rank = r; 188 remote[l].index = offset - ranges[r]; 189 } 190 } else { 191 for (d = 0; d < gsize; ++d, ++l) { 192 remote[l].rank = rank; 193 remote[l].index = goff + d - ranges[rank]; 194 } 195 } 196 } 197 PetscCheck(l == nleaves, comm, PETSC_ERR_PLIB, "Iteration error, l %" PetscInt_FMT " != nleaves %" PetscInt_FMT, l, nleaves); 198 PetscCall(PetscLayoutDestroy(&layout)); 199 PetscCall(PetscSFSetGraph(sf, nroots, nleaves, local, PETSC_OWN_POINTER, remote, PETSC_OWN_POINTER)); 200 PetscFunctionReturn(PETSC_SUCCESS); 201 } 202 203 /*@C 204 PetscSFDistributeSection - Create a new `PetscSection` reorganized, moving from the root to the leaves of the `PetscSF` 205 206 Collective 207 208 Input Parameters: 209 + sf - The `PetscSF` 210 - rootSection - Section defined on root space 211 212 Output Parameters: 213 + remoteOffsets - root offsets in leaf storage, or NULL 214 - leafSection - Section defined on the leaf space 215 216 Level: advanced 217 218 Fortran Notes: 219 In Fortran, use PetscSFDistributeSectionF90() 220 221 .seealso: `PetscSF`, `PetscSFCreate()` 222 @*/ 223 PetscErrorCode PetscSFDistributeSection(PetscSF sf, PetscSection rootSection, PetscInt **remoteOffsets, PetscSection leafSection) 224 { 225 PetscSF embedSF; 226 const PetscInt *indices; 227 IS selected; 228 PetscInt numFields, nroots, rpStart, rpEnd, lpStart = PETSC_MAX_INT, lpEnd = -1, f, c; 229 PetscBool *sub, hasc; 230 231 PetscFunctionBegin; 232 PetscCall(PetscLogEventBegin(PETSCSF_DistSect, sf, 0, 0, 0)); 233 PetscCall(PetscSectionGetNumFields(rootSection, &numFields)); 234 if (numFields) { 235 IS perm; 236 237 /* PetscSectionSetNumFields() calls PetscSectionReset(), which destroys 238 leafSection->perm. To keep this permutation set by the user, we grab 239 the reference before calling PetscSectionSetNumFields() and set it 240 back after. */ 241 PetscCall(PetscSectionGetPermutation(leafSection, &perm)); 242 PetscCall(PetscObjectReference((PetscObject)perm)); 243 PetscCall(PetscSectionSetNumFields(leafSection, numFields)); 244 PetscCall(PetscSectionSetPermutation(leafSection, perm)); 245 PetscCall(ISDestroy(&perm)); 246 } 247 PetscCall(PetscMalloc1(numFields + 2, &sub)); 248 sub[1] = rootSection->bc ? PETSC_TRUE : PETSC_FALSE; 249 for (f = 0; f < numFields; ++f) { 250 PetscSectionSym sym, dsym = NULL; 251 const char *name = NULL; 252 PetscInt numComp = 0; 253 254 sub[2 + f] = rootSection->field[f]->bc ? PETSC_TRUE : PETSC_FALSE; 255 PetscCall(PetscSectionGetFieldComponents(rootSection, f, &numComp)); 256 PetscCall(PetscSectionGetFieldName(rootSection, f, &name)); 257 PetscCall(PetscSectionGetFieldSym(rootSection, f, &sym)); 258 if (sym) PetscCall(PetscSectionSymDistribute(sym, sf, &dsym)); 259 PetscCall(PetscSectionSetFieldComponents(leafSection, f, numComp)); 260 PetscCall(PetscSectionSetFieldName(leafSection, f, name)); 261 PetscCall(PetscSectionSetFieldSym(leafSection, f, dsym)); 262 PetscCall(PetscSectionSymDestroy(&dsym)); 263 for (c = 0; c < rootSection->numFieldComponents[f]; ++c) { 264 PetscCall(PetscSectionGetComponentName(rootSection, f, c, &name)); 265 PetscCall(PetscSectionSetComponentName(leafSection, f, c, name)); 266 } 267 } 268 PetscCall(PetscSectionGetChart(rootSection, &rpStart, &rpEnd)); 269 PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL)); 270 rpEnd = PetscMin(rpEnd, nroots); 271 rpEnd = PetscMax(rpStart, rpEnd); 272 /* see if we can avoid creating the embedded SF, since it can cost more than an allreduce */ 273 sub[0] = (PetscBool)(nroots != rpEnd - rpStart); 274 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, sub, 2 + numFields, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)sf))); 275 if (sub[0]) { 276 PetscCall(ISCreateStride(PETSC_COMM_SELF, rpEnd - rpStart, rpStart, 1, &selected)); 277 PetscCall(ISGetIndices(selected, &indices)); 278 PetscCall(PetscSFCreateEmbeddedRootSF(sf, rpEnd - rpStart, indices, &embedSF)); 279 PetscCall(ISRestoreIndices(selected, &indices)); 280 PetscCall(ISDestroy(&selected)); 281 } else { 282 PetscCall(PetscObjectReference((PetscObject)sf)); 283 embedSF = sf; 284 } 285 PetscCall(PetscSFGetLeafRange(embedSF, &lpStart, &lpEnd)); 286 lpEnd++; 287 288 PetscCall(PetscSectionSetChart(leafSection, lpStart, lpEnd)); 289 290 /* Constrained dof section */ 291 hasc = sub[1]; 292 for (f = 0; f < numFields; ++f) hasc = (PetscBool)(hasc || sub[2 + f]); 293 294 /* Could fuse these at the cost of copies and extra allocation */ 295 PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->atlasDof, -lpStart), MPI_REPLACE)); 296 PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->atlasDof, -lpStart), MPI_REPLACE)); 297 if (sub[1]) { 298 PetscCall(PetscSectionCheckConstraints_Private(rootSection)); 299 PetscCall(PetscSectionCheckConstraints_Private(leafSection)); 300 PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->bc->atlasDof[-rpStart], &leafSection->bc->atlasDof[-lpStart], MPI_REPLACE)); 301 PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->bc->atlasDof[-rpStart], &leafSection->bc->atlasDof[-lpStart], MPI_REPLACE)); 302 } 303 for (f = 0; f < numFields; ++f) { 304 PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->field[f]->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->field[f]->atlasDof, -lpStart), MPI_REPLACE)); 305 PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->field[f]->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->field[f]->atlasDof, -lpStart), MPI_REPLACE)); 306 if (sub[2 + f]) { 307 PetscCall(PetscSectionCheckConstraints_Private(rootSection->field[f])); 308 PetscCall(PetscSectionCheckConstraints_Private(leafSection->field[f])); 309 PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasDof[-rpStart], &leafSection->field[f]->bc->atlasDof[-lpStart], MPI_REPLACE)); 310 PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasDof[-rpStart], &leafSection->field[f]->bc->atlasDof[-lpStart], MPI_REPLACE)); 311 } 312 } 313 if (remoteOffsets) { 314 PetscCall(PetscMalloc1(lpEnd - lpStart, remoteOffsets)); 315 PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE)); 316 PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE)); 317 } 318 PetscCall(PetscSectionInvalidateMaxDof_Internal(leafSection)); 319 PetscCall(PetscSectionSetUp(leafSection)); 320 if (hasc) { /* need to communicate bcIndices */ 321 PetscSF bcSF; 322 PetscInt *rOffBc; 323 324 PetscCall(PetscMalloc1(lpEnd - lpStart, &rOffBc)); 325 if (sub[1]) { 326 PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE)); 327 PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE)); 328 PetscCall(PetscSFCreateSectionSF(embedSF, rootSection->bc, rOffBc, leafSection->bc, &bcSF)); 329 PetscCall(PetscSFBcastBegin(bcSF, MPIU_INT, rootSection->bcIndices, leafSection->bcIndices, MPI_REPLACE)); 330 PetscCall(PetscSFBcastEnd(bcSF, MPIU_INT, rootSection->bcIndices, leafSection->bcIndices, MPI_REPLACE)); 331 PetscCall(PetscSFDestroy(&bcSF)); 332 } 333 for (f = 0; f < numFields; ++f) { 334 if (sub[2 + f]) { 335 PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE)); 336 PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE)); 337 PetscCall(PetscSFCreateSectionSF(embedSF, rootSection->field[f]->bc, rOffBc, leafSection->field[f]->bc, &bcSF)); 338 PetscCall(PetscSFBcastBegin(bcSF, MPIU_INT, rootSection->field[f]->bcIndices, leafSection->field[f]->bcIndices, MPI_REPLACE)); 339 PetscCall(PetscSFBcastEnd(bcSF, MPIU_INT, rootSection->field[f]->bcIndices, leafSection->field[f]->bcIndices, MPI_REPLACE)); 340 PetscCall(PetscSFDestroy(&bcSF)); 341 } 342 } 343 PetscCall(PetscFree(rOffBc)); 344 } 345 PetscCall(PetscSFDestroy(&embedSF)); 346 PetscCall(PetscFree(sub)); 347 PetscCall(PetscLogEventEnd(PETSCSF_DistSect, sf, 0, 0, 0)); 348 PetscFunctionReturn(PETSC_SUCCESS); 349 } 350 351 /*@C 352 PetscSFCreateRemoteOffsets - Create offsets for point data on remote processes 353 354 Collective 355 356 Input Parameters: 357 + sf - The `PetscSF` 358 . rootSection - Data layout of remote points for outgoing data (this is layout for SF roots) 359 - leafSection - Data layout of local points for incoming data (this is layout for SF leaves) 360 361 Output Parameter: 362 . remoteOffsets - Offsets for point data on remote processes (these are offsets from the root section), or NULL 363 364 Level: developer 365 366 Fortran Notes: 367 In Fortran, use PetscSFCreateRemoteOffsetsF90() 368 369 .seealso: `PetscSF`, `PetscSFCreate()` 370 @*/ 371 PetscErrorCode PetscSFCreateRemoteOffsets(PetscSF sf, PetscSection rootSection, PetscSection leafSection, PetscInt **remoteOffsets) 372 { 373 PetscSF embedSF; 374 const PetscInt *indices; 375 IS selected; 376 PetscInt numRoots, rpStart = 0, rpEnd = 0, lpStart = 0, lpEnd = 0; 377 378 PetscFunctionBegin; 379 *remoteOffsets = NULL; 380 PetscCall(PetscSFGetGraph(sf, &numRoots, NULL, NULL, NULL)); 381 if (numRoots < 0) PetscFunctionReturn(PETSC_SUCCESS); 382 PetscCall(PetscLogEventBegin(PETSCSF_RemoteOff, sf, 0, 0, 0)); 383 PetscCall(PetscSectionGetChart(rootSection, &rpStart, &rpEnd)); 384 PetscCall(PetscSectionGetChart(leafSection, &lpStart, &lpEnd)); 385 PetscCall(ISCreateStride(PETSC_COMM_SELF, rpEnd - rpStart, rpStart, 1, &selected)); 386 PetscCall(ISGetIndices(selected, &indices)); 387 PetscCall(PetscSFCreateEmbeddedRootSF(sf, rpEnd - rpStart, indices, &embedSF)); 388 PetscCall(ISRestoreIndices(selected, &indices)); 389 PetscCall(ISDestroy(&selected)); 390 PetscCall(PetscCalloc1(lpEnd - lpStart, remoteOffsets)); 391 PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE)); 392 PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE)); 393 PetscCall(PetscSFDestroy(&embedSF)); 394 PetscCall(PetscLogEventEnd(PETSCSF_RemoteOff, sf, 0, 0, 0)); 395 PetscFunctionReturn(PETSC_SUCCESS); 396 } 397 398 /*@C 399 PetscSFCreateSectionSF - Create an expanded `PetscSF` of dofs, assuming the input `PetscSF` relates points 400 401 Collective 402 403 Input Parameters: 404 + sf - The `PetscSF` 405 . rootSection - Data layout of remote points for outgoing data (this is usually the serial section) 406 . remoteOffsets - Offsets for point data on remote processes (these are offsets from the root section), or NULL 407 - leafSection - Data layout of local points for incoming data (this is the distributed section) 408 409 Output Parameter: 410 . sectionSF - The new `PetscSF` 411 412 Level: advanced 413 414 Notes: 415 Either rootSection or remoteOffsets can be specified 416 417 Fortran Notes: 418 In Fortran, use PetscSFCreateSectionSFF90() 419 420 .seealso: `PetscSF`, `PetscSFCreate()` 421 @*/ 422 PetscErrorCode PetscSFCreateSectionSF(PetscSF sf, PetscSection rootSection, PetscInt remoteOffsets[], PetscSection leafSection, PetscSF *sectionSF) 423 { 424 MPI_Comm comm; 425 const PetscInt *localPoints; 426 const PetscSFNode *remotePoints; 427 PetscInt lpStart, lpEnd; 428 PetscInt numRoots, numSectionRoots, numPoints, numIndices = 0; 429 PetscInt *localIndices; 430 PetscSFNode *remoteIndices; 431 PetscInt i, ind; 432 433 PetscFunctionBegin; 434 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 435 PetscAssertPointer(rootSection, 2); 436 /* Cannot check PetscAssertPointer(remoteOffsets,3) because it can be NULL if sf does not reference any points in leafSection */ 437 PetscAssertPointer(leafSection, 4); 438 PetscAssertPointer(sectionSF, 5); 439 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 440 PetscCall(PetscSFCreate(comm, sectionSF)); 441 PetscCall(PetscSectionGetChart(leafSection, &lpStart, &lpEnd)); 442 PetscCall(PetscSectionGetStorageSize(rootSection, &numSectionRoots)); 443 PetscCall(PetscSFGetGraph(sf, &numRoots, &numPoints, &localPoints, &remotePoints)); 444 if (numRoots < 0) PetscFunctionReturn(PETSC_SUCCESS); 445 PetscCall(PetscLogEventBegin(PETSCSF_SectSF, sf, 0, 0, 0)); 446 for (i = 0; i < numPoints; ++i) { 447 PetscInt localPoint = localPoints ? localPoints[i] : i; 448 PetscInt dof; 449 450 if ((localPoint >= lpStart) && (localPoint < lpEnd)) { 451 PetscCall(PetscSectionGetDof(leafSection, localPoint, &dof)); 452 numIndices += dof < 0 ? 0 : dof; 453 } 454 } 455 PetscCall(PetscMalloc1(numIndices, &localIndices)); 456 PetscCall(PetscMalloc1(numIndices, &remoteIndices)); 457 /* Create new index graph */ 458 for (i = 0, ind = 0; i < numPoints; ++i) { 459 PetscInt localPoint = localPoints ? localPoints[i] : i; 460 PetscInt rank = remotePoints[i].rank; 461 462 if ((localPoint >= lpStart) && (localPoint < lpEnd)) { 463 PetscInt remoteOffset = remoteOffsets[localPoint - lpStart]; 464 PetscInt loff, dof, d; 465 466 PetscCall(PetscSectionGetOffset(leafSection, localPoint, &loff)); 467 PetscCall(PetscSectionGetDof(leafSection, localPoint, &dof)); 468 for (d = 0; d < dof; ++d, ++ind) { 469 localIndices[ind] = loff + d; 470 remoteIndices[ind].rank = rank; 471 remoteIndices[ind].index = remoteOffset + d; 472 } 473 } 474 } 475 PetscCheck(numIndices == ind, comm, PETSC_ERR_PLIB, "Inconsistency in indices, %" PetscInt_FMT " should be %" PetscInt_FMT, ind, numIndices); 476 PetscCall(PetscSFSetGraph(*sectionSF, numSectionRoots, numIndices, localIndices, PETSC_OWN_POINTER, remoteIndices, PETSC_OWN_POINTER)); 477 PetscCall(PetscSFSetUp(*sectionSF)); 478 PetscCall(PetscLogEventEnd(PETSCSF_SectSF, sf, 0, 0, 0)); 479 PetscFunctionReturn(PETSC_SUCCESS); 480 } 481 482 /*@C 483 PetscSFCreateFromLayouts - Creates a parallel star forest mapping two `PetscLayout` objects 484 485 Collective 486 487 Input Parameters: 488 + rmap - `PetscLayout` defining the global root space 489 - lmap - `PetscLayout` defining the global leaf space 490 491 Output Parameter: 492 . sf - The parallel star forest 493 494 Level: intermediate 495 496 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscLayoutCreate()`, `PetscSFSetGraphLayout()` 497 @*/ 498 PetscErrorCode PetscSFCreateFromLayouts(PetscLayout rmap, PetscLayout lmap, PetscSF *sf) 499 { 500 PetscInt i, nroots, nleaves = 0; 501 PetscInt rN, lst, len; 502 PetscMPIInt owner = -1; 503 PetscSFNode *remote; 504 MPI_Comm rcomm = rmap->comm; 505 MPI_Comm lcomm = lmap->comm; 506 PetscMPIInt flg; 507 508 PetscFunctionBegin; 509 PetscAssertPointer(sf, 3); 510 PetscCheck(rmap->setupcalled, rcomm, PETSC_ERR_ARG_WRONGSTATE, "Root layout not setup"); 511 PetscCheck(lmap->setupcalled, lcomm, PETSC_ERR_ARG_WRONGSTATE, "Leaf layout not setup"); 512 PetscCallMPI(MPI_Comm_compare(rcomm, lcomm, &flg)); 513 PetscCheck(flg == MPI_CONGRUENT || flg == MPI_IDENT, rcomm, PETSC_ERR_SUP, "cannot map two layouts with non-matching communicators"); 514 PetscCall(PetscSFCreate(rcomm, sf)); 515 PetscCall(PetscLayoutGetLocalSize(rmap, &nroots)); 516 PetscCall(PetscLayoutGetSize(rmap, &rN)); 517 PetscCall(PetscLayoutGetRange(lmap, &lst, &len)); 518 PetscCall(PetscMalloc1(len - lst, &remote)); 519 for (i = lst; i < len && i < rN; i++) { 520 if (owner < -1 || i >= rmap->range[owner + 1]) PetscCall(PetscLayoutFindOwner(rmap, i, &owner)); 521 remote[nleaves].rank = owner; 522 remote[nleaves].index = i - rmap->range[owner]; 523 nleaves++; 524 } 525 PetscCall(PetscSFSetGraph(*sf, nroots, nleaves, NULL, PETSC_OWN_POINTER, remote, PETSC_COPY_VALUES)); 526 PetscCall(PetscFree(remote)); 527 PetscFunctionReturn(PETSC_SUCCESS); 528 } 529 530 /* TODO: handle nooffprocentries like MatZeroRowsMapLocal_Private, since this code is the same */ 531 PetscErrorCode PetscLayoutMapLocal(PetscLayout map, PetscInt N, const PetscInt idxs[], PetscInt *on, PetscInt **oidxs, PetscInt **ogidxs) 532 { 533 PetscInt *owners = map->range; 534 PetscInt n = map->n; 535 PetscSF sf; 536 PetscInt *lidxs, *work = NULL; 537 PetscSFNode *ridxs; 538 PetscMPIInt rank, p = 0; 539 PetscInt r, len = 0; 540 541 PetscFunctionBegin; 542 if (on) *on = 0; /* squelch -Wmaybe-uninitialized */ 543 /* Create SF where leaves are input idxs and roots are owned idxs */ 544 PetscCallMPI(MPI_Comm_rank(map->comm, &rank)); 545 PetscCall(PetscMalloc1(n, &lidxs)); 546 for (r = 0; r < n; ++r) lidxs[r] = -1; 547 PetscCall(PetscMalloc1(N, &ridxs)); 548 for (r = 0; r < N; ++r) { 549 const PetscInt idx = idxs[r]; 550 PetscCheck(idx >= 0 && idx < map->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Index %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, map->N); 551 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this idx too */ 552 PetscCall(PetscLayoutFindOwner(map, idx, &p)); 553 } 554 ridxs[r].rank = p; 555 ridxs[r].index = idxs[r] - owners[p]; 556 } 557 PetscCall(PetscSFCreate(map->comm, &sf)); 558 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, ridxs, PETSC_OWN_POINTER)); 559 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)idxs, lidxs, MPI_LOR)); 560 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)idxs, lidxs, MPI_LOR)); 561 if (ogidxs) { /* communicate global idxs */ 562 PetscInt cum = 0, start, *work2; 563 564 PetscCall(PetscMalloc1(n, &work)); 565 PetscCall(PetscCalloc1(N, &work2)); 566 for (r = 0; r < N; ++r) 567 if (idxs[r] >= 0) cum++; 568 PetscCallMPI(MPI_Scan(&cum, &start, 1, MPIU_INT, MPI_SUM, map->comm)); 569 start -= cum; 570 cum = 0; 571 for (r = 0; r < N; ++r) 572 if (idxs[r] >= 0) work2[r] = start + cum++; 573 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work2, work, MPI_REPLACE)); 574 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work2, work, MPI_REPLACE)); 575 PetscCall(PetscFree(work2)); 576 } 577 PetscCall(PetscSFDestroy(&sf)); 578 /* Compress and put in indices */ 579 for (r = 0; r < n; ++r) 580 if (lidxs[r] >= 0) { 581 if (work) work[len] = work[r]; 582 lidxs[len++] = r; 583 } 584 if (on) *on = len; 585 if (oidxs) *oidxs = lidxs; 586 if (ogidxs) *ogidxs = work; 587 PetscFunctionReturn(PETSC_SUCCESS); 588 } 589 590 /*@ 591 PetscSFCreateByMatchingIndices - Create `PetscSF` by matching root and leaf indices 592 593 Collective 594 595 Input Parameters: 596 + layout - `PetscLayout` defining the global index space and the rank that brokers each index 597 . numRootIndices - size of rootIndices 598 . rootIndices - `PetscInt` array of global indices of which this process requests ownership 599 . rootLocalIndices - root local index permutation (NULL if no permutation) 600 . rootLocalOffset - offset to be added to root local indices 601 . numLeafIndices - size of leafIndices 602 . leafIndices - `PetscInt` array of global indices with which this process requires data associated 603 . leafLocalIndices - leaf local index permutation (NULL if no permutation) 604 - leafLocalOffset - offset to be added to leaf local indices 605 606 Output Parameters: 607 + sfA - star forest representing the communication pattern from the layout space to the leaf space (NULL if not needed) 608 - sf - star forest representing the communication pattern from the root space to the leaf space 609 610 Level: advanced 611 612 Example 1: 613 .vb 614 rank : 0 1 2 615 rootIndices : [1 0 2] [3] [3] 616 rootLocalOffset : 100 200 300 617 layout : [0 1] [2] [3] 618 leafIndices : [0] [2] [0 3] 619 leafLocalOffset : 400 500 600 620 621 would build the following PetscSF 622 623 [0] 400 <- (0,101) 624 [1] 500 <- (0,102) 625 [2] 600 <- (0,101) 626 [2] 601 <- (2,300) 627 .ve 628 629 Example 2: 630 .vb 631 rank : 0 1 2 632 rootIndices : [1 0 2] [3] [3] 633 rootLocalOffset : 100 200 300 634 layout : [0 1] [2] [3] 635 leafIndices : rootIndices rootIndices rootIndices 636 leafLocalOffset : rootLocalOffset rootLocalOffset rootLocalOffset 637 638 would build the following PetscSF 639 640 [1] 200 <- (2,300) 641 .ve 642 643 Example 3: 644 .vb 645 No process requests ownership of global index 1, but no process needs it. 646 647 rank : 0 1 2 648 numRootIndices : 2 1 1 649 rootIndices : [0 2] [3] [3] 650 rootLocalOffset : 100 200 300 651 layout : [0 1] [2] [3] 652 numLeafIndices : 1 1 2 653 leafIndices : [0] [2] [0 3] 654 leafLocalOffset : 400 500 600 655 656 would build the following PetscSF 657 658 [0] 400 <- (0,100) 659 [1] 500 <- (0,101) 660 [2] 600 <- (0,100) 661 [2] 601 <- (2,300) 662 .ve 663 664 Notes: 665 The layout parameter represents any partitioning of [0, N), where N is the total number of global indices, and its 666 local size can be set to `PETSC_DECIDE`. 667 668 If a global index x lies in the partition owned by process i, each process whose rootIndices contains x requests 669 ownership of x and sends its own rank and the local index of x to process i. 670 If multiple processes request ownership of x, the one with the highest rank is to own x. 671 Process i then broadcasts the ownership information, so that each process whose leafIndices contains x knows the 672 ownership information of x. 673 The output sf is constructed by associating each leaf point to a root point in this way. 674 675 Suppose there is point data ordered according to the global indices and partitioned according to the given layout. 676 The optional output `PetscSF` object sfA can be used to push such data to leaf points. 677 678 All indices in rootIndices and leafIndices must lie in the layout range. The union (over all processes) of rootIndices 679 must cover that of leafIndices, but need not cover the entire layout. 680 681 If (leafIndices, leafLocalIndices, leafLocalOffset) == (rootIndices, rootLocalIndices, rootLocalOffset), the output 682 star forest is almost identity, so will only include non-trivial part of the map. 683 684 Developer Notes: 685 Current approach of a process of the highest rank gaining the ownership may cause load imbalance; consider using 686 hash(rank, root_local_index) as the bid for the ownership determination. 687 688 .seealso: `PetscSF`, `PetscSFCreate()` 689 @*/ 690 PetscErrorCode PetscSFCreateByMatchingIndices(PetscLayout layout, PetscInt numRootIndices, const PetscInt *rootIndices, const PetscInt *rootLocalIndices, PetscInt rootLocalOffset, PetscInt numLeafIndices, const PetscInt *leafIndices, const PetscInt *leafLocalIndices, PetscInt leafLocalOffset, PetscSF *sfA, PetscSF *sf) 691 { 692 MPI_Comm comm = layout->comm; 693 PetscMPIInt size, rank; 694 PetscSF sf1; 695 PetscSFNode *owners, *buffer, *iremote; 696 PetscInt *ilocal, nleaves, N, n, i; 697 #if defined(PETSC_USE_DEBUG) 698 PetscInt N1; 699 #endif 700 PetscBool flag; 701 702 PetscFunctionBegin; 703 if (rootIndices) PetscAssertPointer(rootIndices, 3); 704 if (rootLocalIndices) PetscAssertPointer(rootLocalIndices, 4); 705 if (leafIndices) PetscAssertPointer(leafIndices, 7); 706 if (leafLocalIndices) PetscAssertPointer(leafLocalIndices, 8); 707 if (sfA) PetscAssertPointer(sfA, 10); 708 PetscAssertPointer(sf, 11); 709 PetscCheck(numRootIndices >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "numRootIndices (%" PetscInt_FMT ") must be non-negative", numRootIndices); 710 PetscCheck(numLeafIndices >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "numLeafIndices (%" PetscInt_FMT ") must be non-negative", numLeafIndices); 711 PetscCallMPI(MPI_Comm_size(comm, &size)); 712 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 713 PetscCall(PetscLayoutSetUp(layout)); 714 PetscCall(PetscLayoutGetSize(layout, &N)); 715 PetscCall(PetscLayoutGetLocalSize(layout, &n)); 716 flag = (PetscBool)(leafIndices == rootIndices); 717 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &flag, 1, MPIU_BOOL, MPI_LAND, comm)); 718 PetscCheck(!flag || numLeafIndices == numRootIndices, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "leafIndices == rootIndices, but numLeafIndices (%" PetscInt_FMT ") != numRootIndices(%" PetscInt_FMT ")", numLeafIndices, numRootIndices); 719 #if defined(PETSC_USE_DEBUG) 720 N1 = PETSC_MIN_INT; 721 for (i = 0; i < numRootIndices; i++) 722 if (rootIndices[i] > N1) N1 = rootIndices[i]; 723 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &N1, 1, MPIU_INT, MPI_MAX, comm)); 724 PetscCheck(N1 < N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Max. root index (%" PetscInt_FMT ") out of layout range [0,%" PetscInt_FMT ")", N1, N); 725 if (!flag) { 726 N1 = PETSC_MIN_INT; 727 for (i = 0; i < numLeafIndices; i++) 728 if (leafIndices[i] > N1) N1 = leafIndices[i]; 729 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &N1, 1, MPIU_INT, MPI_MAX, comm)); 730 PetscCheck(N1 < N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Max. leaf index (%" PetscInt_FMT ") out of layout range [0,%" PetscInt_FMT ")", N1, N); 731 } 732 #endif 733 /* Reduce: owners -> buffer */ 734 PetscCall(PetscMalloc1(n, &buffer)); 735 PetscCall(PetscSFCreate(comm, &sf1)); 736 PetscCall(PetscSFSetFromOptions(sf1)); 737 PetscCall(PetscSFSetGraphLayout(sf1, layout, numRootIndices, NULL, PETSC_OWN_POINTER, rootIndices)); 738 PetscCall(PetscMalloc1(numRootIndices, &owners)); 739 for (i = 0; i < numRootIndices; ++i) { 740 owners[i].rank = rank; 741 owners[i].index = rootLocalOffset + (rootLocalIndices ? rootLocalIndices[i] : i); 742 } 743 for (i = 0; i < n; ++i) { 744 buffer[i].index = -1; 745 buffer[i].rank = -1; 746 } 747 PetscCall(PetscSFReduceBegin(sf1, MPIU_2INT, owners, buffer, MPI_MAXLOC)); 748 PetscCall(PetscSFReduceEnd(sf1, MPIU_2INT, owners, buffer, MPI_MAXLOC)); 749 /* Bcast: buffer -> owners */ 750 if (!flag) { 751 /* leafIndices is different from rootIndices */ 752 PetscCall(PetscFree(owners)); 753 PetscCall(PetscSFSetGraphLayout(sf1, layout, numLeafIndices, NULL, PETSC_OWN_POINTER, leafIndices)); 754 PetscCall(PetscMalloc1(numLeafIndices, &owners)); 755 } 756 PetscCall(PetscSFBcastBegin(sf1, MPIU_2INT, buffer, owners, MPI_REPLACE)); 757 PetscCall(PetscSFBcastEnd(sf1, MPIU_2INT, buffer, owners, MPI_REPLACE)); 758 for (i = 0; i < numLeafIndices; ++i) PetscCheck(owners[i].rank >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Global point %" PetscInt_FMT " was unclaimed", leafIndices[i]); 759 PetscCall(PetscFree(buffer)); 760 if (sfA) { 761 *sfA = sf1; 762 } else PetscCall(PetscSFDestroy(&sf1)); 763 /* Create sf */ 764 if (flag && rootLocalIndices == leafLocalIndices && leafLocalOffset == rootLocalOffset) { 765 /* leaf space == root space */ 766 for (i = 0, nleaves = 0; i < numLeafIndices; ++i) 767 if (owners[i].rank != rank) ++nleaves; 768 PetscCall(PetscMalloc1(nleaves, &ilocal)); 769 PetscCall(PetscMalloc1(nleaves, &iremote)); 770 for (i = 0, nleaves = 0; i < numLeafIndices; ++i) { 771 if (owners[i].rank != rank) { 772 ilocal[nleaves] = leafLocalOffset + i; 773 iremote[nleaves].rank = owners[i].rank; 774 iremote[nleaves].index = owners[i].index; 775 ++nleaves; 776 } 777 } 778 PetscCall(PetscFree(owners)); 779 } else { 780 nleaves = numLeafIndices; 781 PetscCall(PetscMalloc1(nleaves, &ilocal)); 782 for (i = 0; i < nleaves; ++i) ilocal[i] = leafLocalOffset + (leafLocalIndices ? leafLocalIndices[i] : i); 783 iremote = owners; 784 } 785 PetscCall(PetscSFCreate(comm, sf)); 786 PetscCall(PetscSFSetFromOptions(*sf)); 787 PetscCall(PetscSFSetGraph(*sf, rootLocalOffset + numRootIndices, nleaves, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 788 PetscFunctionReturn(PETSC_SUCCESS); 789 } 790 791 /*@ 792 PetscSFMerge - append/merge indices of `sfb` into `sfa`, with preference for `sfb` 793 794 Collective 795 796 Input Parameters: 797 + sfa - default `PetscSF` 798 - sfb - additional edges to add/replace edges in sfa 799 800 Output Parameter: 801 . merged - new `PetscSF` with combined edges 802 803 Level: intermediate 804 805 .seealso: `PetscSFCompose()` 806 @*/ 807 PetscErrorCode PetscSFMerge(PetscSF sfa, PetscSF sfb, PetscSF *merged) 808 { 809 PetscInt maxleaf; 810 811 PetscFunctionBegin; 812 PetscValidHeaderSpecific(sfa, PETSCSF_CLASSID, 1); 813 PetscValidHeaderSpecific(sfb, PETSCSF_CLASSID, 2); 814 PetscCheckSameComm(sfa, 1, sfb, 2); 815 PetscAssertPointer(merged, 3); 816 { 817 PetscInt aleaf, bleaf; 818 PetscCall(PetscSFGetLeafRange(sfa, NULL, &aleaf)); 819 PetscCall(PetscSFGetLeafRange(sfb, NULL, &bleaf)); 820 maxleaf = PetscMax(aleaf, bleaf) + 1; // One more than the last index 821 } 822 PetscInt *clocal, aroots, aleaves, broots, bleaves; 823 PetscSFNode *cremote; 824 const PetscInt *alocal, *blocal; 825 const PetscSFNode *aremote, *bremote; 826 PetscCall(PetscMalloc2(maxleaf, &clocal, maxleaf, &cremote)); 827 for (PetscInt i = 0; i < maxleaf; i++) clocal[i] = -1; 828 PetscCall(PetscSFGetGraph(sfa, &aroots, &aleaves, &alocal, &aremote)); 829 PetscCall(PetscSFGetGraph(sfb, &broots, &bleaves, &blocal, &bremote)); 830 PetscCheck(aroots == broots, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Both sfa and sfb must have the same root space"); 831 for (PetscInt i = 0; i < aleaves; i++) { 832 PetscInt a = alocal ? alocal[i] : i; 833 clocal[a] = a; 834 cremote[a] = aremote[i]; 835 } 836 for (PetscInt i = 0; i < bleaves; i++) { 837 PetscInt b = blocal ? blocal[i] : i; 838 clocal[b] = b; 839 cremote[b] = bremote[i]; 840 } 841 PetscInt nleaves = 0; 842 for (PetscInt i = 0; i < maxleaf; i++) { 843 if (clocal[i] < 0) continue; 844 clocal[nleaves] = clocal[i]; 845 cremote[nleaves] = cremote[i]; 846 nleaves++; 847 } 848 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfa), merged)); 849 PetscCall(PetscSFSetGraph(*merged, aroots, nleaves, clocal, PETSC_COPY_VALUES, cremote, PETSC_COPY_VALUES)); 850 PetscCall(PetscFree2(clocal, cremote)); 851 PetscFunctionReturn(PETSC_SUCCESS); 852 } 853 854 /*@ 855 PetscSFCreateStridedSF - Create an `PetscSF` to communicate interleaved blocks of data 856 857 Collective 858 859 Input Parameters: 860 + sf - star forest 861 . bs - stride 862 . ldr - leading dimension of root space 863 - ldl - leading dimension of leaf space 864 865 Output Parameter: 866 . vsf - the new `PetscSF` 867 868 Level: intermediate 869 870 Notes: 871 This can be useful to perform communications on blocks of right-hand sides. For example, the calling sequence 872 .vb 873 c_datatype *roots, *leaves; 874 for i in [0,bs) do 875 PetscSFBcastBegin(sf, mpi_datatype, roots + i*ldr, leaves + i*ldl, op) 876 PetscSFBcastEnd(sf, mpi_datatype, roots + i*ldr, leaves + i*ldl, op) 877 .ve 878 is equivalent to 879 .vb 880 c_datatype *roots, *leaves; 881 PetscSFCreateStridedSF(sf, bs, ldr, ldl, &vsf) 882 PetscSFBcastBegin(vsf, mpi_datatype, roots, leaves, op) 883 PetscSFBcastEnd(vsf, mpi_datatype, roots, leaves, op) 884 .ve 885 886 Developer Notes: 887 Should this functionality be handled with a new API instead of creating a new object? 888 889 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()` 890 @*/ 891 PetscErrorCode PetscSFCreateStridedSF(PetscSF sf, PetscInt bs, PetscInt ldr, PetscInt ldl, PetscSF *vsf) 892 { 893 PetscSF rankssf; 894 const PetscSFNode *iremote, *sfrremote; 895 PetscSFNode *viremote; 896 const PetscInt *ilocal; 897 PetscInt *vilocal = NULL, *ldrs; 898 PetscInt nranks, nr, nl, vnr, vnl, maxl; 899 PetscMPIInt rank; 900 MPI_Comm comm; 901 PetscSFType sftype; 902 903 PetscFunctionBegin; 904 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 905 PetscValidLogicalCollectiveInt(sf, bs, 2); 906 PetscAssertPointer(vsf, 5); 907 if (bs == 1) { 908 PetscCall(PetscObjectReference((PetscObject)sf)); 909 *vsf = sf; 910 PetscFunctionReturn(PETSC_SUCCESS); 911 } 912 PetscCall(PetscSFSetUp(sf)); 913 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 914 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 915 PetscCall(PetscSFGetGraph(sf, &nr, &nl, &ilocal, &iremote)); 916 PetscCall(PetscSFGetLeafRange(sf, NULL, &maxl)); 917 maxl += 1; 918 if (ldl == PETSC_DECIDE) ldl = maxl; 919 if (ldr == PETSC_DECIDE) ldr = nr; 920 PetscCheck(ldr >= nr, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid leading dimension %" PetscInt_FMT " must be smaller than number of roots %" PetscInt_FMT, ldr, nr); 921 PetscCheck(ldl >= maxl, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid leading dimension %" PetscInt_FMT " must be larger than leaf range %" PetscInt_FMT, ldl, maxl - 1); 922 vnr = nr * bs; 923 vnl = nl * bs; 924 PetscCall(PetscMalloc1(vnl, &viremote)); 925 PetscCall(PetscMalloc1(vnl, &vilocal)); 926 927 /* Communicate root leading dimensions to leaf ranks */ 928 PetscCall(PetscSFGetRanksSF(sf, &rankssf)); 929 PetscCall(PetscSFGetGraph(rankssf, NULL, &nranks, NULL, &sfrremote)); 930 PetscCall(PetscMalloc1(nranks, &ldrs)); 931 PetscCall(PetscSFBcastBegin(rankssf, MPIU_INT, &ldr, ldrs, MPI_REPLACE)); 932 PetscCall(PetscSFBcastEnd(rankssf, MPIU_INT, &ldr, ldrs, MPI_REPLACE)); 933 934 for (PetscInt i = 0, rold = -1, lda = -1; i < nl; i++) { 935 const PetscInt r = iremote[i].rank; 936 const PetscInt ii = iremote[i].index; 937 938 if (r == rank) lda = ldr; 939 else if (rold != r) { 940 PetscInt j; 941 942 for (j = 0; j < nranks; j++) 943 if (sfrremote[j].rank == r) break; 944 PetscCheck(j < nranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to locate neighbor rank %" PetscInt_FMT, r); 945 lda = ldrs[j]; 946 } 947 rold = r; 948 for (PetscInt v = 0; v < bs; v++) { 949 viremote[v * nl + i].rank = r; 950 viremote[v * nl + i].index = v * lda + ii; 951 vilocal[v * nl + i] = v * ldl + (ilocal ? ilocal[i] : i); 952 } 953 } 954 PetscCall(PetscFree(ldrs)); 955 PetscCall(PetscSFCreate(comm, vsf)); 956 PetscCall(PetscSFGetType(sf, &sftype)); 957 PetscCall(PetscSFSetType(*vsf, sftype)); 958 PetscCall(PetscSFSetGraph(*vsf, vnr, vnl, vilocal, PETSC_OWN_POINTER, viremote, PETSC_OWN_POINTER)); 959 PetscFunctionReturn(PETSC_SUCCESS); 960 } 961