#include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
#include <petsc/private/sectionimpl.h>

/*@
  PetscSFSetGraphLayout - Set a parallel star forest via global indices and a `PetscLayout`

  Collective

  Input Parameters:
+ sf        - star forest
. layout    - `PetscLayout` defining the global space for roots
. nleaves   - number of leaf vertices on the current process, each of these references a root on any process
. ilocal    - locations of leaves in leafdata buffers, pass NULL for contiguous storage
. localmode - copy mode for `ilocal`
- gremote   - root vertices in global numbering corresponding to leaves in `ilocal`

  Level: intermediate

  Notes:
  Global indices must lie in [0, N) where N is the global size of `layout`.

  Leaf indices in `ilocal` get sorted; this means the user-provided array gets sorted if `localmode` is `PETSC_OWN_POINTER`.

  Developer Notes:
  Local indices which are the identity permutation in the range [0, nleaves) are discarded as they
  encode contiguous storage. In such a case, if `localmode` is `PETSC_OWN_POINTER`, the memory is deallocated as it is not
  needed.

.seealso: `PetscSF`, `PetscSFGetGraphLayout()`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFSetGraphLayout(PetscSF sf, PetscLayout layout, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, const PetscInt *gremote)
{
  const PetscInt *range;
  PetscInt        i, nroots, ls = -1, ln = -1;
  PetscMPIInt     lr = -1;
  PetscSFNode    *remote;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetLocalSize(layout, &nroots));
  PetscCall(PetscLayoutGetRanges(layout, &range));
  PetscCall(PetscMalloc1(nleaves, &remote));
  if (nleaves) ls = gremote[0] + 1; /* force a search on the first iteration */
  for (i = 0; i < nleaves; i++) {
    const PetscInt idx = gremote[i] - ls;
    if (idx < 0 || idx >= ln) { /* short-circuit the search */
      PetscCall(PetscLayoutFindOwnerIndex(layout, gremote[i], &lr, &remote[i].index));
      remote[i].rank = lr;
      ls             = range[lr];
      ln             = range[lr + 1] - ls;
    } else {
      remote[i].rank  = lr;
      remote[i].index = idx;
    }
  }
  PetscCall(PetscSFSetGraph(sf, nroots, nleaves, ilocal, localmode, remote, PETSC_OWN_POINTER));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFGetGraphLayout - Get the global indices and `PetscLayout` that describe this star forest

  Collective

  Input Parameter:
. sf - star forest

  Output Parameters:
+ layout  - `PetscLayout` defining the global space for roots
. nleaves - number of leaf vertices on the current process, each of these references a root on any process
. ilocal  - locations of leaves in leafdata buffers, or `NULL` for contiguous storage
- gremote - root vertices in global numbering corresponding to leaves in `ilocal`

  Level: intermediate

  Notes:
  The outputs are such that passing them as inputs to `PetscSFSetGraphLayout()` would lead to the same star forest.

  The outputs `layout` and `gremote` are freshly created each time this function is called,
  so they need to be freed (with `PetscLayoutDestroy()` and `PetscFree()`) by the user.
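
  Example Usage:
  A minimal round-trip sketch (error handling elided; `comm` and the two-leaf index array are assumptions for illustration, with a global root space of size 16):
.vb
  PetscSF         sf;
  PetscLayout     layout, outLayout;
  PetscInt        nleaves, gremote[2] = {3, 7}; /* global root indices referenced by this rank's two leaves */
  const PetscInt *ilocal;
  PetscInt       *outRemote;

  PetscCall(PetscSFCreate(comm, &sf));
  PetscCall(PetscLayoutCreateFromSizes(comm, PETSC_DECIDE, 16, 1, &layout));
  PetscCall(PetscSFSetGraphLayout(sf, layout, 2, NULL, PETSC_OWN_POINTER, gremote));
  PetscCall(PetscLayoutDestroy(&layout)); /* the layout is only read during the call */
  /* ... use the star forest ... */
  PetscCall(PetscSFGetGraphLayout(sf, &outLayout, &nleaves, &ilocal, &outRemote));
  PetscCall(PetscLayoutDestroy(&outLayout));
  PetscCall(PetscFree(outRemote));
.ve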

.seealso: `PetscSF`, `PetscSFSetGraphLayout()`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFGetGraphLayout(PetscSF sf, PetscLayout *layout, PetscInt *nleaves, const PetscInt *ilocal[], PetscInt *gremote[])
{
  PetscInt           nr, nl;
  const PetscSFNode *ir;
  PetscLayout        lt;

  PetscFunctionBegin;
  PetscCall(PetscSFGetGraph(sf, &nr, &nl, ilocal, &ir));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sf), nr, PETSC_DECIDE, 1, &lt));
  if (gremote) {
    PetscInt        i;
    const PetscInt *range;
    PetscInt       *gr;

    PetscCall(PetscLayoutGetRanges(lt, &range));
    PetscCall(PetscMalloc1(nl, &gr));
    for (i = 0; i < nl; i++) gr[i] = range[ir[i].rank] + ir[i].index;
    *gremote = gr;
  }
  if (nleaves) *nleaves = nl;
  if (layout) *layout = lt;
  else PetscCall(PetscLayoutDestroy(&lt));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFSetGraphSection - Sets the `PetscSF` graph encoding the parallel dof overlap based upon the `PetscSection` describing the data layout.

  Input Parameters:
+ sf            - The `PetscSF`
. localSection  - `PetscSection` describing the local data layout
- globalSection - `PetscSection` describing the global data layout

  Level: developer

.seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFSetGraphLayout()`
@*/
PetscErrorCode PetscSFSetGraphSection(PetscSF sf, PetscSection localSection, PetscSection globalSection)
{
  MPI_Comm        comm;
  PetscLayout     layout;
  const PetscInt *ranges;
  PetscInt       *local;
  PetscSFNode    *remote;
  PetscInt        pStart, pEnd, p, nroots, nleaves = 0, l;
  PetscMPIInt     size, rank;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidHeaderSpecific(localSection, PETSC_SECTION_CLASSID, 2);
  PetscValidHeaderSpecific(globalSection, PETSC_SECTION_CLASSID, 3);

  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscSectionGetChart(globalSection, &pStart, &pEnd));
  PetscCall(PetscSectionGetConstrainedStorageSize(globalSection, &nroots));
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, 1));
  PetscCall(PetscLayoutSetLocalSize(layout, nroots));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetRanges(layout, &ranges));
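  /* First pass over the chart: count the leaves, i.e. all unconstrained global dofs (owned or remote) known to this process */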
  for (p = pStart; p < pEnd; ++p) {
    PetscInt gdof, gcdof;

    PetscCall(PetscSectionGetDof(globalSection, p, &gdof));
    PetscCall(PetscSectionGetConstraintDof(globalSection, p, &gcdof));
    PetscCheck(gcdof <= (gdof < 0 ? -(gdof + 1) : gdof), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Point %" PetscInt_FMT " has %" PetscInt_FMT " constraints > %" PetscInt_FMT " dof", p, gcdof, gdof < 0 ? -(gdof + 1) : gdof);
    nleaves += gdof < 0 ? -(gdof + 1) - gcdof : gdof - gcdof;
  }
  PetscCall(PetscMalloc1(nleaves, &local));
  PetscCall(PetscMalloc1(nleaves, &remote));
  for (p = pStart, l = 0; p < pEnd; ++p) {
    const PetscInt *cind;
    PetscInt        dof, cdof, off, gdof, gcdof, goff, gsize, d, c;

    PetscCall(PetscSectionGetDof(localSection, p, &dof));
    PetscCall(PetscSectionGetOffset(localSection, p, &off));
    PetscCall(PetscSectionGetConstraintDof(localSection, p, &cdof));
    PetscCall(PetscSectionGetConstraintIndices(localSection, p, &cind));
    PetscCall(PetscSectionGetDof(globalSection, p, &gdof));
    PetscCall(PetscSectionGetConstraintDof(globalSection, p, &gcdof));
    PetscCall(PetscSectionGetOffset(globalSection, p, &goff));
    if (!gdof) continue; /* Censored point */
    gsize = gdof < 0 ? -(gdof + 1) - gcdof : gdof - gcdof;
    if (gsize != dof - cdof) {
      PetscCheck(gsize == dof, comm, PETSC_ERR_ARG_WRONG, "Global dof %" PetscInt_FMT " for point %" PetscInt_FMT " is neither the constrained size %" PetscInt_FMT ", nor the unconstrained %" PetscInt_FMT, gsize, p, dof - cdof, dof);
      cdof = 0; /* Ignore constraints */
    }
    for (d = 0, c = 0; d < dof; ++d) {
      if ((c < cdof) && (cind[c] == d)) {
        ++c;
        continue;
      }
      local[l + d - c] = off + d;
    }
    PetscCheck(d - c == gsize, comm, PETSC_ERR_ARG_WRONG, "Point %" PetscInt_FMT ": Global dof %" PetscInt_FMT " != %" PetscInt_FMT " size - number of constraints", p, gsize, d - c);
    if (gdof < 0) {
      /* Remotely owned point: a negative gdof encodes ownership elsewhere, so locate the owner in the layout */
      for (d = 0; d < gsize; ++d, ++l) {
        PetscInt    offset = -(goff + 1) + d, ir;
        PetscMPIInt r;

        PetscCall(PetscFindInt(offset, size + 1, ranges, &ir));
        PetscCall(PetscMPIIntCast(ir, &r));
        if (r < 0) r = -(r + 2);
        PetscCheck(!(r < 0) && !(r >= size), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Point %" PetscInt_FMT " mapped to invalid process %d (%" PetscInt_FMT ", %" PetscInt_FMT ")", p, r, gdof, goff);
        remote[l].rank  = r;
        remote[l].index = offset - ranges[r];
      }
    } else {
      /* Locally owned point */
      for (d = 0; d < gsize; ++d, ++l) {
        remote[l].rank  = rank;
        remote[l].index = goff + d - ranges[rank];
      }
    }
  }
  PetscCheck(l == nleaves, comm, PETSC_ERR_PLIB, "Iteration error, l %" PetscInt_FMT " != nleaves %" PetscInt_FMT, l, nleaves);
  PetscCall(PetscLayoutDestroy(&layout));
  PetscCall(PetscSFSetGraph(sf, nroots, nleaves, local, PETSC_OWN_POINTER, remote, PETSC_OWN_POINTER));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFDistributeSection - Create a new `PetscSection` reorganized, moving from the root to the leaves of the `PetscSF`

  Collective

  Input Parameters:
+ sf          - The `PetscSF`
- rootSection - Section defined on root space

  Output Parameters:
+ remoteOffsets - root offsets in leaf storage, or `NULL`
- leafSection   - Section defined on the leaf space

  Level: advanced

  Fortran Notes:
  In Fortran, use PetscSFDistributeSectionF90()
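
  Example Usage:
  A minimal sketch, assuming a hypothetical `pointSF` that migrates mesh points and a `rootSection` laying out dofs on the root side:
.vb
  PetscSection leafSection;
  PetscInt    *remoteOffsets;

  PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject)pointSF), &leafSection));
  PetscCall(PetscSFDistributeSection(pointSF, rootSection, &remoteOffsets, leafSection));
  /* remoteOffsets can now seed PetscSFCreateSectionSF(); free it when done */
  PetscCall(PetscFree(remoteOffsets));
.ve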

.seealso: `PetscSF`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFDistributeSection(PetscSF sf, PetscSection rootSection, PetscInt **remoteOffsets, PetscSection leafSection)
{
  PetscSF         embedSF;
  const PetscInt *indices;
  IS              selected;
  PetscInt        numFields, nroots, rpStart, rpEnd, lpStart = PETSC_INT_MAX, lpEnd = -1, f, c;
  PetscBool      *sub, hasc;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(PETSCSF_DistSect, sf, 0, 0, 0));
  PetscCall(PetscSectionGetNumFields(rootSection, &numFields));
  if (numFields) {
    IS perm;

    /* PetscSectionSetNumFields() calls PetscSectionReset(), which destroys
       leafSection->perm. To keep this permutation set by the user, we grab
       the reference before calling PetscSectionSetNumFields() and set it
       back after. */
    PetscCall(PetscSectionGetPermutation(leafSection, &perm));
    PetscCall(PetscObjectReference((PetscObject)perm));
    PetscCall(PetscSectionSetNumFields(leafSection, numFields));
    PetscCall(PetscSectionSetPermutation(leafSection, perm));
    PetscCall(ISDestroy(&perm));
  }
  PetscCall(PetscMalloc1(numFields + 2, &sub));
  sub[1] = rootSection->bc ? PETSC_TRUE : PETSC_FALSE;
  for (f = 0; f < numFields; ++f) {
    PetscSectionSym sym, dsym = NULL;
    const char     *name    = NULL;
    PetscInt        numComp = 0;

    sub[2 + f] = rootSection->field[f]->bc ? PETSC_TRUE : PETSC_FALSE;
    PetscCall(PetscSectionGetFieldComponents(rootSection, f, &numComp));
    PetscCall(PetscSectionGetFieldName(rootSection, f, &name));
    PetscCall(PetscSectionGetFieldSym(rootSection, f, &sym));
    if (sym) PetscCall(PetscSectionSymDistribute(sym, sf, &dsym));
    PetscCall(PetscSectionSetFieldComponents(leafSection, f, numComp));
    PetscCall(PetscSectionSetFieldName(leafSection, f, name));
    PetscCall(PetscSectionSetFieldSym(leafSection, f, dsym));
    PetscCall(PetscSectionSymDestroy(&dsym));
    for (c = 0; c < rootSection->numFieldComponents[f]; ++c) {
      PetscCall(PetscSectionGetComponentName(rootSection, f, c, &name));
      PetscCall(PetscSectionSetComponentName(leafSection, f, c, name));
    }
  }
  PetscCall(PetscSectionGetChart(rootSection, &rpStart, &rpEnd));
  PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
  rpEnd = PetscMin(rpEnd, nroots);
  rpEnd = PetscMax(rpStart, rpEnd);
  /* see if we can avoid creating the embedded SF, since it can cost more than an allreduce */
  sub[0] = (PetscBool)(nroots != rpEnd - rpStart);
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, sub, (PetscMPIInt)(2 + numFields), MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)sf)));
  if (sub[0]) {
    PetscCall(ISCreateStride(PETSC_COMM_SELF, rpEnd - rpStart, rpStart, 1, &selected));
    PetscCall(ISGetIndices(selected, &indices));
    PetscCall(PetscSFCreateEmbeddedRootSF(sf, rpEnd - rpStart, indices, &embedSF));
    PetscCall(ISRestoreIndices(selected, &indices));
    PetscCall(ISDestroy(&selected));
  } else {
    PetscCall(PetscObjectReference((PetscObject)sf));
    embedSF = sf;
  }
  PetscCall(PetscSFGetLeafRange(embedSF, &lpStart, &lpEnd));
  lpEnd++;

  PetscCall(PetscSectionSetChart(leafSection, lpStart, lpEnd));

  /* Constrained dof section */
  hasc = sub[1];
  for (f = 0; f < numFields; ++f) hasc = (PetscBool)(hasc || sub[2 + f]);

  /* Could fuse these at the cost of copies and extra allocation */
  PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->atlasDof, -lpStart), MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->atlasDof, -lpStart), MPI_REPLACE));
  if (sub[1]) {
    PetscCall(PetscSectionCheckConstraints_Private(rootSection));
    PetscCall(PetscSectionCheckConstraints_Private(leafSection));
    PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->bc->atlasDof[-rpStart], &leafSection->bc->atlasDof[-lpStart], MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->bc->atlasDof[-rpStart], &leafSection->bc->atlasDof[-lpStart], MPI_REPLACE));
  }
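  /* Broadcast each field's dof counts, and its constrained-dof counts where the field has constraints */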
  for (f = 0; f < numFields; ++f) {
    PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->field[f]->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->field[f]->atlasDof, -lpStart), MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->field[f]->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->field[f]->atlasDof, -lpStart), MPI_REPLACE));
    if (sub[2 + f]) {
      PetscCall(PetscSectionCheckConstraints_Private(rootSection->field[f]));
      PetscCall(PetscSectionCheckConstraints_Private(leafSection->field[f]));
      PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasDof[-rpStart], &leafSection->field[f]->bc->atlasDof[-lpStart], MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasDof[-rpStart], &leafSection->field[f]->bc->atlasDof[-lpStart], MPI_REPLACE));
    }
  }
  if (remoteOffsets) {
    PetscCall(PetscMalloc1(lpEnd - lpStart, remoteOffsets));
    PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE));
  }
  PetscCall(PetscSectionInvalidateMaxDof_Internal(leafSection));
  PetscCall(PetscSectionSetUp(leafSection));
  if (hasc) { /* need to communicate bcIndices */
    PetscSF   bcSF;
    PetscInt *rOffBc;

    PetscCall(PetscMalloc1(lpEnd - lpStart, &rOffBc));
    if (sub[1]) {
      PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
      PetscCall(PetscSFCreateSectionSF(embedSF, rootSection->bc, rOffBc, leafSection->bc, &bcSF));
      PetscCall(PetscSFBcastBegin(bcSF, MPIU_INT, rootSection->bcIndices, leafSection->bcIndices, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(bcSF, MPIU_INT, rootSection->bcIndices, leafSection->bcIndices, MPI_REPLACE));
      PetscCall(PetscSFDestroy(&bcSF));
    }
    for (f = 0; f < numFields; ++f) {
      if (sub[2 + f]) {
        PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
        PetscCall(PetscSFCreateSectionSF(embedSF, rootSection->field[f]->bc, rOffBc, leafSection->field[f]->bc, &bcSF));
        PetscCall(PetscSFBcastBegin(bcSF, MPIU_INT, rootSection->field[f]->bcIndices, leafSection->field[f]->bcIndices, MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(bcSF, MPIU_INT, rootSection->field[f]->bcIndices, leafSection->field[f]->bcIndices, MPI_REPLACE));
        PetscCall(PetscSFDestroy(&bcSF));
      }
    }
    PetscCall(PetscFree(rOffBc));
  }
  PetscCall(PetscSFDestroy(&embedSF));
  PetscCall(PetscFree(sub));
  PetscCall(PetscLogEventEnd(PETSCSF_DistSect, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFCreateRemoteOffsets - Create offsets for point data on remote processes

  Collective

  Input Parameters:
+ sf          - The `PetscSF`
. rootSection - Data layout of remote points for outgoing data (this is layout for SF roots)
- leafSection - Data layout of local points for incoming data (this is layout for SF leaves)

  Output Parameter:
. remoteOffsets - Offsets for point data on remote processes (these are offsets from the root section), or `NULL`

  Level: developer

  Fortran Notes:
  In Fortran, use PetscSFCreateRemoteOffsetsF90()
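
  Example Usage:
  A minimal sketch, assuming hypothetical `pointSF`, `rootSection`, and `leafSection` objects that already describe a point migration:
.vb
  PetscInt *remoteOffsets;

  PetscCall(PetscSFCreateRemoteOffsets(pointSF, rootSection, leafSection, &remoteOffsets));
  /* typically fed to PetscSFCreateSectionSF() */
  PetscCall(PetscFree(remoteOffsets));
.ve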

.seealso: `PetscSF`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFCreateRemoteOffsets(PetscSF sf, PetscSection rootSection, PetscSection leafSection, PetscInt **remoteOffsets)
{
  PetscSF         embedSF;
  const PetscInt *indices;
  IS              selected;
  PetscInt        numRoots, rpStart = 0, rpEnd = 0, lpStart = 0, lpEnd = 0;

  PetscFunctionBegin;
  *remoteOffsets = NULL;
  PetscCall(PetscSFGetGraph(sf, &numRoots, NULL, NULL, NULL));
  if (numRoots < 0) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscLogEventBegin(PETSCSF_RemoteOff, sf, 0, 0, 0));
  PetscCall(PetscSectionGetChart(rootSection, &rpStart, &rpEnd));
  PetscCall(PetscSectionGetChart(leafSection, &lpStart, &lpEnd));
  PetscCall(ISCreateStride(PETSC_COMM_SELF, rpEnd - rpStart, rpStart, 1, &selected));
  PetscCall(ISGetIndices(selected, &indices));
  PetscCall(PetscSFCreateEmbeddedRootSF(sf, rpEnd - rpStart, indices, &embedSF));
  PetscCall(ISRestoreIndices(selected, &indices));
  PetscCall(ISDestroy(&selected));
  PetscCall(PetscCalloc1(lpEnd - lpStart, remoteOffsets));
  PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE));
  PetscCall(PetscSFDestroy(&embedSF));
  PetscCall(PetscLogEventEnd(PETSCSF_RemoteOff, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFCreateSectionSF - Create an expanded `PetscSF` of dofs, assuming the input `PetscSF` relates points

  Collective

  Input Parameters:
+ sf            - The `PetscSF`
. rootSection   - Data layout of remote points for outgoing data (this is usually the serial section)
. remoteOffsets - Offsets for point data on remote processes (these are offsets from the root section), or `NULL`
- leafSection   - Data layout of local points for incoming data (this is the distributed section)

  Output Parameter:
. sectionSF - The new `PetscSF`

  Level: advanced

  Notes:
  `remoteOffsets` can be `NULL` if `sf` does not reference any points in `leafSection`

  Fortran Notes:
  In Fortran, use PetscSFCreateSectionSFF90()
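
  Example Usage:
  A minimal sketch chaining the helpers above; `pointSF` (an assumption for illustration) relates points, and the result relates the dofs of those points:
.vb
  PetscSF   dofSF;
  PetscInt *remoteOffsets;

  PetscCall(PetscSFCreateRemoteOffsets(pointSF, rootSection, leafSection, &remoteOffsets));
  PetscCall(PetscSFCreateSectionSF(pointSF, rootSection, remoteOffsets, leafSection, &dofSF));
  PetscCall(PetscFree(remoteOffsets));
  /* dofSF can now broadcast dof arrays laid out by rootSection into arrays laid out by leafSection */
  PetscCall(PetscSFDestroy(&dofSF));
.ve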

.seealso: `PetscSF`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFCreateSectionSF(PetscSF sf, PetscSection rootSection, PetscInt remoteOffsets[], PetscSection leafSection, PetscSF *sectionSF)
{
  MPI_Comm           comm;
  const PetscInt    *localPoints;
  const PetscSFNode *remotePoints;
  PetscInt           lpStart, lpEnd;
  PetscInt           numRoots, numSectionRoots, numPoints, numIndices = 0;
  PetscInt          *localIndices;
  PetscSFNode       *remoteIndices;
  PetscInt           i, ind;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscAssertPointer(rootSection, 2);
  /* Cannot check PetscAssertPointer(remoteOffsets, 3) because it can be NULL if sf does not reference any points in leafSection */
  PetscAssertPointer(leafSection, 4);
  PetscAssertPointer(sectionSF, 5);
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCall(PetscSFCreate(comm, sectionSF));
  PetscCall(PetscSectionGetChart(leafSection, &lpStart, &lpEnd));
  PetscCall(PetscSectionGetStorageSize(rootSection, &numSectionRoots));
  PetscCall(PetscSFGetGraph(sf, &numRoots, &numPoints, &localPoints, &remotePoints));
  if (numRoots < 0) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscLogEventBegin(PETSCSF_SectSF, sf, 0, 0, 0));
  /* Count the dofs of all leaf points that lie in the leaf section's chart */
  for (i = 0; i < numPoints; ++i) {
    PetscInt localPoint = localPoints ? localPoints[i] : i;
    PetscInt dof;

    if ((localPoint >= lpStart) && (localPoint < lpEnd)) {
      PetscCall(PetscSectionGetDof(leafSection, localPoint, &dof));
      numIndices += dof < 0 ? 0 : dof;
    }
  }
  PetscCall(PetscMalloc1(numIndices, &localIndices));
  PetscCall(PetscMalloc1(numIndices, &remoteIndices));
  /* Create new index graph */
  for (i = 0, ind = 0; i < numPoints; ++i) {
    PetscInt    localPoint = localPoints ? localPoints[i] : i;
    PetscMPIInt rank       = (PetscMPIInt)remotePoints[i].rank;

    if ((localPoint >= lpStart) && (localPoint < lpEnd)) {
      PetscInt remoteOffset = remoteOffsets[localPoint - lpStart];
      PetscInt loff, dof, d;

      PetscCall(PetscSectionGetOffset(leafSection, localPoint, &loff));
      PetscCall(PetscSectionGetDof(leafSection, localPoint, &dof));
      for (d = 0; d < dof; ++d, ++ind) {
        localIndices[ind]        = loff + d;
        remoteIndices[ind].rank  = rank;
        remoteIndices[ind].index = remoteOffset + d;
      }
    }
  }
  PetscCheck(numIndices == ind, comm, PETSC_ERR_PLIB, "Inconsistency in indices, %" PetscInt_FMT " should be %" PetscInt_FMT, ind, numIndices);
  PetscCall(PetscSFSetGraph(*sectionSF, numSectionRoots, numIndices, localIndices, PETSC_OWN_POINTER, remoteIndices, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetUp(*sectionSF));
  PetscCall(PetscLogEventEnd(PETSCSF_SectSF, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscSFCreateFromLayouts - Creates a parallel star forest mapping two `PetscLayout` objects

  Collective

  Input Parameters:
+ rmap - `PetscLayout` defining the global root space
- lmap - `PetscLayout` defining the global leaf space

  Output Parameter:
. sf - The parallel star forest

  Level: intermediate
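
  Example Usage:
  A minimal sketch, assuming two already set up layouts `rmap` and `lmap` of the same global size on congruent communicators:
.vb
  PetscSF sf;

  PetscCall(PetscSFCreateFromLayouts(rmap, lmap, &sf));
  /* each local leaf of lmap now points at the rank owning the same global index in rmap */
  PetscCall(PetscSFDestroy(&sf));
.ve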

.seealso: `PetscSF`, `PetscSFCreate()`, `PetscLayoutCreate()`, `PetscSFSetGraphLayout()`
@*/
PetscErrorCode PetscSFCreateFromLayouts(PetscLayout rmap, PetscLayout lmap, PetscSF *sf)
{
  PetscInt     i, nroots, nleaves = 0;
  PetscInt     rN, lst, len;
  PetscMPIInt  owner = -1;
  PetscSFNode *remote;
  MPI_Comm     rcomm = rmap->comm;
  MPI_Comm     lcomm = lmap->comm;
  PetscMPIInt  flg;

  PetscFunctionBegin;
  PetscAssertPointer(sf, 3);
  PetscCheck(rmap->setupcalled, rcomm, PETSC_ERR_ARG_WRONGSTATE, "Root layout not setup");
  PetscCheck(lmap->setupcalled, lcomm, PETSC_ERR_ARG_WRONGSTATE, "Leaf layout not setup");
  PetscCallMPI(MPI_Comm_compare(rcomm, lcomm, &flg));
  PetscCheck(flg == MPI_CONGRUENT || flg == MPI_IDENT, rcomm, PETSC_ERR_SUP, "cannot map two layouts with non-matching communicators");
  PetscCall(PetscSFCreate(rcomm, sf));
  PetscCall(PetscLayoutGetLocalSize(rmap, &nroots));
  PetscCall(PetscLayoutGetSize(rmap, &rN));
  PetscCall(PetscLayoutGetRange(lmap, &lst, &len));
  PetscCall(PetscMalloc1(len - lst, &remote));
  for (i = lst; i < len && i < rN; i++) {
    if (owner < -1 || i >= rmap->range[owner + 1]) PetscCall(PetscLayoutFindOwner(rmap, i, &owner));
    remote[nleaves].rank  = owner;
    remote[nleaves].index = i - rmap->range[owner];
    nleaves++;
  }
  PetscCall(PetscSFSetGraph(*sf, nroots, nleaves, NULL, PETSC_OWN_POINTER, remote, PETSC_COPY_VALUES));
  PetscCall(PetscFree(remote));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* TODO: handle nooffprocentries like MatZeroRowsMapLocal_Private, since this code is the same */
PetscErrorCode PetscLayoutMapLocal(PetscLayout map, PetscInt N, const PetscInt idxs[], PetscInt *on, PetscInt **oidxs, PetscInt **ogidxs)
{
  PetscInt    *owners = map->range;
  PetscInt     n      = map->n;
  PetscSF      sf;
  PetscInt    *lidxs, *work = NULL;
  PetscSFNode *ridxs;
  PetscMPIInt  rank, p = 0;
  PetscInt     r, len = 0;

  PetscFunctionBegin;
  if (on) *on = 0; /* squelch -Wmaybe-uninitialized */
  /* Create SF where leaves are input idxs and roots are owned idxs */
  PetscCallMPI(MPI_Comm_rank(map->comm, &rank));
  PetscCall(PetscMalloc1(n, &lidxs));
  for (r = 0; r < n; ++r) lidxs[r] = -1;
  PetscCall(PetscMalloc1(N, &ridxs));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = idxs[r];
    PetscCheck(idx >= 0 && idx < map->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Index %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, map->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this idx too */
      PetscCall(PetscLayoutFindOwner(map, idx, &p));
    }
    ridxs[r].rank  = p;
    ridxs[r].index = idxs[r] - owners[p];
  }
  PetscCall(PetscSFCreate(map->comm, &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, ridxs, PETSC_OWN_POINTER));
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)idxs, lidxs, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)idxs, lidxs, MPI_LOR));
  if (ogidxs) { /* communicate global idxs */
    PetscInt cum = 0, start, *work2;

    PetscCall(PetscMalloc1(n, &work));
    PetscCall(PetscCalloc1(N, &work2));
    for (r = 0; r < N; ++r)
      if (idxs[r] >= 0) cum++;
    PetscCallMPI(MPI_Scan(&cum, &start, 1, MPIU_INT, MPI_SUM, map->comm));
    start -= cum;
    cum = 0;
    for (r = 0; r < N; ++r)
      if (idxs[r] >= 0) work2[r] = start + cum++;
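    /* Send each requested index's freshly computed global number to its owning rank (leaves -> roots) */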
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work2, work, MPI_REPLACE));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work2, work, MPI_REPLACE));
    PetscCall(PetscFree(work2));
  }
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in indices */
  for (r = 0; r < n; ++r)
    if (lidxs[r] >= 0) {
      if (work) work[len] = work[r];
      lidxs[len++] = r;
    }
  if (on) *on = len;
  if (oidxs) *oidxs = lidxs;
  if (ogidxs) *ogidxs = work;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFCreateByMatchingIndices - Create `PetscSF` by matching root and leaf indices

  Collective

  Input Parameters:
+ layout           - `PetscLayout` defining the global index space and the rank that brokers each index
. numRootIndices   - size of rootIndices
. rootIndices      - `PetscInt` array of global indices of which this process requests ownership
. rootLocalIndices - root local index permutation (NULL if no permutation)
. rootLocalOffset  - offset to be added to root local indices
. numLeafIndices   - size of leafIndices
. leafIndices      - `PetscInt` array of global indices with which this process requires data associated
. leafLocalIndices - leaf local index permutation (NULL if no permutation)
- leafLocalOffset  - offset to be added to leaf local indices

  Output Parameters:
+ sfA - star forest representing the communication pattern from the layout space to the leaf space (NULL if not needed)
- sf  - star forest representing the communication pattern from the root space to the leaf space

  Level: advanced

  Example 1:
.vb
  rank            : 0        1      2
  rootIndices     : [1 0 2]  [3]    [3]
  rootLocalOffset : 100      200    300
  layout          : [0 1]    [2]    [3]
  leafIndices     : [0]      [2]    [0 3]
  leafLocalOffset : 400      500    600

  would build the following PetscSF

  [0] 400 <- (0,101)
  [1] 500 <- (0,102)
  [2] 600 <- (0,101)
  [2] 601 <- (2,300)
.ve

  Example 2:
.vb
  rank            : 0                1                2
  rootIndices     : [1 0 2]          [3]              [3]
  rootLocalOffset : 100              200              300
  layout          : [0 1]            [2]              [3]
  leafIndices     : rootIndices      rootIndices      rootIndices
  leafLocalOffset : rootLocalOffset  rootLocalOffset  rootLocalOffset

  would build the following PetscSF

  [1] 200 <- (2,300)
.ve

  Example 3:
.vb
  No process requests ownership of global index 1, but no process needs it.

  rank            : 0        1      2
  numRootIndices  : 2        1      1
  rootIndices     : [0 2]    [3]    [3]
  rootLocalOffset : 100      200    300
  layout          : [0 1]    [2]    [3]
  numLeafIndices  : 1        1      2
  leafIndices     : [0]      [2]    [0 3]
  leafLocalOffset : 400      500    600

  would build the following PetscSF

  [0] 400 <- (0,100)
  [1] 500 <- (0,101)
  [2] 600 <- (0,100)
  [2] 601 <- (2,300)
.ve
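
  Example Usage:
  A sketch of the rank-0 call that would set up Example 1 above (error handling elided; `comm` is an assumption for illustration):
.vb
  PetscLayout layout;
  PetscSF     sfA, sf;
  PetscInt    rootIndices[3] = {1, 0, 2}, leafIndices[1] = {0};

  PetscCall(PetscLayoutCreateFromSizes(comm, 2, PETSC_DECIDE, 1, &layout)); /* rank 0 brokers [0 1] */
  PetscCall(PetscSFCreateByMatchingIndices(layout, 3, rootIndices, NULL, 100, 1, leafIndices, NULL, 400, &sfA, &sf));
  PetscCall(PetscLayoutDestroy(&layout));
.ve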

  Notes:
  The layout parameter represents any partitioning of [0, N), where N is the total number of global indices, and its
  local size can be set to `PETSC_DECIDE`.

  If a global index x lies in the partition owned by process i, each process whose rootIndices contains x requests
  ownership of x and sends its own rank and the local index of x to process i.
  If multiple processes request ownership of x, the one with the highest rank is to own x.
  Process i then broadcasts the ownership information, so that each process whose leafIndices contains x knows the
  ownership information of x.
  The output sf is constructed by associating each leaf point to a root point in this way.

  Suppose there is point data ordered according to the global indices and partitioned according to the given layout.
  The optional output `PetscSF` object sfA can be used to push such data to leaf points.

  All indices in rootIndices and leafIndices must lie in the layout range. The union (over all processes) of rootIndices
  must cover that of leafIndices, but need not cover the entire layout.

  If (leafIndices, leafLocalIndices, leafLocalOffset) == (rootIndices, rootLocalIndices, rootLocalOffset), the output
  star forest is almost the identity, so it will only include the non-trivial part of the map.

  Developer Notes:
  The current approach, in which the process with the highest rank gains ownership, may cause load imbalance; consider using
  hash(rank, root_local_index) as the bid for the ownership determination.

.seealso: `PetscSF`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFCreateByMatchingIndices(PetscLayout layout, PetscInt numRootIndices, const PetscInt *rootIndices, const PetscInt *rootLocalIndices, PetscInt rootLocalOffset, PetscInt numLeafIndices, const PetscInt *leafIndices, const PetscInt *leafLocalIndices, PetscInt leafLocalOffset, PetscSF *sfA, PetscSF *sf)
{
  MPI_Comm     comm = layout->comm;
  PetscMPIInt  size, rank;
  PetscSF      sf1;
  PetscSFNode *owners, *buffer, *iremote;
  PetscInt    *ilocal, nleaves, N, n, i;
#if defined(PETSC_USE_DEBUG)
  PetscInt N1;
#endif
  PetscBool flag;

  PetscFunctionBegin;
  if (rootIndices) PetscAssertPointer(rootIndices, 3);
  if (rootLocalIndices) PetscAssertPointer(rootLocalIndices, 4);
  if (leafIndices) PetscAssertPointer(leafIndices, 7);
  if (leafLocalIndices) PetscAssertPointer(leafLocalIndices, 8);
  if (sfA) PetscAssertPointer(sfA, 10);
  PetscAssertPointer(sf, 11);
  PetscCheck(numRootIndices >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "numRootIndices (%" PetscInt_FMT ") must be non-negative", numRootIndices);
  PetscCheck(numLeafIndices >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "numLeafIndices (%" PetscInt_FMT ") must be non-negative", numLeafIndices);
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetSize(layout, &N));
  PetscCall(PetscLayoutGetLocalSize(layout, &n));
  flag = (PetscBool)(leafIndices == rootIndices);
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &flag, 1, MPIU_BOOL, MPI_LAND, comm));
  PetscCheck(!flag || numLeafIndices == numRootIndices, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "leafIndices == rootIndices, but numLeafIndices (%" PetscInt_FMT ") != numRootIndices (%" PetscInt_FMT ")", numLeafIndices, numRootIndices);
#if defined(PETSC_USE_DEBUG)
  N1 = PETSC_INT_MIN;
  for (i = 0; i < numRootIndices; i++)
    if (rootIndices[i] > N1) N1 = rootIndices[i];
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &N1, 1, MPIU_INT, MPI_MAX, comm));
  PetscCheck(N1 < N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Max. root index (%" PetscInt_FMT ") out of layout range [0,%" PetscInt_FMT ")", N1, N);
  if (!flag) {
    N1 = PETSC_INT_MIN;
    for (i = 0; i < numLeafIndices; i++)
      if (leafIndices[i] > N1) N1 = leafIndices[i];
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &N1, 1, MPIU_INT, MPI_MAX, comm));
    PetscCheck(N1 < N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Max. leaf index (%" PetscInt_FMT ") out of layout range [0,%" PetscInt_FMT ")", N1, N);
  }
#endif
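  /* Ownership is resolved by reducing (rank, local index) claims onto the brokering
     ranks of the layout; MPI_MAXLOC keeps the claim from the highest-ranked process */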
  /* Reduce: owners -> buffer */
  PetscCall(PetscMalloc1(n, &buffer));
  PetscCall(PetscSFCreate(comm, &sf1));
  PetscCall(PetscSFSetFromOptions(sf1));
  PetscCall(PetscSFSetGraphLayout(sf1, layout, numRootIndices, NULL, PETSC_OWN_POINTER, rootIndices));
  PetscCall(PetscMalloc1(numRootIndices, &owners));
  for (i = 0; i < numRootIndices; ++i) {
    owners[i].rank  = rank;
    owners[i].index = rootLocalOffset + (rootLocalIndices ? rootLocalIndices[i] : i);
  }
  for (i = 0; i < n; ++i) {
    buffer[i].index = -1;
    buffer[i].rank  = -1;
  }
  PetscCall(PetscSFReduceBegin(sf1, MPIU_SF_NODE, owners, buffer, MPI_MAXLOC));
  PetscCall(PetscSFReduceEnd(sf1, MPIU_SF_NODE, owners, buffer, MPI_MAXLOC));
  /* Bcast: buffer -> owners */
  if (!flag) {
    /* leafIndices is different from rootIndices */
    PetscCall(PetscFree(owners));
    PetscCall(PetscSFSetGraphLayout(sf1, layout, numLeafIndices, NULL, PETSC_OWN_POINTER, leafIndices));
    PetscCall(PetscMalloc1(numLeafIndices, &owners));
  }
  PetscCall(PetscSFBcastBegin(sf1, MPIU_SF_NODE, buffer, owners, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf1, MPIU_SF_NODE, buffer, owners, MPI_REPLACE));
  for (i = 0; i < numLeafIndices; ++i) PetscCheck(owners[i].rank >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Global point %" PetscInt_FMT " was unclaimed", leafIndices[i]);
  PetscCall(PetscFree(buffer));
  if (sfA) {
    *sfA = sf1;
  } else PetscCall(PetscSFDestroy(&sf1));
  /* Create sf */
  if (flag && rootLocalIndices == leafLocalIndices && leafLocalOffset == rootLocalOffset) {
    /* leaf space == root space */
    for (i = 0, nleaves = 0; i < numLeafIndices; ++i)
      if (owners[i].rank != rank) ++nleaves;
    PetscCall(PetscMalloc1(nleaves, &ilocal));
    PetscCall(PetscMalloc1(nleaves, &iremote));
    for (i = 0, nleaves = 0; i < numLeafIndices; ++i) {
      if (owners[i].rank != rank) {
        ilocal[nleaves]        = leafLocalOffset + i;
        iremote[nleaves].rank  = owners[i].rank;
        iremote[nleaves].index = owners[i].index;
        ++nleaves;
      }
    }
    PetscCall(PetscFree(owners));
  } else {
    nleaves = numLeafIndices;
    PetscCall(PetscMalloc1(nleaves, &ilocal));
    for (i = 0; i < nleaves; ++i) ilocal[i] = leafLocalOffset + (leafLocalIndices ? leafLocalIndices[i] : i);
    iremote = owners;
  }
  PetscCall(PetscSFCreate(comm, sf));
  PetscCall(PetscSFSetFromOptions(*sf));
  PetscCall(PetscSFSetGraph(*sf, rootLocalOffset + numRootIndices, nleaves, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFMerge - append/merge indices of `sfb` into `sfa`, with preference for `sfb`

  Collective

  Input Parameters:
+ sfa - default `PetscSF`
- sfb - additional edges to add/replace edges in `sfa`

  Output Parameter:
. merged - new `PetscSF` with combined edges

  Level: intermediate
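
  Example Usage:
  An illustration of the merge semantics, writing leaf entries as leaf <- (rank, root index); `sfb` wins on shared leaves:
.vb
  sfa    : 0 <- (0,0), 1 <- (0,1)
  sfb    : 1 <- (1,0)
  merged : 0 <- (0,0), 1 <- (1,0)
.ve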

.seealso: `PetscSFCompose()`
@*/
PetscErrorCode PetscSFMerge(PetscSF sfa, PetscSF sfb, PetscSF *merged)
{
  PetscInt maxleaf;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfa, PETSCSF_CLASSID, 1);
  PetscValidHeaderSpecific(sfb, PETSCSF_CLASSID, 2);
  PetscCheckSameComm(sfa, 1, sfb, 2);
  PetscAssertPointer(merged, 3);
  {
    PetscInt aleaf, bleaf;
    PetscCall(PetscSFGetLeafRange(sfa, NULL, &aleaf));
    PetscCall(PetscSFGetLeafRange(sfb, NULL, &bleaf));
    maxleaf = PetscMax(aleaf, bleaf) + 1; // One more than the last index
  }
  PetscInt          *clocal, aroots, aleaves, broots, bleaves;
  PetscSFNode       *cremote;
  const PetscInt    *alocal, *blocal;
  const PetscSFNode *aremote, *bremote;
  PetscCall(PetscMalloc2(maxleaf, &clocal, maxleaf, &cremote));
  for (PetscInt i = 0; i < maxleaf; i++) clocal[i] = -1;
  PetscCall(PetscSFGetGraph(sfa, &aroots, &aleaves, &alocal, &aremote));
  PetscCall(PetscSFGetGraph(sfb, &broots, &bleaves, &blocal, &bremote));
  PetscCheck(aroots == broots, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Both sfa and sfb must have the same root space");
  for (PetscInt i = 0; i < aleaves; i++) {
    PetscInt a = alocal ? alocal[i] : i;
    clocal[a]  = a;
    cremote[a] = aremote[i];
  }
  /* Edges from sfb overwrite those from sfa at shared leaves */
  for (PetscInt i = 0; i < bleaves; i++) {
    PetscInt b = blocal ? blocal[i] : i;
    clocal[b]  = b;
    cremote[b] = bremote[i];
  }
  /* Squeeze out unused leaf slots */
  PetscInt nleaves = 0;
  for (PetscInt i = 0; i < maxleaf; i++) {
    if (clocal[i] < 0) continue;
    clocal[nleaves]  = clocal[i];
    cremote[nleaves] = cremote[i];
    nleaves++;
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfa), merged));
  PetscCall(PetscSFSetGraph(*merged, aroots, nleaves, clocal, PETSC_COPY_VALUES, cremote, PETSC_COPY_VALUES));
  PetscCall(PetscFree2(clocal, cremote));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscSFCreateStridedSF - Create a `PetscSF` to communicate interleaved blocks of data

  Collective

  Input Parameters:
+ sf  - star forest
. bs  - stride
. ldr - leading dimension of root space
- ldl - leading dimension of leaf space

  Output Parameter:
. vsf - the new `PetscSF`

  Level: intermediate

  Notes:
  This can be useful to perform communications on blocks of right-hand sides. For example, the calling sequence
.vb
  c_datatype *roots, *leaves;
  for i in [0,bs) do
    PetscSFBcastBegin(sf, mpi_datatype, roots + i*ldr, leaves + i*ldl, op)
    PetscSFBcastEnd(sf, mpi_datatype, roots + i*ldr, leaves + i*ldl, op)
.ve
  is equivalent to
.vb
  c_datatype *roots, *leaves;
  PetscSFCreateStridedSF(sf, bs, ldr, ldl, &vsf)
  PetscSFBcastBegin(vsf, mpi_datatype, roots, leaves, op)
  PetscSFBcastEnd(vsf, mpi_datatype, roots, leaves, op)
.ve

  Developer Notes:
  Should this functionality be handled with a new API instead of creating a new object?
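
  For illustration, assuming bs = 2, three roots with ldr = 3, and a leaf range [0, maxl) with ldl = maxl, the induced layout is
.vb
  root dof k of block v lives at roots[v*ldr + k]
  leaf dof j of block v lives at leaves[v*ldl + j]
.ve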

.seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFCreateStridedSF(PetscSF sf, PetscInt bs, PetscInt ldr, PetscInt ldl, PetscSF *vsf)
{
  PetscSF            rankssf;
  const PetscSFNode *iremote, *sfrremote;
  PetscSFNode       *viremote;
  const PetscInt    *ilocal;
  PetscInt          *vilocal = NULL, *ldrs;
  PetscInt           nranks, nr, nl, vnr, vnl, maxl;
  PetscMPIInt        rank;
  MPI_Comm           comm;
  PetscSFType        sftype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveInt(sf, bs, 2);
  PetscAssertPointer(vsf, 5);
  if (bs == 1) {
    PetscCall(PetscObjectReference((PetscObject)sf));
    *vsf = sf;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscSFGetGraph(sf, &nr, &nl, &ilocal, &iremote));
  PetscCall(PetscSFGetLeafRange(sf, NULL, &maxl));
  maxl += 1;
  if (ldl == PETSC_DECIDE) ldl = maxl;
  if (ldr == PETSC_DECIDE) ldr = nr;
  PetscCheck(ldr >= nr, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid leading dimension %" PetscInt_FMT ", must be at least the number of roots %" PetscInt_FMT, ldr, nr);
  PetscCheck(ldl >= maxl, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid leading dimension %" PetscInt_FMT ", must be larger than the leaf range %" PetscInt_FMT, ldl, maxl - 1);
  vnr = nr * bs;
  vnl = nl * bs;
  PetscCall(PetscMalloc1(vnl, &viremote));
  PetscCall(PetscMalloc1(vnl, &vilocal));

  /* Communicate root leading dimensions to leaf ranks */
  PetscCall(PetscSFGetRanksSF(sf, &rankssf));
  PetscCall(PetscSFGetGraph(rankssf, NULL, &nranks, NULL, &sfrremote));
  PetscCall(PetscMalloc1(nranks, &ldrs));
  PetscCall(PetscSFBcastBegin(rankssf, MPIU_INT, &ldr, ldrs, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rankssf, MPIU_INT, &ldr, ldrs, MPI_REPLACE));

  for (PetscInt i = 0, rold = -1, lda = -1; i < nl; i++) {
    const PetscMPIInt r  = (PetscMPIInt)iremote[i].rank;
    const PetscInt    ii = iremote[i].index;

    if (r == rank) lda = ldr;
    else if (rold != r) { /* the neighbor changed, look up its leading dimension */
      PetscInt j;

      for (j = 0; j < nranks; j++)
        if (sfrremote[j].rank == r) break;
      PetscCheck(j < nranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to locate neighbor rank %d", r);
      lda = ldrs[j];
    }
    rold = r;
    for (PetscInt v = 0; v < bs; v++) {
      viremote[v * nl + i].rank  = r;
      viremote[v * nl + i].index = v * lda + ii;
      vilocal[v * nl + i]        = v * ldl + (ilocal ? ilocal[i] : i);
    }
  }
  PetscCall(PetscFree(ldrs));
  PetscCall(PetscSFCreate(comm, vsf));
  PetscCall(PetscSFGetType(sf, &sftype));
  PetscCall(PetscSFSetType(*vsf, sftype));
  PetscCall(PetscSFSetGraph(*vsf, vnr, vnl, vilocal, PETSC_OWN_POINTER, viremote, PETSC_OWN_POINTER));
  PetscFunctionReturn(PETSC_SUCCESS);
}