1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 2 #include <petsc/private/sectionimpl.h> 3 4 /*@C 5 PetscSFSetGraphLayout - Set a parallel star forest via global indices and a PetscLayout 6 7 Collective 8 9 Input Parameters: 10 + sf - star forest 11 . layout - PetscLayout defining the global space for roots 12 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 13 . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage 14 . localmode - copy mode for ilocal 15 - iremote - root vertices in global numbering corresponding to leaves in ilocal 16 17 Level: intermediate 18 19 Notes: 20 Global indices must lie in [0, N) where N is the global size of layout. 21 Leaf indices in ilocal get sorted; this means the user-provided array gets sorted if localmode is PETSC_OWN_POINTER. 22 23 Developer Notes: 24 Local indices which are the identity permutation in the range [0,nleaves) are discarded as they 25 encode contiguous storage. 
In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not 26 needed 27 28 .seealso: `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()` 29 @*/ 30 PetscErrorCode PetscSFSetGraphLayout(PetscSF sf, PetscLayout layout, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, const PetscInt *iremote) { 31 const PetscInt *range; 32 PetscInt i, nroots, ls = -1, ln = -1; 33 PetscMPIInt lr = -1; 34 PetscSFNode *remote; 35 36 PetscFunctionBegin; 37 PetscCall(PetscLayoutGetLocalSize(layout, &nroots)); 38 PetscCall(PetscLayoutGetRanges(layout, &range)); 39 PetscCall(PetscMalloc1(nleaves, &remote)); 40 if (nleaves) ls = iremote[0] + 1; 41 for (i = 0; i < nleaves; i++) { 42 const PetscInt idx = iremote[i] - ls; 43 if (idx < 0 || idx >= ln) { /* short-circuit the search */ 44 PetscCall(PetscLayoutFindOwnerIndex(layout, iremote[i], &lr, &remote[i].index)); 45 remote[i].rank = lr; 46 ls = range[lr]; 47 ln = range[lr + 1] - ls; 48 } else { 49 remote[i].rank = lr; 50 remote[i].index = idx; 51 } 52 } 53 PetscCall(PetscSFSetGraph(sf, nroots, nleaves, ilocal, localmode, remote, PETSC_OWN_POINTER)); 54 PetscFunctionReturn(0); 55 } 56 57 /*@ 58 PetscSFSetGraphSection - Sets the PetscSF graph encoding the parallel dof overlap based upon the PetscSections 59 describing the data layout. 60 61 Input Parameters: 62 + sf - The SF 63 . 
localSection - PetscSection describing the local data layout
- globalSection - PetscSection describing the global data layout

  Level: developer

.seealso: `PetscSFSetGraph()`, `PetscSFSetGraphLayout()`
@*/
PetscErrorCode PetscSFSetGraphSection(PetscSF sf, PetscSection localSection, PetscSection globalSection) {
  MPI_Comm        comm;
  PetscLayout     layout;
  const PetscInt *ranges;
  PetscInt       *local;
  PetscSFNode    *remote;
  PetscInt        pStart, pEnd, p, nroots, nleaves = 0, l;
  PetscMPIInt     size, rank;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidHeaderSpecific(localSection, PETSC_SECTION_CLASSID, 2);
  PetscValidHeaderSpecific(globalSection, PETSC_SECTION_CLASSID, 3);

  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscSectionGetChart(globalSection, &pStart, &pEnd));
  /* Roots are the locally owned unconstrained global dofs; build a layout over them to map offsets to ranks */
  PetscCall(PetscSectionGetConstrainedStorageSize(globalSection, &nroots));
  PetscCall(PetscLayoutCreate(comm, &layout));
  PetscCall(PetscLayoutSetBlockSize(layout, 1));
  PetscCall(PetscLayoutSetLocalSize(layout, nroots));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetRanges(layout, &ranges));
  /* First pass: count leaves = unconstrained dofs of every point; a remote point has gdof < 0, encoded as -(gdof+1) */
  for (p = pStart; p < pEnd; ++p) {
    PetscInt gdof, gcdof;

    PetscCall(PetscSectionGetDof(globalSection, p, &gdof));
    PetscCall(PetscSectionGetConstraintDof(globalSection, p, &gcdof));
    PetscCheck(gcdof <= (gdof < 0 ? -(gdof + 1) : gdof), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Point %" PetscInt_FMT " has %" PetscInt_FMT " constraints > %" PetscInt_FMT " dof", p, gcdof, (gdof < 0 ? -(gdof + 1) : gdof));
    nleaves += gdof < 0 ? -(gdof + 1) - gcdof : gdof - gcdof;
  }
  PetscCall(PetscMalloc1(nleaves, &local));
  PetscCall(PetscMalloc1(nleaves, &remote));
  /* Second pass: fill leaf (local dof numbers) and root (owning rank, root-local index) arrays */
  for (p = pStart, l = 0; p < pEnd; ++p) {
    const PetscInt *cind;
    PetscInt        dof, cdof, off, gdof, gcdof, goff, gsize, d, c;

    PetscCall(PetscSectionGetDof(localSection, p, &dof));
    PetscCall(PetscSectionGetOffset(localSection, p, &off));
    PetscCall(PetscSectionGetConstraintDof(localSection, p, &cdof));
    PetscCall(PetscSectionGetConstraintIndices(localSection, p, &cind));
    PetscCall(PetscSectionGetDof(globalSection, p, &gdof));
    PetscCall(PetscSectionGetConstraintDof(globalSection, p, &gcdof));
    PetscCall(PetscSectionGetOffset(globalSection, p, &goff));
    if (!gdof) continue; /* Censored point */
    gsize = gdof < 0 ? -(gdof + 1) - gcdof : gdof - gcdof;
    if (gsize != dof - cdof) {
      PetscCheck(gsize == dof, comm, PETSC_ERR_ARG_WRONG, "Global dof %" PetscInt_FMT " for point %" PetscInt_FMT " is neither the constrained size %" PetscInt_FMT ", nor the unconstrained %" PetscInt_FMT, gsize, p, dof - cdof, dof);
      cdof = 0; /* Ignore constraints */
    }
    /* Record local dof numbers, skipping constrained entries (cind is sorted; c tracks skipped count) */
    for (d = 0, c = 0; d < dof; ++d) {
      if ((c < cdof) && (cind[c] == d)) {
        ++c;
        continue;
      }
      local[l + d - c] = off + d;
    }
    PetscCheck(d - c == gsize, comm, PETSC_ERR_ARG_WRONG, "Point %" PetscInt_FMT ": Global dof %" PetscInt_FMT " != %" PetscInt_FMT " size - number of constraints", p, gsize, d - c);
    if (gdof < 0) {
      /* Remote point: the global offset is encoded as -(goff+1); binary search in layout ranges finds the owner */
      for (d = 0; d < gsize; ++d, ++l) {
        PetscInt offset = -(goff + 1) + d, r;

        PetscCall(PetscFindInt(offset, size + 1, ranges, &r));
        if (r < 0) r = -(r + 2); /* PetscFindInt returns -(insertion point + 1) on a miss */
        PetscCheck(!(r < 0) && !(r >= size), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Point %" PetscInt_FMT " mapped to invalid process %" PetscInt_FMT " (%" PetscInt_FMT ", %" PetscInt_FMT ")", p, r, gdof, goff);
        remote[l].rank  = r;
        remote[l].index = offset - ranges[r];
      }
    } else {
      /* Locally owned point: root index is the offset relative to this rank's range start */
      for (d = 0; d < gsize; ++d, ++l) {
        remote[l].rank  = rank;
        remote[l].index = goff + d - ranges[rank];
      }
    }
  }
  PetscCheck(l == nleaves, comm, PETSC_ERR_PLIB, "Iteration error, l %" PetscInt_FMT " != nleaves %" PetscInt_FMT, l, nleaves);
  PetscCall(PetscLayoutDestroy(&layout));
  PetscCall(PetscSFSetGraph(sf, nroots, nleaves, local, PETSC_OWN_POINTER, remote, PETSC_OWN_POINTER));
  PetscFunctionReturn(0);
}

/*@C
  PetscSFDistributeSection - Create a new PetscSection reorganized, moving from the root to the leaves of the SF

  Collective on sf

  Input Parameters:
+ sf - The SF
- rootSection - Section defined on root space

  Output Parameters:
+ remoteOffsets - root offsets in leaf storage, or NULL
- leafSection - Section defined on the leaf space

  Level: advanced

.seealso: `PetscSFCreate()`
@*/
PetscErrorCode PetscSFDistributeSection(PetscSF sf, PetscSection rootSection, PetscInt **remoteOffsets, PetscSection leafSection) {
  PetscSF         embedSF;
  const PetscInt *indices;
  IS              selected;
  PetscInt        numFields, nroots, rpStart, rpEnd, lpStart = PETSC_MAX_INT, lpEnd = -1, f, c;
  PetscBool      *sub, hasc;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(PETSCSF_DistSect, sf, 0, 0, 0));
  PetscCall(PetscSectionGetNumFields(rootSection, &numFields));
  if (numFields) {
    IS perm;

    /* PetscSectionSetNumFields() calls PetscSectionReset(), which destroys
       leafSection->perm. To keep this permutation set by the user, we grab
       the reference before calling PetscSectionSetNumFields() and set it
       back after. */
    PetscCall(PetscSectionGetPermutation(leafSection, &perm));
    PetscCall(PetscObjectReference((PetscObject)perm));
    PetscCall(PetscSectionSetNumFields(leafSection, numFields));
    PetscCall(PetscSectionSetPermutation(leafSection, perm));
    PetscCall(ISDestroy(&perm));
  }
  /* sub[0]: need embedded SF; sub[1]: section has constraints; sub[2+f]: field f has constraints */
  PetscCall(PetscMalloc1(numFields + 2, &sub));
  sub[1] = rootSection->bc ?
PETSC_TRUE : PETSC_FALSE;
  /* Copy field metadata (components, names, distributed symmetries) from the root to the leaf section */
  for (f = 0; f < numFields; ++f) {
    PetscSectionSym sym, dsym = NULL;
    const char     *name    = NULL;
    PetscInt        numComp = 0;

    sub[2 + f] = rootSection->field[f]->bc ? PETSC_TRUE : PETSC_FALSE;
    PetscCall(PetscSectionGetFieldComponents(rootSection, f, &numComp));
    PetscCall(PetscSectionGetFieldName(rootSection, f, &name));
    PetscCall(PetscSectionGetFieldSym(rootSection, f, &sym));
    if (sym) PetscCall(PetscSectionSymDistribute(sym, sf, &dsym));
    PetscCall(PetscSectionSetFieldComponents(leafSection, f, numComp));
    PetscCall(PetscSectionSetFieldName(leafSection, f, name));
    PetscCall(PetscSectionSetFieldSym(leafSection, f, dsym));
    PetscCall(PetscSectionSymDestroy(&dsym));
    for (c = 0; c < rootSection->numFieldComponents[f]; ++c) {
      PetscCall(PetscSectionGetComponentName(rootSection, f, c, &name));
      PetscCall(PetscSectionSetComponentName(leafSection, f, c, name));
    }
  }
  PetscCall(PetscSectionGetChart(rootSection, &rpStart, &rpEnd));
  PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
  /* Clamp the root chart to the SF's root space: [rpStart, min(rpEnd, nroots)), never inverted */
  rpEnd = PetscMin(rpEnd, nroots);
  rpEnd = PetscMax(rpStart, rpEnd);
  /* see if we can avoid creating the embedded SF, since it can cost more than an allreduce */
  sub[0] = (PetscBool)(nroots != rpEnd - rpStart);
  /* Single allreduce combines all three decisions (embedding needed, constraints anywhere) */
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, sub, 2 + numFields, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)sf)));
  if (sub[0]) {
    /* Restrict the SF to the roots lying inside the root section's chart */
    PetscCall(ISCreateStride(PETSC_COMM_SELF, rpEnd - rpStart, rpStart, 1, &selected));
    PetscCall(ISGetIndices(selected, &indices));
    PetscCall(PetscSFCreateEmbeddedRootSF(sf, rpEnd - rpStart, indices, &embedSF));
    PetscCall(ISRestoreIndices(selected, &indices));
    PetscCall(ISDestroy(&selected));
  } else {
    PetscCall(PetscObjectReference((PetscObject)sf));
    embedSF = sf;
  }
  PetscCall(PetscSFGetLeafRange(embedSF, &lpStart, &lpEnd));
  lpEnd++; /* leaf range max is inclusive, chart end is exclusive — TODO confirm PetscSFGetLeafRange contract */

  PetscCall(PetscSectionSetChart(leafSection, lpStart, lpEnd));

  /* Constrained dof section */
  hasc = sub[1];
  for (f = 0; f < numFields; ++f) hasc = (PetscBool)(hasc || sub[2 + f]);

  /* Could fuse these at the cost of copies and extra allocation */
  /* The atlas arrays are indexed by (point - chart start); shifting the base by -rpStart/-lpStart lets the SF
     address them directly by point number */
  PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->atlasDof[-rpStart], &leafSection->atlasDof[-lpStart], MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->atlasDof[-rpStart], &leafSection->atlasDof[-lpStart], MPI_REPLACE));
  if (sub[1]) {
    PetscCall(PetscSectionCheckConstraints_Private(rootSection));
    PetscCall(PetscSectionCheckConstraints_Private(leafSection));
    PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->bc->atlasDof[-rpStart], &leafSection->bc->atlasDof[-lpStart], MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->bc->atlasDof[-rpStart], &leafSection->bc->atlasDof[-lpStart], MPI_REPLACE));
  }
  for (f = 0; f < numFields; ++f) {
    PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->field[f]->atlasDof[-rpStart], &leafSection->field[f]->atlasDof[-lpStart], MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->field[f]->atlasDof[-rpStart], &leafSection->field[f]->atlasDof[-lpStart], MPI_REPLACE));
    if (sub[2 + f]) {
      PetscCall(PetscSectionCheckConstraints_Private(rootSection->field[f]));
      PetscCall(PetscSectionCheckConstraints_Private(leafSection->field[f]));
      PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasDof[-rpStart], &leafSection->field[f]->bc->atlasDof[-lpStart], MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasDof[-rpStart], &leafSection->field[f]->bc->atlasDof[-lpStart], MPI_REPLACE));
    }
  }
  if (remoteOffsets) {
    /* Ship each root point's storage offset to the leaves that reference it */
    PetscCall(PetscMalloc1(lpEnd - lpStart, remoteOffsets));
    PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->atlasOff[-rpStart], &(*remoteOffsets)[-lpStart], MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->atlasOff[-rpStart], &(*remoteOffsets)[-lpStart], MPI_REPLACE));
  }
  PetscCall(PetscSectionInvalidateMaxDof_Internal(leafSection));
  PetscCall(PetscSectionSetUp(leafSection));
  if (hasc) { /* need to communicate bcIndices */
    PetscSF   bcSF;
    PetscInt *rOffBc;

    PetscCall(PetscMalloc1(lpEnd - lpStart, &rOffBc));
    if (sub[1]) {
      /* Broadcast the constraint-atlas offsets, then use a dof-level SF to move the constraint indices themselves */
      PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
      PetscCall(PetscSFCreateSectionSF(embedSF, rootSection->bc, rOffBc, leafSection->bc, &bcSF));
      PetscCall(PetscSFBcastBegin(bcSF, MPIU_INT, rootSection->bcIndices, leafSection->bcIndices, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(bcSF, MPIU_INT, rootSection->bcIndices, leafSection->bcIndices, MPI_REPLACE));
      PetscCall(PetscSFDestroy(&bcSF));
    }
    for (f = 0; f < numFields; ++f) {
      if (sub[2 + f]) {
        PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
        PetscCall(PetscSFCreateSectionSF(embedSF, rootSection->field[f]->bc, rOffBc, leafSection->field[f]->bc, &bcSF));
        PetscCall(PetscSFBcastBegin(bcSF, MPIU_INT, rootSection->field[f]->bcIndices, leafSection->field[f]->bcIndices, MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(bcSF, MPIU_INT, rootSection->field[f]->bcIndices, leafSection->field[f]->bcIndices, MPI_REPLACE));
        PetscCall(PetscSFDestroy(&bcSF));
      }
    }
    PetscCall(PetscFree(rOffBc));
  }
  PetscCall(PetscSFDestroy(&embedSF));
  PetscCall(PetscFree(sub));
  PetscCall(PetscLogEventEnd(PETSCSF_DistSect, sf, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*@C
PetscSFCreateRemoteOffsets - Create offsets for point data on remote processes 298 299 Collective on sf 300 301 Input Parameters: 302 + sf - The SF 303 . rootSection - Data layout of remote points for outgoing data (this is layout for SF roots) 304 - leafSection - Data layout of local points for incoming data (this is layout for SF leaves) 305 306 Output Parameter: 307 . remoteOffsets - Offsets for point data on remote processes (these are offsets from the root section), or NULL 308 309 Level: developer 310 311 .seealso: `PetscSFCreate()` 312 @*/ 313 PetscErrorCode PetscSFCreateRemoteOffsets(PetscSF sf, PetscSection rootSection, PetscSection leafSection, PetscInt **remoteOffsets) { 314 PetscSF embedSF; 315 const PetscInt *indices; 316 IS selected; 317 PetscInt numRoots, rpStart = 0, rpEnd = 0, lpStart = 0, lpEnd = 0; 318 319 PetscFunctionBegin; 320 *remoteOffsets = NULL; 321 PetscCall(PetscSFGetGraph(sf, &numRoots, NULL, NULL, NULL)); 322 if (numRoots < 0) PetscFunctionReturn(0); 323 PetscCall(PetscLogEventBegin(PETSCSF_RemoteOff, sf, 0, 0, 0)); 324 PetscCall(PetscSectionGetChart(rootSection, &rpStart, &rpEnd)); 325 PetscCall(PetscSectionGetChart(leafSection, &lpStart, &lpEnd)); 326 PetscCall(ISCreateStride(PETSC_COMM_SELF, rpEnd - rpStart, rpStart, 1, &selected)); 327 PetscCall(ISGetIndices(selected, &indices)); 328 PetscCall(PetscSFCreateEmbeddedRootSF(sf, rpEnd - rpStart, indices, &embedSF)); 329 PetscCall(ISRestoreIndices(selected, &indices)); 330 PetscCall(ISDestroy(&selected)); 331 PetscCall(PetscCalloc1(lpEnd - lpStart, remoteOffsets)); 332 PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->atlasOff[-rpStart], &(*remoteOffsets)[-lpStart], MPI_REPLACE)); 333 PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->atlasOff[-rpStart], &(*remoteOffsets)[-lpStart], MPI_REPLACE)); 334 PetscCall(PetscSFDestroy(&embedSF)); 335 PetscCall(PetscLogEventEnd(PETSCSF_RemoteOff, sf, 0, 0, 0)); 336 PetscFunctionReturn(0); 337 } 338 339 /*@C 340 
PetscSFCreateSectionSF - Create an expanded SF of dofs, assuming the input SF relates points

  Collective on sf

  Input Parameters:
+ sf - The SF
. rootSection - Data layout of remote points for outgoing data (this is usually the serial section)
. remoteOffsets - Offsets for point data on remote processes (these are offsets from the root section), or NULL
- leafSection - Data layout of local points for incoming data (this is the distributed section)

  Output Parameters:
. sectionSF - The new SF

  Note: Either rootSection or remoteOffsets can be specified

  Level: advanced

.seealso: `PetscSFCreate()`
@*/
PetscErrorCode PetscSFCreateSectionSF(PetscSF sf, PetscSection rootSection, PetscInt remoteOffsets[], PetscSection leafSection, PetscSF *sectionSF) {
  MPI_Comm           comm;
  const PetscInt    *localPoints;
  const PetscSFNode *remotePoints;
  PetscInt           lpStart, lpEnd;
  PetscInt           numRoots, numSectionRoots, numPoints, numIndices = 0;
  PetscInt          *localIndices;
  PetscSFNode       *remoteIndices;
  PetscInt           i, ind;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidPointer(rootSection, 2);
  /* Cannot check PetscValidIntPointer(remoteOffsets,3) because it can be NULL if sf does not reference any points in leafSection */
  PetscValidPointer(leafSection, 4);
  PetscValidPointer(sectionSF, 5);
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCall(PetscSFCreate(comm, sectionSF));
  PetscCall(PetscSectionGetChart(leafSection, &lpStart, &lpEnd));
  PetscCall(PetscSectionGetStorageSize(rootSection, &numSectionRoots));
  PetscCall(PetscSFGetGraph(sf, &numRoots, &numPoints, &localPoints, &remotePoints));
  /* No graph set on the input SF: *sectionSF is returned created but with no graph */
  if (numRoots < 0) PetscFunctionReturn(0);
  PetscCall(PetscLogEventBegin(PETSCSF_SectSF, sf, 0, 0, 0));
  /* First pass: count dofs carried by leaf points that fall inside the leaf section's chart */
  for (i = 0; i < numPoints; ++i) {
    PetscInt localPoint = localPoints ? localPoints[i] : i;
    PetscInt dof;

    if ((localPoint >= lpStart) && (localPoint < lpEnd)) {
      PetscCall(PetscSectionGetDof(leafSection, localPoint, &dof));
      numIndices += dof < 0 ? 0 : dof;
    }
  }
  PetscCall(PetscMalloc1(numIndices, &localIndices));
  PetscCall(PetscMalloc1(numIndices, &remoteIndices));
  /* Create new index graph */
  /* Second pass: one dof-level leaf per point dof; remote index = root-section offset of the point plus dof number.
     NOTE(review): remoteOffsets is dereferenced here, so it may only be NULL when no localPoint lies in [lpStart, lpEnd) */
  for (i = 0, ind = 0; i < numPoints; ++i) {
    PetscInt localPoint = localPoints ? localPoints[i] : i;
    PetscInt rank       = remotePoints[i].rank;

    if ((localPoint >= lpStart) && (localPoint < lpEnd)) {
      PetscInt remoteOffset = remoteOffsets[localPoint - lpStart];
      PetscInt loff, dof, d;

      PetscCall(PetscSectionGetOffset(leafSection, localPoint, &loff));
      PetscCall(PetscSectionGetDof(leafSection, localPoint, &dof));
      for (d = 0; d < dof; ++d, ++ind) {
        localIndices[ind]        = loff + d;
        remoteIndices[ind].rank  = rank;
        remoteIndices[ind].index = remoteOffset + d;
      }
    }
  }
  PetscCheck(numIndices == ind, comm, PETSC_ERR_PLIB, "Inconsistency in indices, %" PetscInt_FMT " should be %" PetscInt_FMT, ind, numIndices);
  PetscCall(PetscSFSetGraph(*sectionSF, numSectionRoots, numIndices, localIndices, PETSC_OWN_POINTER, remoteIndices, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetUp(*sectionSF));
  PetscCall(PetscLogEventEnd(PETSCSF_SectSF, sf, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*@C
  PetscSFCreateFromLayouts - Creates a parallel star forest mapping two PetscLayout objects

  Collective

  Input Parameters:
+ rmap - PetscLayout defining the global root space
- lmap - PetscLayout defining the global leaf space

  Output Parameter:
.
sf - The parallel star forest

  Level: intermediate

.seealso: `PetscSFCreate()`, `PetscLayoutCreate()`, `PetscSFSetGraphLayout()`
@*/
PetscErrorCode PetscSFCreateFromLayouts(PetscLayout rmap, PetscLayout lmap, PetscSF *sf) {
  PetscInt     i, nroots, nleaves = 0;
  PetscInt     rN, lst, len;
  PetscMPIInt  owner = -1;
  PetscSFNode *remote;
  MPI_Comm     rcomm = rmap->comm;
  MPI_Comm     lcomm = lmap->comm;
  PetscMPIInt  flg;

  PetscFunctionBegin;
  PetscValidPointer(sf, 3);
  PetscCheck(rmap->setupcalled, rcomm, PETSC_ERR_ARG_WRONGSTATE, "Root layout not setup");
  PetscCheck(lmap->setupcalled, lcomm, PETSC_ERR_ARG_WRONGSTATE, "Leaf layout not setup");
  PetscCallMPI(MPI_Comm_compare(rcomm, lcomm, &flg));
  PetscCheck(flg == MPI_CONGRUENT || flg == MPI_IDENT, rcomm, PETSC_ERR_SUP, "cannot map two layouts with non-matching communicators");
  PetscCall(PetscSFCreate(rcomm, sf));
  PetscCall(PetscLayoutGetLocalSize(rmap, &nroots));
  PetscCall(PetscLayoutGetSize(rmap, &rN));
  PetscCall(PetscLayoutGetRange(lmap, &lst, &len));
  PetscCall(PetscMalloc1(len - lst, &remote));
  /* Map each local leaf index to its owner in the root layout; leaves at or beyond the root
     global size rN are simply not mapped (loop condition i < rN) */
  for (i = lst; i < len && i < rN; i++) {
    /* owner starts at -1, so range[owner + 1] == range[0] == 0 forces a lookup on the first iteration;
       afterwards the cached owner is reused while i stays inside its range */
    if (owner < -1 || i >= rmap->range[owner + 1]) PetscCall(PetscLayoutFindOwner(rmap, i, &owner));
    remote[nleaves].rank  = owner;
    remote[nleaves].index = i - rmap->range[owner];
    nleaves++;
  }
  /* PETSC_COPY_VALUES: PetscSFSetGraph copies remote, so free our (possibly oversized) buffer */
  PetscCall(PetscSFSetGraph(*sf, nroots, nleaves, NULL, PETSC_OWN_POINTER, remote, PETSC_COPY_VALUES));
  PetscCall(PetscFree(remote));
  PetscFunctionReturn(0);
}

/* TODO: handle nooffprocentries like MatZeroRowsMapLocal_Private, since this code is the same */
PetscErrorCode PetscLayoutMapLocal(PetscLayout map, PetscInt N, const PetscInt idxs[], PetscInt *on, PetscInt **oidxs, PetscInt **ogidxs) {
  PetscInt    *owners = map->range;
  PetscInt     n      = map->n;
  PetscSF      sf;
  PetscInt    *lidxs, *work = NULL;
  PetscSFNode *ridxs;
  PetscMPIInt  rank, p = 0;
  PetscInt     r, len = 0;
  PetscFunctionBegin;
  if (on) *on = 0; /* squelch -Wmaybe-uninitialized */
  /* Create SF where leaves are input idxs and roots are owned idxs */
  PetscCallMPI(MPI_Comm_rank(map->comm, &rank));
  PetscCall(PetscMalloc1(n, &lidxs));
  /* -1 marks "not referenced by any leaf"; the compression below keeps only entries raised to >= 0 */
  for (r = 0; r < n; ++r) lidxs[r] = -1;
  PetscCall(PetscMalloc1(N, &ridxs));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = idxs[r];
    PetscCheck(idx >= 0 && idx < map->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Index %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, map->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this idx too */
      PetscCall(PetscLayoutFindOwner(map, idx, &p));
    }
    ridxs[r].rank  = p;
    ridxs[r].index = idxs[r] - owners[p];
  }
  PetscCall(PetscSFCreate(map->comm, &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, ridxs, PETSC_OWN_POINTER));
  /* Mark referenced roots: any leaf contribution raises the entry; untargeted roots keep their -1 */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)idxs, lidxs, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)idxs, lidxs, MPI_LOR));
  if (ogidxs) { /* communicate global idxs */
    PetscInt cum = 0, start, *work2;

    PetscCall(PetscMalloc1(n, &work));
    PetscCall(PetscCalloc1(N, &work2));
    /* Global contiguous numbering of the valid (>= 0) inputs: exclusive prefix sum via MPI_Scan */
    for (r = 0; r < N; ++r)
      if (idxs[r] >= 0) cum++;
    PetscCallMPI(MPI_Scan(&cum, &start, 1, MPIU_INT, MPI_SUM, map->comm));
    start -= cum; /* convert the inclusive scan to an exclusive one */
    cum = 0;
    for (r = 0; r < N; ++r)
      if (idxs[r] >= 0) work2[r] = start + cum++;
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work2, work, MPI_REPLACE));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work2, work, MPI_REPLACE));
    PetscCall(PetscFree(work2));
  }
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in indices */
  /* lidxs becomes the list of locally owned, referenced indices; work carries their global numbers */
  for (r = 0; r < n; ++r)
    if (lidxs[r] >= 0) {
      if (work) work[len] = work[r];
      lidxs[len++] = r;
    }
  if (on) *on = len;
  if (oidxs) *oidxs = lidxs;
  if (ogidxs) *ogidxs = work;
  PetscFunctionReturn(0);
}

/*@
  PetscSFCreateByMatchingIndices - Create SF by matching root and leaf indices

  Collective

  Input Parameters:
+ layout - PetscLayout defining the global index space and the rank that brokers each index
. numRootIndices - size of rootIndices
. rootIndices - PetscInt array of global indices of which this process requests ownership
. rootLocalIndices - root local index permutation (NULL if no permutation)
. rootLocalOffset - offset to be added to root local indices
. numLeafIndices - size of leafIndices
. leafIndices - PetscInt array of global indices with which this process requires data associated
. leafLocalIndices - leaf local index permutation (NULL if no permutation)
- leafLocalOffset - offset to be added to leaf local indices

  Output Parameters:
+ sfA - star forest representing the communication pattern from the layout space to the leaf space (NULL if not needed)
- sf - star forest representing the communication pattern from the root space to the leaf space

  Example 1:
$
$  rank             : 0            1            2
$  rootIndices      : [1 0 2]     [3]          [3]
$  rootLocalOffset  : 100          200          300
$  layout           : [0 1]       [2]          [3]
$  leafIndices      : [0]         [2]          [0 3]
$  leafLocalOffset  : 400          500          600
$
  would build the following SF
$
$  [0] 400 <- (0,101)
$  [1] 500 <- (0,102)
$  [2] 600 <- (0,101)
$  [2] 601 <- (2,300)
$
  Example 2:
$
$  rank             : 0               1               2
$  rootIndices      : [1 0 2]        [3]             [3]
$  rootLocalOffset  : 100             200             300
$  layout           : [0 1]          [2]             [3]
$  leafIndices      : rootIndices    rootIndices     rootIndices
$  leafLocalOffset  : rootLocalOffset rootLocalOffset rootLocalOffset
$
  would build the following SF
$
$  [1] 200 <- (2,300)
$
  Example 3:
$
$  No process requests ownership of global index 1, but no process needs it.
$
$  rank             : 0            1            2
$  numRootIndices   : 2            1            1
$  rootIndices      : [0 2]        [3]          [3]
$  rootLocalOffset  : 100          200          300
$  layout           : [0 1]        [2]          [3]
$  numLeafIndices   : 1            1            2
$  leafIndices      : [0]          [2]          [0 3]
$  leafLocalOffset  : 400          500          600
$
  would build the following SF
$
$  [0] 400 <- (0,100)
$  [1] 500 <- (0,101)
$  [2] 600 <- (0,100)
$  [2] 601 <- (2,300)
$

  Notes:
  The layout parameter represents any partitioning of [0, N), where N is the total number of global indices, and its
  local size can be set to PETSC_DECIDE.
  If a global index x lies in the partition owned by process i, each process whose rootIndices contains x requests
  ownership of x and sends its own rank and the local index of x to process i.
  If multiple processes request ownership of x, the one with the highest rank is to own x.
  Process i then broadcasts the ownership information, so that each process whose leafIndices contains x knows the
  ownership information of x.
  The output sf is constructed by associating each leaf point to a root point in this way.

  Suppose there is point data ordered according to the global indices and partitioned according to the given layout.
  The optional output PetscSF object sfA can be used to push such data to leaf points.

  All indices in rootIndices and leafIndices must lie in the layout range. The union (over all processes) of rootIndices
  must cover that of leafIndices, but need not cover the entire layout.

  If (leafIndices, leafLocalIndices, leafLocalOffset) == (rootIndices, rootLocalIndices, rootLocalOffset), the output
  star forest is almost identity, so will only include non-trivial part of the map.

  Developer Notes:
  Current approach of a process of the highest rank gaining the ownership may cause load imbalance; consider using
  hash(rank, root_local_index) as the bid for the ownership determination.

  Level: advanced

.seealso: `PetscSFCreate()`
@*/
PetscErrorCode PetscSFCreateByMatchingIndices(PetscLayout layout, PetscInt numRootIndices, const PetscInt *rootIndices, const PetscInt *rootLocalIndices, PetscInt rootLocalOffset, PetscInt numLeafIndices, const PetscInt *leafIndices, const PetscInt *leafLocalIndices, PetscInt leafLocalOffset, PetscSF *sfA, PetscSF *sf) {
  MPI_Comm     comm = layout->comm;
  PetscMPIInt  size, rank;
  PetscSF      sf1;
  PetscSFNode *owners, *buffer, *iremote;
  PetscInt    *ilocal, nleaves, N, n, i;
#if defined(PETSC_USE_DEBUG)
  PetscInt N1;
#endif
  PetscBool flag;

  PetscFunctionBegin;
  if (rootIndices) PetscValidIntPointer(rootIndices, 3);
  if (rootLocalIndices) PetscValidIntPointer(rootLocalIndices, 4);
  if (leafIndices) PetscValidIntPointer(leafIndices, 7);
  if (leafLocalIndices) PetscValidIntPointer(leafLocalIndices, 8);
  if (sfA) PetscValidPointer(sfA, 10);
  PetscValidPointer(sf, 11);
  PetscCheck(numRootIndices >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "numRootIndices (%" PetscInt_FMT ") must be non-negative", numRootIndices);
  PetscCheck(numLeafIndices >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "numLeafIndices (%" PetscInt_FMT ") must be non-negative", numLeafIndices);
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscLayoutSetUp(layout));
  PetscCall(PetscLayoutGetSize(layout, &N));
  PetscCall(PetscLayoutGetLocalSize(layout, &n));
  /* flag is true only if EVERY process passes the identical array for leaves and roots */
  flag = (PetscBool)(leafIndices == rootIndices);
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &flag, 1, MPIU_BOOL, MPI_LAND, comm));
  PetscCheck(!flag || numLeafIndices == numRootIndices, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "leafIndices == rootIndices, but numLeafIndices (%" PetscInt_FMT ") != numRootIndices(%" PetscInt_FMT ")", numLeafIndices, numRootIndices);
#if defined(PETSC_USE_DEBUG)
  /* Debug-only: verify all indices lie inside the layout's global range */
  N1 = PETSC_MIN_INT;
  for (i = 0; i < numRootIndices; i++)
    if (rootIndices[i] > N1) N1 = rootIndices[i];
  PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE, &N1, 1, MPIU_INT, MPI_MAX, comm));
  PetscCheck(N1 < N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Max. root index (%" PetscInt_FMT ") out of layout range [0,%" PetscInt_FMT ")", N1, N);
  if (!flag) {
    N1 = PETSC_MIN_INT;
    for (i = 0; i < numLeafIndices; i++)
      if (leafIndices[i] > N1) N1 = leafIndices[i];
    PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE, &N1, 1, MPIU_INT, MPI_MAX, comm));
    PetscCheck(N1 < N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Max. leaf index (%" PetscInt_FMT ") out of layout range [0,%" PetscInt_FMT ")", N1, N);
  }
#endif
  /* Reduce: owners -> buffer */
  PetscCall(PetscMalloc1(n, &buffer));
  PetscCall(PetscSFCreate(comm, &sf1));
  PetscCall(PetscSFSetFromOptions(sf1));
  PetscCall(PetscSFSetGraphLayout(sf1, layout, numRootIndices, NULL, PETSC_OWN_POINTER, rootIndices));
  PetscCall(PetscMalloc1(numRootIndices, &owners));
  /* Each process bids (its rank, its local index) for every global index it wants to own */
  for (i = 0; i < numRootIndices; ++i) {
    owners[i].rank  = rank;
    owners[i].index = rootLocalOffset + (rootLocalIndices ? rootLocalIndices[i] : i);
  }
  for (i = 0; i < n; ++i) {
    buffer[i].index = -1;
    buffer[i].rank  = -1;
  }
  /* MPI_MAXLOC keeps the highest bidding rank (with its index riding along) — the requester with
     the highest rank wins ownership, as described in the Developer Notes */
  PetscCall(PetscSFReduceBegin(sf1, MPIU_2INT, owners, buffer, MPI_MAXLOC));
  PetscCall(PetscSFReduceEnd(sf1, MPIU_2INT, owners, buffer, MPI_MAXLOC));
  /* Bcast: buffer -> owners */
  if (!flag) {
    /* leafIndices is different from rootIndices */
    PetscCall(PetscFree(owners));
    PetscCall(PetscSFSetGraphLayout(sf1, layout, numLeafIndices, NULL, PETSC_OWN_POINTER, leafIndices));
    PetscCall(PetscMalloc1(numLeafIndices, &owners));
  }
  PetscCall(PetscSFBcastBegin(sf1, MPIU_2INT, buffer, owners, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf1, MPIU_2INT, buffer, owners, MPI_REPLACE));
  /* A rank of -1 survives only for global indices no process requested ownership of */
  for (i = 0; i < numLeafIndices; ++i) PetscCheck(owners[i].rank >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Global point %" PetscInt_FMT " was unclaimed", leafIndices[i]);
  PetscCall(PetscFree(buffer));
  if (sfA) {
    *sfA = sf1;
  } else PetscCall(PetscSFDestroy(&sf1));
  /* Create sf */
  if (flag && rootLocalIndices == leafLocalIndices && leafLocalOffset == rootLocalOffset) {
    /* leaf space == root space */
    /* Keep only the non-identity part: leaves whose winning owner is another process */
    for (i = 0, nleaves = 0; i < numLeafIndices; ++i)
      if (owners[i].rank != rank) ++nleaves;
    PetscCall(PetscMalloc1(nleaves, &ilocal));
    PetscCall(PetscMalloc1(nleaves, &iremote));
    for (i = 0, nleaves = 0; i < numLeafIndices; ++i) {
      if (owners[i].rank != rank) {
        ilocal[nleaves]        = leafLocalOffset + i;
        iremote[nleaves].rank  = owners[i].rank;
        iremote[nleaves].index = owners[i].index;
        ++nleaves;
      }
    }
    PetscCall(PetscFree(owners));
  } else {
    /* General case: one leaf per leaf index; ownership records become the remote array directly */
    nleaves = numLeafIndices;
    PetscCall(PetscMalloc1(nleaves, &ilocal));
    for (i = 0; i < nleaves; ++i) ilocal[i] = leafLocalOffset + (leafLocalIndices ? leafLocalIndices[i] : i);
    iremote = owners;
  }
  PetscCall(PetscSFCreate(comm, sf));
  PetscCall(PetscSFSetFromOptions(*sf));
  PetscCall(PetscSFSetGraph(*sf, rootLocalOffset + numRootIndices, nleaves, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscFunctionReturn(0);
}