1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 2 #include <petsc/private/sectionimpl.h> 3 4 /*@C 5 PetscSFSetGraphLayout - Set a parallel star forest via global indices and a PetscLayout 6 7 Collective 8 9 Input Parameters: 10 + sf - star forest 11 . layout - PetscLayout defining the global space for roots 12 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 13 . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage 14 . localmode - copy mode for ilocal 15 - iremote - root vertices in global numbering corresponding to leaves in ilocal 16 17 Level: intermediate 18 19 Notes: 20 Global indices must lie in [0, N) where N is the global size of layout. 21 Leaf indices in ilocal get sorted; this means the user-provided array gets sorted if localmode is PETSC_OWN_POINTER. 22 23 Developer Notes: 24 Local indices which are the identity permutation in the range [0,nleaves) are discarded as they 25 encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not 26 needed 27 28 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph() 29 @*/ 30 PetscErrorCode PetscSFSetGraphLayout(PetscSF sf,PetscLayout layout,PetscInt nleaves,PetscInt *ilocal,PetscCopyMode localmode,const PetscInt *iremote) 31 { 32 const PetscInt *range; 33 PetscInt i, nroots, ls = -1, ln = -1; 34 PetscMPIInt lr = -1; 35 PetscSFNode *remote; 36 37 PetscFunctionBegin; 38 PetscCall(PetscLayoutGetLocalSize(layout,&nroots)); 39 PetscCall(PetscLayoutGetRanges(layout,&range)); 40 PetscCall(PetscMalloc1(nleaves,&remote)); 41 if (nleaves) { ls = iremote[0] + 1; } 42 for (i=0; i<nleaves; i++) { 43 const PetscInt idx = iremote[i] - ls; 44 if (idx < 0 || idx >= ln) { /* short-circuit the search */ 45 PetscCall(PetscLayoutFindOwnerIndex(layout,iremote[i],&lr,&remote[i].index)); 46 remote[i].rank = lr; 47 ls = range[lr]; 48 ln = range[lr+1] - ls; 49 } else { 50 remote[i].rank = lr; 51 remote[i].index = idx; 52 } 53 } 54 PetscCall(PetscSFSetGraph(sf,nroots,nleaves,ilocal,localmode,remote,PETSC_OWN_POINTER)); 55 PetscFunctionReturn(0); 56 } 57 58 /*@ 59 PetscSFSetGraphSection - Sets the PetscSF graph encoding the parallel dof overlap based upon the PetscSections 60 describing the data layout. 61 62 Input Parameters: 63 + sf - The SF 64 . localSection - PetscSection describing the local data layout 65 - globalSection - PetscSection describing the global data layout 66 67 Level: developer 68 69 .seealso: PetscSFSetGraph(), PetscSFSetGraphLayout() 70 @*/ 71 PetscErrorCode PetscSFSetGraphSection(PetscSF sf, PetscSection localSection, PetscSection globalSection) 72 { 73 MPI_Comm comm; 74 PetscLayout layout; 75 const PetscInt *ranges; 76 PetscInt *local; 77 PetscSFNode *remote; 78 PetscInt pStart, pEnd, p, nroots, nleaves = 0, l; 79 PetscMPIInt size, rank; 80 81 PetscFunctionBegin; 82 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 83 PetscValidHeaderSpecific(localSection, PETSC_SECTION_CLASSID, 2); 84 PetscValidHeaderSpecific(globalSection, PETSC_SECTION_CLASSID, 3); 85 86 PetscCall(PetscObjectGetComm((PetscObject)sf,&comm)); 87 PetscCallMPI(MPI_Comm_size(comm, &size)); 88 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 89 PetscCall(PetscSectionGetChart(globalSection, &pStart, &pEnd)); 90 PetscCall(PetscSectionGetConstrainedStorageSize(globalSection, &nroots)); 91 PetscCall(PetscLayoutCreate(comm, &layout)); 92 PetscCall(PetscLayoutSetBlockSize(layout, 1)); 93 PetscCall(PetscLayoutSetLocalSize(layout, nroots)); 94 PetscCall(PetscLayoutSetUp(layout)); 95 PetscCall(PetscLayoutGetRanges(layout, &ranges)); 96 for (p = pStart; p < pEnd; ++p) { 97 PetscInt gdof, gcdof; 98 99 PetscCall(PetscSectionGetDof(globalSection, p, &gdof)); 100 PetscCall(PetscSectionGetConstraintDof(globalSection, p, &gcdof)); 101 PetscCheckFalse(gcdof > (gdof < 0 ? -(gdof+1) : gdof),PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Point %" PetscInt_FMT " has %" PetscInt_FMT " constraints > %" PetscInt_FMT " dof", p, gcdof, (gdof < 0 ? -(gdof+1) : gdof)); 102 nleaves += gdof < 0 ? -(gdof+1)-gcdof : gdof-gcdof; 103 } 104 PetscCall(PetscMalloc1(nleaves, &local)); 105 PetscCall(PetscMalloc1(nleaves, &remote)); 106 for (p = pStart, l = 0; p < pEnd; ++p) { 107 const PetscInt *cind; 108 PetscInt dof, cdof, off, gdof, gcdof, goff, gsize, d, c; 109 110 PetscCall(PetscSectionGetDof(localSection, p, &dof)); 111 PetscCall(PetscSectionGetOffset(localSection, p, &off)); 112 PetscCall(PetscSectionGetConstraintDof(localSection, p, &cdof)); 113 PetscCall(PetscSectionGetConstraintIndices(localSection, p, &cind)); 114 PetscCall(PetscSectionGetDof(globalSection, p, &gdof)); 115 PetscCall(PetscSectionGetConstraintDof(globalSection, p, &gcdof)); 116 PetscCall(PetscSectionGetOffset(globalSection, p, &goff)); 117 if (!gdof) continue; /* Censored point */ 118 gsize = gdof < 0 ? -(gdof+1)-gcdof : gdof-gcdof; 119 if (gsize != dof-cdof) { 120 PetscCheckFalse(gsize != dof,comm, PETSC_ERR_ARG_WRONG, "Global dof %" PetscInt_FMT " for point %" PetscInt_FMT " is neither the constrained size %" PetscInt_FMT ", nor the unconstrained %" PetscInt_FMT, gsize, p, dof-cdof, dof); 121 cdof = 0; /* Ignore constraints */ 122 } 123 for (d = 0, c = 0; d < dof; ++d) { 124 if ((c < cdof) && (cind[c] == d)) {++c; continue;} 125 local[l+d-c] = off+d; 126 } 127 PetscCheckFalse(d-c != gsize,comm, PETSC_ERR_ARG_WRONG, "Point %" PetscInt_FMT ": Global dof %" PetscInt_FMT " != %" PetscInt_FMT " size - number of constraints", p, gsize, d-c); 128 if (gdof < 0) { 129 for (d = 0; d < gsize; ++d, ++l) { 130 PetscInt offset = -(goff+1) + d, r; 131 132 PetscCall(PetscFindInt(offset,size+1,ranges,&r)); 133 if (r < 0) r = -(r+2); 134 PetscCheckFalse((r < 0) || (r >= size),PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Point %" PetscInt_FMT " mapped to invalid process %" PetscInt_FMT " (%" PetscInt_FMT ", %" PetscInt_FMT ")", p, r, gdof, goff); 135 remote[l].rank = r; 136 remote[l].index = offset - ranges[r]; 137 } 138 } else { 139 for (d = 0; d < gsize; ++d, ++l) { 140 remote[l].rank = rank; 141 remote[l].index = goff+d - ranges[rank]; 142 } 143 } 144 } 145 PetscCheckFalse(l != nleaves,comm, PETSC_ERR_PLIB, "Iteration error, l %" PetscInt_FMT " != nleaves %" PetscInt_FMT, l, nleaves); 146 PetscCall(PetscLayoutDestroy(&layout)); 147 PetscCall(PetscSFSetGraph(sf, nroots, nleaves, local, PETSC_OWN_POINTER, remote, PETSC_OWN_POINTER)); 148 PetscFunctionReturn(0); 149 } 150 151 /*@C 152 PetscSFDistributeSection - Create a new PetscSection reorganized, moving from the root to the leaves of the SF 153 154 Collective on sf 155 156 Input Parameters: 157 + sf - The SF 158 - rootSection - Section defined on root space 159 160 Output Parameters: 161 + remoteOffsets - root offsets in leaf storage, or NULL 162 - leafSection - Section defined on the leaf space 163 164 Level: advanced 165 166 .seealso: PetscSFCreate() 167 @*/ 168 PetscErrorCode PetscSFDistributeSection(PetscSF sf, PetscSection rootSection, PetscInt **remoteOffsets, PetscSection leafSection) 169 { 170 PetscSF embedSF; 171 const PetscInt *indices; 172 IS selected; 173 PetscInt numFields, nroots, rpStart, rpEnd, lpStart = PETSC_MAX_INT, lpEnd = -1, f, c; 174 PetscBool *sub, hasc; 175 176 PetscFunctionBegin; 177 PetscCall(PetscLogEventBegin(PETSCSF_DistSect,sf,0,0,0)); 178 PetscCall(PetscSectionGetNumFields(rootSection, &numFields)); 179 if (numFields) { 180 IS perm; 181 182 /* PetscSectionSetNumFields() calls PetscSectionReset(), which destroys 183 leafSection->perm. To keep this permutation set by the user, we grab 184 the reference before calling PetscSectionSetNumFields() and set it 185 back after. */ 186 PetscCall(PetscSectionGetPermutation(leafSection, &perm)); 187 PetscCall(PetscObjectReference((PetscObject)perm)); 188 PetscCall(PetscSectionSetNumFields(leafSection, numFields)); 189 PetscCall(PetscSectionSetPermutation(leafSection, perm)); 190 PetscCall(ISDestroy(&perm)); 191 } 192 PetscCall(PetscMalloc1(numFields+2, &sub)); 193 sub[1] = rootSection->bc ? PETSC_TRUE : PETSC_FALSE; 194 for (f = 0; f < numFields; ++f) { 195 PetscSectionSym sym, dsym = NULL; 196 const char *name = NULL; 197 PetscInt numComp = 0; 198 199 sub[2 + f] = rootSection->field[f]->bc ? PETSC_TRUE : PETSC_FALSE; 200 PetscCall(PetscSectionGetFieldComponents(rootSection, f, &numComp)); 201 PetscCall(PetscSectionGetFieldName(rootSection, f, &name)); 202 PetscCall(PetscSectionGetFieldSym(rootSection, f, &sym)); 203 if (sym) PetscCall(PetscSectionSymDistribute(sym, sf, &dsym)); 204 PetscCall(PetscSectionSetFieldComponents(leafSection, f, numComp)); 205 PetscCall(PetscSectionSetFieldName(leafSection, f, name)); 206 PetscCall(PetscSectionSetFieldSym(leafSection, f, dsym)); 207 PetscCall(PetscSectionSymDestroy(&dsym)); 208 for (c = 0; c < rootSection->numFieldComponents[f]; ++c) { 209 PetscCall(PetscSectionGetComponentName(rootSection, f, c, &name)); 210 PetscCall(PetscSectionSetComponentName(leafSection, f, c, name)); 211 } 212 } 213 PetscCall(PetscSectionGetChart(rootSection, &rpStart, &rpEnd)); 214 PetscCall(PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL)); 215 rpEnd = PetscMin(rpEnd,nroots); 216 rpEnd = PetscMax(rpStart,rpEnd); 217 /* see if we can avoid creating the embedded SF, since it can cost more than an allreduce */ 218 sub[0] = (PetscBool)(nroots != rpEnd - rpStart); 219 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, sub, 2+numFields, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)sf))); 220 if (sub[0]) { 221 PetscCall(ISCreateStride(PETSC_COMM_SELF, rpEnd - rpStart, rpStart, 1, &selected)); 222 PetscCall(ISGetIndices(selected, &indices)); 223 PetscCall(PetscSFCreateEmbeddedRootSF(sf, rpEnd - rpStart, indices, &embedSF)); 224 PetscCall(ISRestoreIndices(selected, &indices)); 225 PetscCall(ISDestroy(&selected)); 226 } else { 227 PetscCall(PetscObjectReference((PetscObject)sf)); 228 embedSF = sf; 229 } 230 PetscCall(PetscSFGetLeafRange(embedSF, &lpStart, &lpEnd)); 231 lpEnd++; 232 233 PetscCall(PetscSectionSetChart(leafSection, lpStart, lpEnd)); 234 235 /* Constrained dof section */ 236 hasc = sub[1]; 237 for (f = 0; f < numFields; ++f) hasc = (PetscBool)(hasc || sub[2+f]); 238 239 /* Could fuse these at the cost of copies and extra allocation */ 240 PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->atlasDof[-rpStart], &leafSection->atlasDof[-lpStart],MPI_REPLACE)); 241 PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->atlasDof[-rpStart], &leafSection->atlasDof[-lpStart],MPI_REPLACE)); 242 if (sub[1]) { 243 PetscCall(PetscSectionCheckConstraints_Private(rootSection)); 244 PetscCall(PetscSectionCheckConstraints_Private(leafSection)); 245 PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->bc->atlasDof[-rpStart], &leafSection->bc->atlasDof[-lpStart],MPI_REPLACE)); 246 PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->bc->atlasDof[-rpStart], &leafSection->bc->atlasDof[-lpStart],MPI_REPLACE)); 247 } 248 for (f = 0; f < numFields; ++f) { 249 PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->field[f]->atlasDof[-rpStart], &leafSection->field[f]->atlasDof[-lpStart],MPI_REPLACE)); 250 PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->field[f]->atlasDof[-rpStart], &leafSection->field[f]->atlasDof[-lpStart],MPI_REPLACE)); 251 if (sub[2+f]) { 252 PetscCall(PetscSectionCheckConstraints_Private(rootSection->field[f])); 253 PetscCall(PetscSectionCheckConstraints_Private(leafSection->field[f])); 254 PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasDof[-rpStart], &leafSection->field[f]->bc->atlasDof[-lpStart],MPI_REPLACE)); 255 PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasDof[-rpStart], &leafSection->field[f]->bc->atlasDof[-lpStart],MPI_REPLACE)); 256 } 257 } 258 if (remoteOffsets) { 259 PetscCall(PetscMalloc1(lpEnd - lpStart, remoteOffsets)); 260 PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->atlasOff[-rpStart], &(*remoteOffsets)[-lpStart],MPI_REPLACE)); 261 PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->atlasOff[-rpStart], &(*remoteOffsets)[-lpStart],MPI_REPLACE)); 262 } 263 PetscCall(PetscSectionSetUp(leafSection)); 264 if (hasc) { /* need to communicate bcIndices */ 265 PetscSF bcSF; 266 PetscInt *rOffBc; 267 268 PetscCall(PetscMalloc1(lpEnd - lpStart, &rOffBc)); 269 if (sub[1]) { 270 PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->bc->atlasOff[-rpStart], &rOffBc[-lpStart],MPI_REPLACE)); 271 PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->bc->atlasOff[-rpStart], &rOffBc[-lpStart],MPI_REPLACE)); 272 PetscCall(PetscSFCreateSectionSF(embedSF, rootSection->bc, rOffBc, leafSection->bc, &bcSF)); 273 PetscCall(PetscSFBcastBegin(bcSF, MPIU_INT, rootSection->bcIndices, leafSection->bcIndices,MPI_REPLACE)); 274 PetscCall(PetscSFBcastEnd(bcSF, MPIU_INT, rootSection->bcIndices, leafSection->bcIndices,MPI_REPLACE)); 275 PetscCall(PetscSFDestroy(&bcSF)); 276 } 277 for (f = 0; f < numFields; ++f) { 278 if (sub[2+f]) { 279 PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasOff[-rpStart], &rOffBc[-lpStart],MPI_REPLACE)); 280 PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasOff[-rpStart], &rOffBc[-lpStart],MPI_REPLACE)); 281 PetscCall(PetscSFCreateSectionSF(embedSF, rootSection->field[f]->bc, rOffBc, leafSection->field[f]->bc, &bcSF)); 282 PetscCall(PetscSFBcastBegin(bcSF, MPIU_INT, rootSection->field[f]->bcIndices, leafSection->field[f]->bcIndices,MPI_REPLACE)); 283 PetscCall(PetscSFBcastEnd(bcSF, MPIU_INT, rootSection->field[f]->bcIndices, leafSection->field[f]->bcIndices,MPI_REPLACE)); 284 PetscCall(PetscSFDestroy(&bcSF)); 285 } 286 } 287 PetscCall(PetscFree(rOffBc)); 288 } 289 PetscCall(PetscSFDestroy(&embedSF)); 290 PetscCall(PetscFree(sub)); 291 PetscCall(PetscLogEventEnd(PETSCSF_DistSect,sf,0,0,0)); 292 PetscFunctionReturn(0); 293 } 294 295 /*@C 296 PetscSFCreateRemoteOffsets - Create offsets for point data on remote processes 297 298 Collective on sf 299 300 Input Parameters: 301 + sf - The SF 302 . rootSection - Data layout of remote points for outgoing data (this is layout for SF roots) 303 - leafSection - Data layout of local points for incoming data (this is layout for SF leaves) 304 305 Output Parameter: 306 . remoteOffsets - Offsets for point data on remote processes (these are offsets from the root section), or NULL 307 308 Level: developer 309 310 .seealso: PetscSFCreate() 311 @*/ 312 PetscErrorCode PetscSFCreateRemoteOffsets(PetscSF sf, PetscSection rootSection, PetscSection leafSection, PetscInt **remoteOffsets) 313 { 314 PetscSF embedSF; 315 const PetscInt *indices; 316 IS selected; 317 PetscInt numRoots, rpStart = 0, rpEnd = 0, lpStart = 0, lpEnd = 0; 318 319 PetscFunctionBegin; 320 *remoteOffsets = NULL; 321 PetscCall(PetscSFGetGraph(sf, &numRoots, NULL, NULL, NULL)); 322 if (numRoots < 0) PetscFunctionReturn(0); 323 PetscCall(PetscLogEventBegin(PETSCSF_RemoteOff,sf,0,0,0)); 324 PetscCall(PetscSectionGetChart(rootSection, &rpStart, &rpEnd)); 325 PetscCall(PetscSectionGetChart(leafSection, &lpStart, &lpEnd)); 326 PetscCall(ISCreateStride(PETSC_COMM_SELF, rpEnd - rpStart, rpStart, 1, &selected)); 327 PetscCall(ISGetIndices(selected, &indices)); 328 PetscCall(PetscSFCreateEmbeddedRootSF(sf, rpEnd - rpStart, indices, &embedSF)); 329 PetscCall(ISRestoreIndices(selected, &indices)); 330 PetscCall(ISDestroy(&selected)); 331 PetscCall(PetscCalloc1(lpEnd - lpStart, remoteOffsets)); 332 PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->atlasOff[-rpStart], &(*remoteOffsets)[-lpStart],MPI_REPLACE)); 333 PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->atlasOff[-rpStart], &(*remoteOffsets)[-lpStart],MPI_REPLACE)); 334 PetscCall(PetscSFDestroy(&embedSF)); 335 PetscCall(PetscLogEventEnd(PETSCSF_RemoteOff,sf,0,0,0)); 336 PetscFunctionReturn(0); 337 } 338 339 /*@C 340 PetscSFCreateSectionSF - Create an expanded SF of dofs, assuming the input SF relates points 341 342 Collective on sf 343 344 Input Parameters: 345 + sf - The SF 346 . rootSection - Data layout of remote points for outgoing data (this is usually the serial section) 347 . remoteOffsets - Offsets for point data on remote processes (these are offsets from the root section), or NULL 348 - leafSection - Data layout of local points for incoming data (this is the distributed section) 349 350 Output Parameters: 351 - sectionSF - The new SF 352 353 Note: Either rootSection or remoteOffsets can be specified 354 355 Level: advanced 356 357 .seealso: PetscSFCreate() 358 @*/ 359 PetscErrorCode PetscSFCreateSectionSF(PetscSF sf, PetscSection rootSection, PetscInt remoteOffsets[], PetscSection leafSection, PetscSF *sectionSF) 360 { 361 MPI_Comm comm; 362 const PetscInt *localPoints; 363 const PetscSFNode *remotePoints; 364 PetscInt lpStart, lpEnd; 365 PetscInt numRoots, numSectionRoots, numPoints, numIndices = 0; 366 PetscInt *localIndices; 367 PetscSFNode *remoteIndices; 368 PetscInt i, ind; 369 370 PetscFunctionBegin; 371 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 372 PetscValidPointer(rootSection,2); 373 /* Cannot check PetscValidIntPointer(remoteOffsets,3) because it can be NULL if sf does not reference any points in leafSection */ 374 PetscValidPointer(leafSection,4); 375 PetscValidPointer(sectionSF,5); 376 PetscCall(PetscObjectGetComm((PetscObject)sf,&comm)); 377 PetscCall(PetscSFCreate(comm, sectionSF)); 378 PetscCall(PetscSectionGetChart(leafSection, &lpStart, &lpEnd)); 379 PetscCall(PetscSectionGetStorageSize(rootSection, &numSectionRoots)); 380 PetscCall(PetscSFGetGraph(sf, &numRoots, &numPoints, &localPoints, &remotePoints)); 381 if (numRoots < 0) PetscFunctionReturn(0); 382 PetscCall(PetscLogEventBegin(PETSCSF_SectSF,sf,0,0,0)); 383 for (i = 0; i < numPoints; ++i) { 384 PetscInt localPoint = localPoints ? localPoints[i] : i; 385 PetscInt dof; 386 387 if ((localPoint >= lpStart) && (localPoint < lpEnd)) { 388 PetscCall(PetscSectionGetDof(leafSection, localPoint, &dof)); 389 numIndices += dof; 390 } 391 } 392 PetscCall(PetscMalloc1(numIndices, &localIndices)); 393 PetscCall(PetscMalloc1(numIndices, &remoteIndices)); 394 /* Create new index graph */ 395 for (i = 0, ind = 0; i < numPoints; ++i) { 396 PetscInt localPoint = localPoints ? localPoints[i] : i; 397 PetscInt rank = remotePoints[i].rank; 398 399 if ((localPoint >= lpStart) && (localPoint < lpEnd)) { 400 PetscInt remoteOffset = remoteOffsets[localPoint-lpStart]; 401 PetscInt loff, dof, d; 402 403 PetscCall(PetscSectionGetOffset(leafSection, localPoint, &loff)); 404 PetscCall(PetscSectionGetDof(leafSection, localPoint, &dof)); 405 for (d = 0; d < dof; ++d, ++ind) { 406 localIndices[ind] = loff+d; 407 remoteIndices[ind].rank = rank; 408 remoteIndices[ind].index = remoteOffset+d; 409 } 410 } 411 } 412 PetscCheckFalse(numIndices != ind,comm, PETSC_ERR_PLIB, "Inconsistency in indices, %" PetscInt_FMT " should be %" PetscInt_FMT, ind, numIndices); 413 PetscCall(PetscSFSetGraph(*sectionSF, numSectionRoots, numIndices, localIndices, PETSC_OWN_POINTER, remoteIndices, PETSC_OWN_POINTER)); 414 PetscCall(PetscSFSetUp(*sectionSF)); 415 PetscCall(PetscLogEventEnd(PETSCSF_SectSF,sf,0,0,0)); 416 PetscFunctionReturn(0); 417 } 418 419 /*@C 420 PetscSFCreateFromLayouts - Creates a parallel star forest mapping two PetscLayout objects 421 422 Collective 423 424 Input Parameters: 425 + rmap - PetscLayout defining the global root space 426 - lmap - PetscLayout defining the global leaf space 427 428 Output Parameter: 429 . sf - The parallel star forest 430 431 Level: intermediate 432 433 .seealso: PetscSFCreate(), PetscLayoutCreate(), PetscSFSetGraphLayout() 434 @*/ 435 PetscErrorCode PetscSFCreateFromLayouts(PetscLayout rmap, PetscLayout lmap, PetscSF* sf) 436 { 437 PetscInt i,nroots,nleaves = 0; 438 PetscInt rN, lst, len; 439 PetscMPIInt owner = -1; 440 PetscSFNode *remote; 441 MPI_Comm rcomm = rmap->comm; 442 MPI_Comm lcomm = lmap->comm; 443 PetscMPIInt flg; 444 445 PetscFunctionBegin; 446 PetscValidPointer(sf,3); 447 PetscCheck(rmap->setupcalled,rcomm,PETSC_ERR_ARG_WRONGSTATE,"Root layout not setup"); 448 PetscCheck(lmap->setupcalled,lcomm,PETSC_ERR_ARG_WRONGSTATE,"Leaf layout not setup"); 449 PetscCallMPI(MPI_Comm_compare(rcomm,lcomm,&flg)); 450 PetscCheckFalse(flg != MPI_CONGRUENT && flg != MPI_IDENT,rcomm,PETSC_ERR_SUP,"cannot map two layouts with non-matching communicators"); 451 PetscCall(PetscSFCreate(rcomm,sf)); 452 PetscCall(PetscLayoutGetLocalSize(rmap,&nroots)); 453 PetscCall(PetscLayoutGetSize(rmap,&rN)); 454 PetscCall(PetscLayoutGetRange(lmap,&lst,&len)); 455 PetscCall(PetscMalloc1(len-lst,&remote)); 456 for (i = lst; i < len && i < rN; i++) { 457 if (owner < -1 || i >= rmap->range[owner+1]) { 458 PetscCall(PetscLayoutFindOwner(rmap,i,&owner)); 459 } 460 remote[nleaves].rank = owner; 461 remote[nleaves].index = i - rmap->range[owner]; 462 nleaves++; 463 } 464 PetscCall(PetscSFSetGraph(*sf,nroots,nleaves,NULL,PETSC_OWN_POINTER,remote,PETSC_COPY_VALUES)); 465 PetscCall(PetscFree(remote)); 466 PetscFunctionReturn(0); 467 } 468 469 /* TODO: handle nooffprocentries like MatZeroRowsMapLocal_Private, since this code is the same */ 470 PetscErrorCode PetscLayoutMapLocal(PetscLayout map,PetscInt N,const PetscInt idxs[], PetscInt *on,PetscInt **oidxs,PetscInt **ogidxs) 471 { 472 PetscInt *owners = map->range; 473 PetscInt n = map->n; 474 PetscSF sf; 475 PetscInt *lidxs,*work = NULL; 476 PetscSFNode *ridxs; 477 PetscMPIInt rank, p = 0; 478 PetscInt r, len = 0; 479 480 PetscFunctionBegin; 481 if (on) *on = 0; /* squelch -Wmaybe-uninitialized */ 482 /* Create SF where leaves are input idxs and roots are owned idxs */ 483 PetscCallMPI(MPI_Comm_rank(map->comm,&rank)); 484 PetscCall(PetscMalloc1(n,&lidxs)); 485 for (r = 0; r < n; ++r) lidxs[r] = -1; 486 PetscCall(PetscMalloc1(N,&ridxs)); 487 for (r = 0; r < N; ++r) { 488 const PetscInt idx = idxs[r]; 489 PetscCheckFalse(idx < 0 || map->N <= idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,map->N); 490 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this idx too */ 491 PetscCall(PetscLayoutFindOwner(map,idx,&p)); 492 } 493 ridxs[r].rank = p; 494 ridxs[r].index = idxs[r] - owners[p]; 495 } 496 PetscCall(PetscSFCreate(map->comm,&sf)); 497 PetscCall(PetscSFSetGraph(sf,n,N,NULL,PETSC_OWN_POINTER,ridxs,PETSC_OWN_POINTER)); 498 PetscCall(PetscSFReduceBegin(sf,MPIU_INT,(PetscInt*)idxs,lidxs,MPI_LOR)); 499 PetscCall(PetscSFReduceEnd(sf,MPIU_INT,(PetscInt*)idxs,lidxs,MPI_LOR)); 500 if (ogidxs) { /* communicate global idxs */ 501 PetscInt cum = 0,start,*work2; 502 503 PetscCall(PetscMalloc1(n,&work)); 504 PetscCall(PetscCalloc1(N,&work2)); 505 for (r = 0; r < N; ++r) if (idxs[r] >=0) cum++; 506 PetscCallMPI(MPI_Scan(&cum,&start,1,MPIU_INT,MPI_SUM,map->comm)); 507 start -= cum; 508 cum = 0; 509 for (r = 0; r < N; ++r) if (idxs[r] >=0) work2[r] = start+cum++; 510 PetscCall(PetscSFReduceBegin(sf,MPIU_INT,work2,work,MPI_REPLACE)); 511 PetscCall(PetscSFReduceEnd(sf,MPIU_INT,work2,work,MPI_REPLACE)); 512 PetscCall(PetscFree(work2)); 513 } 514 PetscCall(PetscSFDestroy(&sf)); 515 /* Compress and put in indices */ 516 for (r = 0; r < n; ++r) 517 if (lidxs[r] >= 0) { 518 if (work) work[len] = work[r]; 519 lidxs[len++] = r; 520 } 521 if (on) *on = len; 522 if (oidxs) *oidxs = lidxs; 523 if (ogidxs) *ogidxs = work; 524 PetscFunctionReturn(0); 525 } 526 527 /*@ 528 PetscSFCreateByMatchingIndices - Create SF by matching root and leaf indices 529 530 Collective 531 532 Input Parameters: 533 + layout - PetscLayout defining the global index space and the rank that brokers each index 534 . numRootIndices - size of rootIndices 535 . rootIndices - PetscInt array of global indices of which this process requests ownership 536 . rootLocalIndices - root local index permutation (NULL if no permutation) 537 . rootLocalOffset - offset to be added to root local indices 538 . numLeafIndices - size of leafIndices 539 . leafIndices - PetscInt array of global indices with which this process requires data associated 540 . leafLocalIndices - leaf local index permutation (NULL if no permutation) 541 - leafLocalOffset - offset to be added to leaf local indices 542 543 Output Parameters: 544 + sfA - star forest representing the communication pattern from the layout space to the leaf space (NULL if not needed) 545 - sf - star forest representing the communication pattern from the root space to the leaf space 546 547 Example 1: 548 $ 549 $ rank : 0 1 2 550 $ rootIndices : [1 0 2] [3] [3] 551 $ rootLocalOffset : 100 200 300 552 $ layout : [0 1] [2] [3] 553 $ leafIndices : [0] [2] [0 3] 554 $ leafLocalOffset : 400 500 600 555 $ 556 would build the following SF 557 $ 558 $ [0] 400 <- (0,101) 559 $ [1] 500 <- (0,102) 560 $ [2] 600 <- (0,101) 561 $ [2] 601 <- (2,300) 562 $ 563 Example 2: 564 $ 565 $ rank : 0 1 2 566 $ rootIndices : [1 0 2] [3] [3] 567 $ rootLocalOffset : 100 200 300 568 $ layout : [0 1] [2] [3] 569 $ leafIndices : rootIndices rootIndices rootIndices 570 $ leafLocalOffset : rootLocalOffset rootLocalOffset rootLocalOffset 571 $ 572 would build the following SF 573 $ 574 $ [1] 200 <- (2,300) 575 $ 576 Example 3: 577 $ 578 $ No process requests ownership of global index 1, but no process needs it. 579 $ 580 $ rank : 0 1 2 581 $ numRootIndices : 2 1 1 582 $ rootIndices : [0 2] [3] [3] 583 $ rootLocalOffset : 100 200 300 584 $ layout : [0 1] [2] [3] 585 $ numLeafIndices : 1 1 2 586 $ leafIndices : [0] [2] [0 3] 587 $ leafLocalOffset : 400 500 600 588 $ 589 would build the following SF 590 $ 591 $ [0] 400 <- (0,100) 592 $ [1] 500 <- (0,101) 593 $ [2] 600 <- (0,100) 594 $ [2] 601 <- (2,300) 595 $ 596 597 Notes: 598 The layout parameter represents any partitioning of [0, N), where N is the total number of global indices, and its 599 local size can be set to PETSC_DECIDE. 600 If a global index x lies in the partition owned by process i, each process whose rootIndices contains x requests 601 ownership of x and sends its own rank and the local index of x to process i. 602 If multiple processes request ownership of x, the one with the highest rank is to own x. 603 Process i then broadcasts the ownership information, so that each process whose leafIndices contains x knows the 604 ownership information of x. 605 The output sf is constructed by associating each leaf point to a root point in this way. 606 607 Suppose there is point data ordered according to the global indices and partitioned according to the given layout. 608 The optional output PetscSF object sfA can be used to push such data to leaf points. 609 610 All indices in rootIndices and leafIndices must lie in the layout range. The union (over all processes) of rootIndices 611 must cover that of leafIndices, but need not cover the entire layout. 612 613 If (leafIndices, leafLocalIndices, leafLocalOffset) == (rootIndices, rootLocalIndices, rootLocalOffset), the output 614 star forest is almost identity, so will only include non-trivial part of the map. 615 616 Developer Notes: 617 Current approach of a process of the highest rank gaining the ownership may cause load imbalance; consider using 618 hash(rank, root_local_index) as the bid for the ownership determination. 619 620 Level: advanced 621 622 .seealso: PetscSFCreate() 623 @*/ 624 PetscErrorCode PetscSFCreateByMatchingIndices(PetscLayout layout, PetscInt numRootIndices, const PetscInt *rootIndices, const PetscInt *rootLocalIndices, PetscInt rootLocalOffset, PetscInt numLeafIndices, const PetscInt *leafIndices, const PetscInt *leafLocalIndices, PetscInt leafLocalOffset, PetscSF *sfA, PetscSF *sf) 625 { 626 MPI_Comm comm = layout->comm; 627 PetscMPIInt size, rank; 628 PetscSF sf1; 629 PetscSFNode *owners, *buffer, *iremote; 630 PetscInt *ilocal, nleaves, N, n, i; 631 #if defined(PETSC_USE_DEBUG) 632 PetscInt N1; 633 #endif 634 PetscBool flag; 635 636 PetscFunctionBegin; 637 if (rootIndices) PetscValidIntPointer(rootIndices,3); 638 if (rootLocalIndices) PetscValidIntPointer(rootLocalIndices,4); 639 if (leafIndices) PetscValidIntPointer(leafIndices,7); 640 if (leafLocalIndices) PetscValidIntPointer(leafLocalIndices,8); 641 if (sfA) PetscValidPointer(sfA,10); 642 PetscValidPointer(sf,11); 643 PetscCheckFalse(numRootIndices < 0,PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "numRootIndices (%" PetscInt_FMT ") must be non-negative", numRootIndices); 644 PetscCheckFalse(numLeafIndices < 0,PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "numLeafIndices (%" PetscInt_FMT ") must be non-negative", numLeafIndices); 645 PetscCallMPI(MPI_Comm_size(comm, &size)); 646 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 647 PetscCall(PetscLayoutSetUp(layout)); 648 PetscCall(PetscLayoutGetSize(layout, &N)); 649 PetscCall(PetscLayoutGetLocalSize(layout, &n)); 650 flag = (PetscBool)(leafIndices == rootIndices); 651 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &flag, 1, MPIU_BOOL, MPI_LAND, comm)); 652 PetscCheckFalse(flag && numLeafIndices != numRootIndices,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "leafIndices == rootIndices, but numLeafIndices (%" PetscInt_FMT ") != numRootIndices(%" PetscInt_FMT ")", numLeafIndices, numRootIndices); 653 #if defined(PETSC_USE_DEBUG) 654 N1 = PETSC_MIN_INT; 655 for (i = 0; i < numRootIndices; i++) if (rootIndices[i] > N1) N1 = rootIndices[i]; 656 PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE, &N1, 1, MPIU_INT, MPI_MAX, comm)); 657 PetscCheckFalse(N1 >= N,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Max. root index (%" PetscInt_FMT ") out of layout range [0,%" PetscInt_FMT ")", N1, N); 658 if (!flag) { 659 N1 = PETSC_MIN_INT; 660 for (i = 0; i < numLeafIndices; i++) if (leafIndices[i] > N1) N1 = leafIndices[i]; 661 PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE, &N1, 1, MPIU_INT, MPI_MAX, comm)); 662 PetscCheckFalse(N1 >= N,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Max. leaf index (%" PetscInt_FMT ") out of layout range [0,%" PetscInt_FMT ")", N1, N); 663 } 664 #endif 665 /* Reduce: owners -> buffer */ 666 PetscCall(PetscMalloc1(n, &buffer)); 667 PetscCall(PetscSFCreate(comm, &sf1)); 668 PetscCall(PetscSFSetFromOptions(sf1)); 669 PetscCall(PetscSFSetGraphLayout(sf1, layout, numRootIndices, NULL, PETSC_OWN_POINTER, rootIndices)); 670 PetscCall(PetscMalloc1(numRootIndices, &owners)); 671 for (i = 0; i < numRootIndices; ++i) { 672 owners[i].rank = rank; 673 owners[i].index = rootLocalOffset + (rootLocalIndices ? rootLocalIndices[i] : i); 674 } 675 for (i = 0; i < n; ++i) { 676 buffer[i].index = -1; 677 buffer[i].rank = -1; 678 } 679 PetscCall(PetscSFReduceBegin(sf1, MPIU_2INT, owners, buffer, MPI_MAXLOC)); 680 PetscCall(PetscSFReduceEnd(sf1, MPIU_2INT, owners, buffer, MPI_MAXLOC)); 681 /* Bcast: buffer -> owners */ 682 if (!flag) { 683 /* leafIndices is different from rootIndices */ 684 PetscCall(PetscFree(owners)); 685 PetscCall(PetscSFSetGraphLayout(sf1, layout, numLeafIndices, NULL, PETSC_OWN_POINTER, leafIndices)); 686 PetscCall(PetscMalloc1(numLeafIndices, &owners)); 687 } 688 PetscCall(PetscSFBcastBegin(sf1, MPIU_2INT, buffer, owners, MPI_REPLACE)); 689 PetscCall(PetscSFBcastEnd(sf1, MPIU_2INT, buffer, owners, MPI_REPLACE)); 690 for (i = 0; i < numLeafIndices; ++i) PetscCheckFalse(owners[i].rank < 0,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Global point %" PetscInt_FMT " was unclaimed", leafIndices[i]); 691 PetscCall(PetscFree(buffer)); 692 if (sfA) {*sfA = sf1;} 693 else PetscCall(PetscSFDestroy(&sf1)); 694 /* Create sf */ 695 if (flag && rootLocalIndices == leafLocalIndices && leafLocalOffset == rootLocalOffset) { 696 /* leaf space == root space */ 697 for (i = 0, nleaves = 0; i < numLeafIndices; ++i) if (owners[i].rank != rank) ++nleaves; 698 PetscCall(PetscMalloc1(nleaves, &ilocal)); 699 PetscCall(PetscMalloc1(nleaves, &iremote)); 700 for (i = 0, nleaves = 0; i < numLeafIndices; ++i) { 701 if (owners[i].rank != rank) { 702 ilocal[nleaves] = leafLocalOffset + i; 703 iremote[nleaves].rank = owners[i].rank; 704 iremote[nleaves].index = owners[i].index; 705 ++nleaves; 706 } 707 } 708 PetscCall(PetscFree(owners)); 709 } else { 710 nleaves = numLeafIndices; 711 PetscCall(PetscMalloc1(nleaves, &ilocal)); 712 for (i = 0; i < nleaves; ++i) {ilocal[i] = leafLocalOffset + (leafLocalIndices ? leafLocalIndices[i] : i);} 713 iremote = owners; 714 } 715 PetscCall(PetscSFCreate(comm, sf)); 716 PetscCall(PetscSFSetFromOptions(*sf)); 717 PetscCall(PetscSFSetGraph(*sf, rootLocalOffset + numRootIndices, nleaves, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 718 PetscFunctionReturn(0); 719 } 720