1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 2 #include <petsc/private/sectionimpl.h> 3 4 /*@C 5 PetscSFSetGraphLayout - Set a parallel star forest via global indices and a PetscLayout 6 7 Collective 8 9 Input Parameters: 10 + sf - star forest 11 . layout - PetscLayout defining the global space for roots 12 . nleaves - number of leaf vertices on the current process, each of these references a root on any process 13 . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage 14 . localmode - copy mode for ilocal 15 - iremote - root vertices in global numbering corresponding to leaves in ilocal 16 17 Level: intermediate 18 19 Notes: 20 Global indices must lie in [0, N) where N is the global size of layout. 21 Leaf indices in ilocal get sorted; this means the user-provided array gets sorted if localmode is PETSC_OWN_POINTER. 22 23 Developer Notes: 24 Local indices which are the identity permutation in the range [0,nleaves) are discarded as they 25 encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not 26 needed 27 28 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph() 29 @*/ 30 PetscErrorCode PetscSFSetGraphLayout(PetscSF sf,PetscLayout layout,PetscInt nleaves,PetscInt *ilocal,PetscCopyMode localmode,const PetscInt *iremote) 31 { 32 const PetscInt *range; 33 PetscInt i, nroots, ls = -1, ln = -1; 34 PetscMPIInt lr = -1; 35 PetscSFNode *remote; 36 37 PetscFunctionBegin; 38 CHKERRQ(PetscLayoutGetLocalSize(layout,&nroots)); 39 CHKERRQ(PetscLayoutGetRanges(layout,&range)); 40 CHKERRQ(PetscMalloc1(nleaves,&remote)); 41 if (nleaves) { ls = iremote[0] + 1; } 42 for (i=0; i<nleaves; i++) { 43 const PetscInt idx = iremote[i] - ls; 44 if (idx < 0 || idx >= ln) { /* short-circuit the search */ 45 CHKERRQ(PetscLayoutFindOwnerIndex(layout,iremote[i],&lr,&remote[i].index)); 46 remote[i].rank = lr; 47 ls = range[lr]; 48 ln = range[lr+1] - ls; 49 } else { 50 remote[i].rank = lr; 51 remote[i].index = idx; 52 } 53 } 54 CHKERRQ(PetscSFSetGraph(sf,nroots,nleaves,ilocal,localmode,remote,PETSC_OWN_POINTER)); 55 PetscFunctionReturn(0); 56 } 57 58 /*@ 59 PetscSFSetGraphSection - Sets the PetscSF graph encoding the parallel dof overlap based upon the PetscSections 60 describing the data layout. 61 62 Input Parameters: 63 + sf - The SF 64 . localSection - PetscSection describing the local data layout 65 - globalSection - PetscSection describing the global data layout 66 67 Level: developer 68 69 .seealso: PetscSFSetGraph(), PetscSFSetGraphLayout() 70 @*/ 71 PetscErrorCode PetscSFSetGraphSection(PetscSF sf, PetscSection localSection, PetscSection globalSection) 72 { 73 MPI_Comm comm; 74 PetscLayout layout; 75 const PetscInt *ranges; 76 PetscInt *local; 77 PetscSFNode *remote; 78 PetscInt pStart, pEnd, p, nroots, nleaves = 0, l; 79 PetscMPIInt size, rank; 80 81 PetscFunctionBegin; 82 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 83 PetscValidHeaderSpecific(localSection, PETSC_SECTION_CLASSID, 2); 84 PetscValidHeaderSpecific(globalSection, PETSC_SECTION_CLASSID, 3); 85 86 CHKERRQ(PetscObjectGetComm((PetscObject)sf,&comm)); 87 CHKERRMPI(MPI_Comm_size(comm, &size)); 88 CHKERRMPI(MPI_Comm_rank(comm, &rank)); 89 CHKERRQ(PetscSectionGetChart(globalSection, &pStart, &pEnd)); 90 CHKERRQ(PetscSectionGetConstrainedStorageSize(globalSection, &nroots)); 91 CHKERRQ(PetscLayoutCreate(comm, &layout)); 92 CHKERRQ(PetscLayoutSetBlockSize(layout, 1)); 93 CHKERRQ(PetscLayoutSetLocalSize(layout, nroots)); 94 CHKERRQ(PetscLayoutSetUp(layout)); 95 CHKERRQ(PetscLayoutGetRanges(layout, &ranges)); 96 for (p = pStart; p < pEnd; ++p) { 97 PetscInt gdof, gcdof; 98 99 CHKERRQ(PetscSectionGetDof(globalSection, p, &gdof)); 100 CHKERRQ(PetscSectionGetConstraintDof(globalSection, p, &gcdof)); 101 PetscCheckFalse(gcdof > (gdof < 0 ? -(gdof+1) : gdof),PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Point %" PetscInt_FMT " has %" PetscInt_FMT " constraints > %" PetscInt_FMT " dof", p, gcdof, (gdof < 0 ? -(gdof+1) : gdof)); 102 nleaves += gdof < 0 ? -(gdof+1)-gcdof : gdof-gcdof; 103 } 104 CHKERRQ(PetscMalloc1(nleaves, &local)); 105 CHKERRQ(PetscMalloc1(nleaves, &remote)); 106 for (p = pStart, l = 0; p < pEnd; ++p) { 107 const PetscInt *cind; 108 PetscInt dof, cdof, off, gdof, gcdof, goff, gsize, d, c; 109 110 CHKERRQ(PetscSectionGetDof(localSection, p, &dof)); 111 CHKERRQ(PetscSectionGetOffset(localSection, p, &off)); 112 CHKERRQ(PetscSectionGetConstraintDof(localSection, p, &cdof)); 113 CHKERRQ(PetscSectionGetConstraintIndices(localSection, p, &cind)); 114 CHKERRQ(PetscSectionGetDof(globalSection, p, &gdof)); 115 CHKERRQ(PetscSectionGetConstraintDof(globalSection, p, &gcdof)); 116 CHKERRQ(PetscSectionGetOffset(globalSection, p, &goff)); 117 if (!gdof) continue; /* Censored point */ 118 gsize = gdof < 0 ? -(gdof+1)-gcdof : gdof-gcdof; 119 if (gsize != dof-cdof) { 120 PetscCheckFalse(gsize != dof,comm, PETSC_ERR_ARG_WRONG, "Global dof %" PetscInt_FMT " for point %" PetscInt_FMT " is neither the constrained size %" PetscInt_FMT ", nor the unconstrained %" PetscInt_FMT, gsize, p, dof-cdof, dof); 121 cdof = 0; /* Ignore constraints */ 122 } 123 for (d = 0, c = 0; d < dof; ++d) { 124 if ((c < cdof) && (cind[c] == d)) {++c; continue;} 125 local[l+d-c] = off+d; 126 } 127 PetscCheckFalse(d-c != gsize,comm, PETSC_ERR_ARG_WRONG, "Point %" PetscInt_FMT ": Global dof %" PetscInt_FMT " != %" PetscInt_FMT " size - number of constraints", p, gsize, d-c); 128 if (gdof < 0) { 129 for (d = 0; d < gsize; ++d, ++l) { 130 PetscInt offset = -(goff+1) + d, r; 131 132 CHKERRQ(PetscFindInt(offset,size+1,ranges,&r)); 133 if (r < 0) r = -(r+2); 134 PetscCheckFalse((r < 0) || (r >= size),PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Point %" PetscInt_FMT " mapped to invalid process %" PetscInt_FMT " (%" PetscInt_FMT ", %" PetscInt_FMT ")", p, r, gdof, goff); 135 remote[l].rank = r; 136 remote[l].index = offset - ranges[r]; 137 } 138 } else { 139 for (d = 0; d < gsize; ++d, ++l) { 140 remote[l].rank = rank; 141 remote[l].index = goff+d - ranges[rank]; 142 } 143 } 144 } 145 PetscCheckFalse(l != nleaves,comm, PETSC_ERR_PLIB, "Iteration error, l %" PetscInt_FMT " != nleaves %" PetscInt_FMT, l, nleaves); 146 CHKERRQ(PetscLayoutDestroy(&layout)); 147 CHKERRQ(PetscSFSetGraph(sf, nroots, nleaves, local, PETSC_OWN_POINTER, remote, PETSC_OWN_POINTER)); 148 PetscFunctionReturn(0); 149 } 150 151 static PetscErrorCode PetscSectionCheckConstraints_Static(PetscSection s) 152 { 153 PetscFunctionBegin; 154 if (!s->bc) { 155 CHKERRQ(PetscSectionCreate(PETSC_COMM_SELF, &s->bc)); 156 CHKERRQ(PetscSectionSetChart(s->bc, s->pStart, s->pEnd)); 157 } 158 PetscFunctionReturn(0); 159 } 160 161 /*@C 162 PetscSFDistributeSection - Create a new PetscSection reorganized, moving from the root to the leaves of the SF 163 164 Collective on sf 165 166 Input Parameters: 167 + sf - The SF 168 - rootSection - Section defined on root space 169 170 Output Parameters: 171 + remoteOffsets - root offsets in leaf storage, or NULL 172 - leafSection - Section defined on the leaf space 173 174 Level: advanced 175 176 .seealso: PetscSFCreate() 177 @*/ 178 PetscErrorCode PetscSFDistributeSection(PetscSF sf, PetscSection rootSection, PetscInt **remoteOffsets, PetscSection leafSection) 179 { 180 PetscSF embedSF; 181 const PetscInt *indices; 182 IS selected; 183 PetscInt numFields, nroots, rpStart, rpEnd, lpStart = PETSC_MAX_INT, lpEnd = -1, f, c; 184 PetscBool *sub, hasc; 185 186 PetscFunctionBegin; 187 CHKERRQ(PetscLogEventBegin(PETSCSF_DistSect,sf,0,0,0)); 188 CHKERRQ(PetscSectionGetNumFields(rootSection, &numFields)); 189 if (numFields) { 190 IS perm; 191 192 /* PetscSectionSetNumFields() calls PetscSectionReset(), which destroys 193 leafSection->perm. To keep this permutation set by the user, we grab 194 the reference before calling PetscSectionSetNumFields() and set it 195 back after. */ 196 CHKERRQ(PetscSectionGetPermutation(leafSection, &perm)); 197 CHKERRQ(PetscObjectReference((PetscObject)perm)); 198 CHKERRQ(PetscSectionSetNumFields(leafSection, numFields)); 199 CHKERRQ(PetscSectionSetPermutation(leafSection, perm)); 200 CHKERRQ(ISDestroy(&perm)); 201 } 202 CHKERRQ(PetscMalloc1(numFields+2, &sub)); 203 sub[1] = rootSection->bc ? PETSC_TRUE : PETSC_FALSE; 204 for (f = 0; f < numFields; ++f) { 205 PetscSectionSym sym, dsym = NULL; 206 const char *name = NULL; 207 PetscInt numComp = 0; 208 209 sub[2 + f] = rootSection->field[f]->bc ? PETSC_TRUE : PETSC_FALSE; 210 CHKERRQ(PetscSectionGetFieldComponents(rootSection, f, &numComp)); 211 CHKERRQ(PetscSectionGetFieldName(rootSection, f, &name)); 212 CHKERRQ(PetscSectionGetFieldSym(rootSection, f, &sym)); 213 if (sym) CHKERRQ(PetscSectionSymDistribute(sym, sf, &dsym)); 214 CHKERRQ(PetscSectionSetFieldComponents(leafSection, f, numComp)); 215 CHKERRQ(PetscSectionSetFieldName(leafSection, f, name)); 216 CHKERRQ(PetscSectionSetFieldSym(leafSection, f, dsym)); 217 CHKERRQ(PetscSectionSymDestroy(&dsym)); 218 for (c = 0; c < rootSection->numFieldComponents[f]; ++c) { 219 CHKERRQ(PetscSectionGetComponentName(rootSection, f, c, &name)); 220 CHKERRQ(PetscSectionSetComponentName(leafSection, f, c, name)); 221 } 222 } 223 CHKERRQ(PetscSectionGetChart(rootSection, &rpStart, &rpEnd)); 224 CHKERRQ(PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL)); 225 rpEnd = PetscMin(rpEnd,nroots); 226 rpEnd = PetscMax(rpStart,rpEnd); 227 /* see if we can avoid creating the embedded SF, since it can cost more than an allreduce */ 228 sub[0] = (PetscBool)(nroots != rpEnd - rpStart); 229 CHKERRMPI(MPIU_Allreduce(MPI_IN_PLACE, sub, 2+numFields, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)sf))); 230 if (sub[0]) { 231 CHKERRQ(ISCreateStride(PETSC_COMM_SELF, rpEnd - rpStart, rpStart, 1, &selected)); 232 CHKERRQ(ISGetIndices(selected, &indices)); 233 CHKERRQ(PetscSFCreateEmbeddedRootSF(sf, rpEnd - rpStart, indices, &embedSF)); 234 CHKERRQ(ISRestoreIndices(selected, &indices)); 235 CHKERRQ(ISDestroy(&selected)); 236 } else { 237 CHKERRQ(PetscObjectReference((PetscObject)sf)); 238 embedSF = sf; 239 } 240 CHKERRQ(PetscSFGetLeafRange(embedSF, &lpStart, &lpEnd)); 241 lpEnd++; 242 243 CHKERRQ(PetscSectionSetChart(leafSection, lpStart, lpEnd)); 244 245 /* Constrained dof section */ 246 hasc = sub[1]; 247 for (f = 0; f < numFields; ++f) hasc = (PetscBool)(hasc || sub[2+f]); 248 249 /* Could fuse these at the cost of copies and extra allocation */ 250 CHKERRQ(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->atlasDof[-rpStart], &leafSection->atlasDof[-lpStart],MPI_REPLACE)); 251 CHKERRQ(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->atlasDof[-rpStart], &leafSection->atlasDof[-lpStart],MPI_REPLACE)); 252 if (sub[1]) { 253 CHKERRQ(PetscSectionCheckConstraints_Static(rootSection)); 254 CHKERRQ(PetscSectionCheckConstraints_Static(leafSection)); 255 CHKERRQ(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->bc->atlasDof[-rpStart], &leafSection->bc->atlasDof[-lpStart],MPI_REPLACE)); 256 CHKERRQ(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->bc->atlasDof[-rpStart], &leafSection->bc->atlasDof[-lpStart],MPI_REPLACE)); 257 } 258 for (f = 0; f < numFields; ++f) { 259 CHKERRQ(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->field[f]->atlasDof[-rpStart], &leafSection->field[f]->atlasDof[-lpStart],MPI_REPLACE)); 260 CHKERRQ(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->field[f]->atlasDof[-rpStart], &leafSection->field[f]->atlasDof[-lpStart],MPI_REPLACE)); 261 if (sub[2+f]) { 262 CHKERRQ(PetscSectionCheckConstraints_Static(rootSection->field[f])); 263 CHKERRQ(PetscSectionCheckConstraints_Static(leafSection->field[f])); 264 CHKERRQ(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasDof[-rpStart], &leafSection->field[f]->bc->atlasDof[-lpStart],MPI_REPLACE)); 265 CHKERRQ(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasDof[-rpStart], &leafSection->field[f]->bc->atlasDof[-lpStart],MPI_REPLACE)); 266 } 267 } 268 if (remoteOffsets) { 269 CHKERRQ(PetscMalloc1(lpEnd - lpStart, remoteOffsets)); 270 CHKERRQ(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->atlasOff[-rpStart], &(*remoteOffsets)[-lpStart],MPI_REPLACE)); 271 CHKERRQ(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->atlasOff[-rpStart], &(*remoteOffsets)[-lpStart],MPI_REPLACE)); 272 } 273 CHKERRQ(PetscSectionSetUp(leafSection)); 274 if (hasc) { /* need to communicate bcIndices */ 275 PetscSF bcSF; 276 PetscInt *rOffBc; 277 278 CHKERRQ(PetscMalloc1(lpEnd - lpStart, &rOffBc)); 279 if (sub[1]) { 280 CHKERRQ(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->bc->atlasOff[-rpStart], &rOffBc[-lpStart],MPI_REPLACE)); 281 CHKERRQ(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->bc->atlasOff[-rpStart], &rOffBc[-lpStart],MPI_REPLACE)); 282 CHKERRQ(PetscSFCreateSectionSF(embedSF, rootSection->bc, rOffBc, leafSection->bc, &bcSF)); 283 CHKERRQ(PetscSFBcastBegin(bcSF, MPIU_INT, rootSection->bcIndices, leafSection->bcIndices,MPI_REPLACE)); 284 CHKERRQ(PetscSFBcastEnd(bcSF, MPIU_INT, rootSection->bcIndices, leafSection->bcIndices,MPI_REPLACE)); 285 CHKERRQ(PetscSFDestroy(&bcSF)); 286 } 287 for (f = 0; f < numFields; ++f) { 288 if (sub[2+f]) { 289 CHKERRQ(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasOff[-rpStart], &rOffBc[-lpStart],MPI_REPLACE)); 290 CHKERRQ(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasOff[-rpStart], &rOffBc[-lpStart],MPI_REPLACE)); 291 CHKERRQ(PetscSFCreateSectionSF(embedSF, rootSection->field[f]->bc, rOffBc, leafSection->field[f]->bc, &bcSF)); 292 CHKERRQ(PetscSFBcastBegin(bcSF, MPIU_INT, rootSection->field[f]->bcIndices, leafSection->field[f]->bcIndices,MPI_REPLACE)); 293 CHKERRQ(PetscSFBcastEnd(bcSF, MPIU_INT, rootSection->field[f]->bcIndices, leafSection->field[f]->bcIndices,MPI_REPLACE)); 294 CHKERRQ(PetscSFDestroy(&bcSF)); 295 } 296 } 297 CHKERRQ(PetscFree(rOffBc)); 298 } 299 CHKERRQ(PetscSFDestroy(&embedSF)); 300 CHKERRQ(PetscFree(sub)); 301 CHKERRQ(PetscLogEventEnd(PETSCSF_DistSect,sf,0,0,0)); 302 PetscFunctionReturn(0); 303 } 304 305 /*@C 306 PetscSFCreateRemoteOffsets - Create offsets for point data on remote processes 307 308 Collective on sf 309 310 Input Parameters: 311 + sf - The SF 312 . rootSection - Data layout of remote points for outgoing data (this is layout for SF roots) 313 - leafSection - Data layout of local points for incoming data (this is layout for SF leaves) 314 315 Output Parameter: 316 . remoteOffsets - Offsets for point data on remote processes (these are offsets from the root section), or NULL 317 318 Level: developer 319 320 .seealso: PetscSFCreate() 321 @*/ 322 PetscErrorCode PetscSFCreateRemoteOffsets(PetscSF sf, PetscSection rootSection, PetscSection leafSection, PetscInt **remoteOffsets) 323 { 324 PetscSF embedSF; 325 const PetscInt *indices; 326 IS selected; 327 PetscInt numRoots, rpStart = 0, rpEnd = 0, lpStart = 0, lpEnd = 0; 328 329 PetscFunctionBegin; 330 *remoteOffsets = NULL; 331 CHKERRQ(PetscSFGetGraph(sf, &numRoots, NULL, NULL, NULL)); 332 if (numRoots < 0) PetscFunctionReturn(0); 333 CHKERRQ(PetscLogEventBegin(PETSCSF_RemoteOff,sf,0,0,0)); 334 CHKERRQ(PetscSectionGetChart(rootSection, &rpStart, &rpEnd)); 335 CHKERRQ(PetscSectionGetChart(leafSection, &lpStart, &lpEnd)); 336 CHKERRQ(ISCreateStride(PETSC_COMM_SELF, rpEnd - rpStart, rpStart, 1, &selected)); 337 CHKERRQ(ISGetIndices(selected, &indices)); 338 CHKERRQ(PetscSFCreateEmbeddedRootSF(sf, rpEnd - rpStart, indices, &embedSF)); 339 CHKERRQ(ISRestoreIndices(selected, &indices)); 340 CHKERRQ(ISDestroy(&selected)); 341 CHKERRQ(PetscCalloc1(lpEnd - lpStart, remoteOffsets)); 342 CHKERRQ(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->atlasOff[-rpStart], &(*remoteOffsets)[-lpStart],MPI_REPLACE)); 343 CHKERRQ(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->atlasOff[-rpStart], &(*remoteOffsets)[-lpStart],MPI_REPLACE)); 344 CHKERRQ(PetscSFDestroy(&embedSF)); 345 CHKERRQ(PetscLogEventEnd(PETSCSF_RemoteOff,sf,0,0,0)); 346 PetscFunctionReturn(0); 347 } 348 349 /*@C 350 PetscSFCreateSectionSF - Create an expanded SF of dofs, assuming the input SF relates points 351 352 Collective on sf 353 354 Input Parameters: 355 + sf - The SF 356 . rootSection - Data layout of remote points for outgoing data (this is usually the serial section) 357 . remoteOffsets - Offsets for point data on remote processes (these are offsets from the root section), or NULL 358 - leafSection - Data layout of local points for incoming data (this is the distributed section) 359 360 Output Parameters: 361 - sectionSF - The new SF 362 363 Note: Either rootSection or remoteOffsets can be specified 364 365 Level: advanced 366 367 .seealso: PetscSFCreate() 368 @*/ 369 PetscErrorCode PetscSFCreateSectionSF(PetscSF sf, PetscSection rootSection, PetscInt remoteOffsets[], PetscSection leafSection, PetscSF *sectionSF) 370 { 371 MPI_Comm comm; 372 const PetscInt *localPoints; 373 const PetscSFNode *remotePoints; 374 PetscInt lpStart, lpEnd; 375 PetscInt numRoots, numSectionRoots, numPoints, numIndices = 0; 376 PetscInt *localIndices; 377 PetscSFNode *remoteIndices; 378 PetscInt i, ind; 379 380 PetscFunctionBegin; 381 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 382 PetscValidPointer(rootSection,2); 383 /* Cannot check PetscValidIntPointer(remoteOffsets,3) because it can be NULL if sf does not reference any points in leafSection */ 384 PetscValidPointer(leafSection,4); 385 PetscValidPointer(sectionSF,5); 386 CHKERRQ(PetscObjectGetComm((PetscObject)sf,&comm)); 387 CHKERRQ(PetscSFCreate(comm, sectionSF)); 388 CHKERRQ(PetscSectionGetChart(leafSection, &lpStart, &lpEnd)); 389 CHKERRQ(PetscSectionGetStorageSize(rootSection, &numSectionRoots)); 390 CHKERRQ(PetscSFGetGraph(sf, &numRoots, &numPoints, &localPoints, &remotePoints)); 391 if (numRoots < 0) PetscFunctionReturn(0); 392 CHKERRQ(PetscLogEventBegin(PETSCSF_SectSF,sf,0,0,0)); 393 for (i = 0; i < numPoints; ++i) { 394 PetscInt localPoint = localPoints ? localPoints[i] : i; 395 PetscInt dof; 396 397 if ((localPoint >= lpStart) && (localPoint < lpEnd)) { 398 CHKERRQ(PetscSectionGetDof(leafSection, localPoint, &dof)); 399 numIndices += dof; 400 } 401 } 402 CHKERRQ(PetscMalloc1(numIndices, &localIndices)); 403 CHKERRQ(PetscMalloc1(numIndices, &remoteIndices)); 404 /* Create new index graph */ 405 for (i = 0, ind = 0; i < numPoints; ++i) { 406 PetscInt localPoint = localPoints ? localPoints[i] : i; 407 PetscInt rank = remotePoints[i].rank; 408 409 if ((localPoint >= lpStart) && (localPoint < lpEnd)) { 410 PetscInt remoteOffset = remoteOffsets[localPoint-lpStart]; 411 PetscInt loff, dof, d; 412 413 CHKERRQ(PetscSectionGetOffset(leafSection, localPoint, &loff)); 414 CHKERRQ(PetscSectionGetDof(leafSection, localPoint, &dof)); 415 for (d = 0; d < dof; ++d, ++ind) { 416 localIndices[ind] = loff+d; 417 remoteIndices[ind].rank = rank; 418 remoteIndices[ind].index = remoteOffset+d; 419 } 420 } 421 } 422 PetscCheckFalse(numIndices != ind,comm, PETSC_ERR_PLIB, "Inconsistency in indices, %" PetscInt_FMT " should be %" PetscInt_FMT, ind, numIndices); 423 CHKERRQ(PetscSFSetGraph(*sectionSF, numSectionRoots, numIndices, localIndices, PETSC_OWN_POINTER, remoteIndices, PETSC_OWN_POINTER)); 424 CHKERRQ(PetscSFSetUp(*sectionSF)); 425 CHKERRQ(PetscLogEventEnd(PETSCSF_SectSF,sf,0,0,0)); 426 PetscFunctionReturn(0); 427 } 428 429 /*@C 430 PetscSFCreateFromLayouts - Creates a parallel star forest mapping two PetscLayout objects 431 432 Collective 433 434 Input Parameters: 435 + rmap - PetscLayout defining the global root space 436 - lmap - PetscLayout defining the global leaf space 437 438 Output Parameter: 439 . sf - The parallel star forest 440 441 Level: intermediate 442 443 .seealso: PetscSFCreate(), PetscLayoutCreate(), PetscSFSetGraphLayout() 444 @*/ 445 PetscErrorCode PetscSFCreateFromLayouts(PetscLayout rmap, PetscLayout lmap, PetscSF* sf) 446 { 447 PetscInt i,nroots,nleaves = 0; 448 PetscInt rN, lst, len; 449 PetscMPIInt owner = -1; 450 PetscSFNode *remote; 451 MPI_Comm rcomm = rmap->comm; 452 MPI_Comm lcomm = lmap->comm; 453 PetscMPIInt flg; 454 455 PetscFunctionBegin; 456 PetscValidPointer(sf,3); 457 PetscCheck(rmap->setupcalled,rcomm,PETSC_ERR_ARG_WRONGSTATE,"Root layout not setup"); 458 PetscCheck(lmap->setupcalled,lcomm,PETSC_ERR_ARG_WRONGSTATE,"Leaf layout not setup"); 459 CHKERRMPI(MPI_Comm_compare(rcomm,lcomm,&flg)); 460 PetscCheckFalse(flg != MPI_CONGRUENT && flg != MPI_IDENT,rcomm,PETSC_ERR_SUP,"cannot map two layouts with non-matching communicators"); 461 CHKERRQ(PetscSFCreate(rcomm,sf)); 462 CHKERRQ(PetscLayoutGetLocalSize(rmap,&nroots)); 463 CHKERRQ(PetscLayoutGetSize(rmap,&rN)); 464 CHKERRQ(PetscLayoutGetRange(lmap,&lst,&len)); 465 CHKERRQ(PetscMalloc1(len-lst,&remote)); 466 for (i = lst; i < len && i < rN; i++) { 467 if (owner < -1 || i >= rmap->range[owner+1]) { 468 CHKERRQ(PetscLayoutFindOwner(rmap,i,&owner)); 469 } 470 remote[nleaves].rank = owner; 471 remote[nleaves].index = i - rmap->range[owner]; 472 nleaves++; 473 } 474 CHKERRQ(PetscSFSetGraph(*sf,nroots,nleaves,NULL,PETSC_OWN_POINTER,remote,PETSC_COPY_VALUES)); 475 CHKERRQ(PetscFree(remote)); 476 PetscFunctionReturn(0); 477 } 478 479 /* TODO: handle nooffprocentries like MatZeroRowsMapLocal_Private, since this code is the same */ 480 PetscErrorCode PetscLayoutMapLocal(PetscLayout map,PetscInt N,const PetscInt idxs[], PetscInt *on,PetscInt **oidxs,PetscInt **ogidxs) 481 { 482 PetscInt *owners = map->range; 483 PetscInt n = map->n; 484 PetscSF sf; 485 PetscInt *lidxs,*work = NULL; 486 PetscSFNode *ridxs; 487 PetscMPIInt rank, p = 0; 488 PetscInt r, len = 0; 489 490 PetscFunctionBegin; 491 if (on) *on = 0; /* squelch -Wmaybe-uninitialized */ 492 /* Create SF where leaves are input idxs and roots are owned idxs */ 493 CHKERRMPI(MPI_Comm_rank(map->comm,&rank)); 494 CHKERRQ(PetscMalloc1(n,&lidxs)); 495 for (r = 0; r < n; ++r) lidxs[r] = -1; 496 CHKERRQ(PetscMalloc1(N,&ridxs)); 497 for (r = 0; r < N; ++r) { 498 const PetscInt idx = idxs[r]; 499 PetscCheckFalse(idx < 0 || map->N <= idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,map->N); 500 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this idx too */ 501 CHKERRQ(PetscLayoutFindOwner(map,idx,&p)); 502 } 503 ridxs[r].rank = p; 504 ridxs[r].index = idxs[r] - owners[p]; 505 } 506 CHKERRQ(PetscSFCreate(map->comm,&sf)); 507 CHKERRQ(PetscSFSetGraph(sf,n,N,NULL,PETSC_OWN_POINTER,ridxs,PETSC_OWN_POINTER)); 508 CHKERRQ(PetscSFReduceBegin(sf,MPIU_INT,(PetscInt*)idxs,lidxs,MPI_LOR)); 509 CHKERRQ(PetscSFReduceEnd(sf,MPIU_INT,(PetscInt*)idxs,lidxs,MPI_LOR)); 510 if (ogidxs) { /* communicate global idxs */ 511 PetscInt cum = 0,start,*work2; 512 513 CHKERRQ(PetscMalloc1(n,&work)); 514 CHKERRQ(PetscCalloc1(N,&work2)); 515 for (r = 0; r < N; ++r) if (idxs[r] >=0) cum++; 516 CHKERRMPI(MPI_Scan(&cum,&start,1,MPIU_INT,MPI_SUM,map->comm)); 517 start -= cum; 518 cum = 0; 519 for (r = 0; r < N; ++r) if (idxs[r] >=0) work2[r] = start+cum++; 520 CHKERRQ(PetscSFReduceBegin(sf,MPIU_INT,work2,work,MPI_REPLACE)); 521 CHKERRQ(PetscSFReduceEnd(sf,MPIU_INT,work2,work,MPI_REPLACE)); 522 CHKERRQ(PetscFree(work2)); 523 } 524 CHKERRQ(PetscSFDestroy(&sf)); 525 /* Compress and put in indices */ 526 for (r = 0; r < n; ++r) 527 if (lidxs[r] >= 0) { 528 if (work) work[len] = work[r]; 529 lidxs[len++] = r; 530 } 531 if (on) *on = len; 532 if (oidxs) *oidxs = lidxs; 533 if (ogidxs) *ogidxs = work; 534 PetscFunctionReturn(0); 535 } 536 537 /*@ 538 PetscSFCreateByMatchingIndices - Create SF by matching root and leaf indices 539 540 Collective 541 542 Input Parameters: 543 + layout - PetscLayout defining the global index space and the rank that brokers each index 544 . numRootIndices - size of rootIndices 545 . rootIndices - PetscInt array of global indices of which this process requests ownership 546 . rootLocalIndices - root local index permutation (NULL if no permutation) 547 . rootLocalOffset - offset to be added to root local indices 548 . numLeafIndices - size of leafIndices 549 . leafIndices - PetscInt array of global indices with which this process requires data associated 550 . leafLocalIndices - leaf local index permutation (NULL if no permutation) 551 - leafLocalOffset - offset to be added to leaf local indices 552 553 Output Parameters: 554 + sfA - star forest representing the communication pattern from the layout space to the leaf space (NULL if not needed) 555 - sf - star forest representing the communication pattern from the root space to the leaf space 556 557 Example 1: 558 $ 559 $ rank : 0 1 2 560 $ rootIndices : [1 0 2] [3] [3] 561 $ rootLocalOffset : 100 200 300 562 $ layout : [0 1] [2] [3] 563 $ leafIndices : [0] [2] [0 3] 564 $ leafLocalOffset : 400 500 600 565 $ 566 would build the following SF 567 $ 568 $ [0] 400 <- (0,101) 569 $ [1] 500 <- (0,102) 570 $ [2] 600 <- (0,101) 571 $ [2] 601 <- (2,300) 572 $ 573 Example 2: 574 $ 575 $ rank : 0 1 2 576 $ rootIndices : [1 0 2] [3] [3] 577 $ rootLocalOffset : 100 200 300 578 $ layout : [0 1] [2] [3] 579 $ leafIndices : rootIndices rootIndices rootIndices 580 $ leafLocalOffset : rootLocalOffset rootLocalOffset rootLocalOffset 581 $ 582 would build the following SF 583 $ 584 $ [1] 200 <- (2,300) 585 $ 586 Example 3: 587 $ 588 $ No process requests ownership of global index 1, but no process needs it. 589 $ 590 $ rank : 0 1 2 591 $ numRootIndices : 2 1 1 592 $ rootIndices : [0 2] [3] [3] 593 $ rootLocalOffset : 100 200 300 594 $ layout : [0 1] [2] [3] 595 $ numLeafIndices : 1 1 2 596 $ leafIndices : [0] [2] [0 3] 597 $ leafLocalOffset : 400 500 600 598 $ 599 would build the following SF 600 $ 601 $ [0] 400 <- (0,100) 602 $ [1] 500 <- (0,101) 603 $ [2] 600 <- (0,100) 604 $ [2] 601 <- (2,300) 605 $ 606 607 Notes: 608 The layout parameter represents any partitioning of [0, N), where N is the total number of global indices, and its 609 local size can be set to PETSC_DECIDE. 610 If a global index x lies in the partition owned by process i, each process whose rootIndices contains x requests 611 ownership of x and sends its own rank and the local index of x to process i. 612 If multiple processes request ownership of x, the one with the highest rank is to own x. 613 Process i then broadcasts the ownership information, so that each process whose leafIndices contains x knows the 614 ownership information of x. 615 The output sf is constructed by associating each leaf point to a root point in this way. 616 617 Suppose there is point data ordered according to the global indices and partitioned according to the given layout. 618 The optional output PetscSF object sfA can be used to push such data to leaf points. 619 620 All indices in rootIndices and leafIndices must lie in the layout range. The union (over all processes) of rootIndices 621 must cover that of leafIndices, but need not cover the entire layout. 622 623 If (leafIndices, leafLocalIndices, leafLocalOffset) == (rootIndices, rootLocalIndices, rootLocalOffset), the output 624 star forest is almost identity, so will only include non-trivial part of the map. 625 626 Developer Notes: 627 Current approach of a process of the highest rank gaining the ownership may cause load imbalance; consider using 628 hash(rank, root_local_index) as the bid for the ownership determination. 629 630 Level: advanced 631 632 .seealso: PetscSFCreate() 633 @*/ 634 PetscErrorCode PetscSFCreateByMatchingIndices(PetscLayout layout, PetscInt numRootIndices, const PetscInt *rootIndices, const PetscInt *rootLocalIndices, PetscInt rootLocalOffset, PetscInt numLeafIndices, const PetscInt *leafIndices, const PetscInt *leafLocalIndices, PetscInt leafLocalOffset, PetscSF *sfA, PetscSF *sf) 635 { 636 MPI_Comm comm = layout->comm; 637 PetscMPIInt size, rank; 638 PetscSF sf1; 639 PetscSFNode *owners, *buffer, *iremote; 640 PetscInt *ilocal, nleaves, N, n, i; 641 #if defined(PETSC_USE_DEBUG) 642 PetscInt N1; 643 #endif 644 PetscBool flag; 645 646 PetscFunctionBegin; 647 if (rootIndices) PetscValidIntPointer(rootIndices,3); 648 if (rootLocalIndices) PetscValidIntPointer(rootLocalIndices,4); 649 if (leafIndices) PetscValidIntPointer(leafIndices,7); 650 if (leafLocalIndices) PetscValidIntPointer(leafLocalIndices,8); 651 if (sfA) PetscValidPointer(sfA,10); 652 PetscValidPointer(sf,11); 653 PetscCheckFalse(numRootIndices < 0,PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "numRootIndices (%" PetscInt_FMT ") must be non-negative", numRootIndices); 654 PetscCheckFalse(numLeafIndices < 0,PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "numLeafIndices (%" PetscInt_FMT ") must be non-negative", numLeafIndices); 655 CHKERRMPI(MPI_Comm_size(comm, &size)); 656 CHKERRMPI(MPI_Comm_rank(comm, &rank)); 657 CHKERRQ(PetscLayoutSetUp(layout)); 658 CHKERRQ(PetscLayoutGetSize(layout, &N)); 659 CHKERRQ(PetscLayoutGetLocalSize(layout, &n)); 660 flag = (PetscBool)(leafIndices == rootIndices); 661 CHKERRMPI(MPIU_Allreduce(MPI_IN_PLACE, &flag, 1, MPIU_BOOL, MPI_LAND, comm)); 662 PetscCheckFalse(flag && numLeafIndices != numRootIndices,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "leafIndices == rootIndices, but numLeafIndices (%" PetscInt_FMT ") != numRootIndices(%" PetscInt_FMT ")", numLeafIndices, numRootIndices); 663 #if defined(PETSC_USE_DEBUG) 664 N1 = PETSC_MIN_INT; 665 for (i = 0; i < numRootIndices; i++) if (rootIndices[i] > N1) N1 = rootIndices[i]; 666 CHKERRMPI(MPI_Allreduce(MPI_IN_PLACE, &N1, 1, MPIU_INT, MPI_MAX, comm)); 667 PetscCheckFalse(N1 >= N,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Max. root index (%" PetscInt_FMT ") out of layout range [0,%" PetscInt_FMT ")", N1, N); 668 if (!flag) { 669 N1 = PETSC_MIN_INT; 670 for (i = 0; i < numLeafIndices; i++) if (leafIndices[i] > N1) N1 = leafIndices[i]; 671 CHKERRMPI(MPI_Allreduce(MPI_IN_PLACE, &N1, 1, MPIU_INT, MPI_MAX, comm)); 672 PetscCheckFalse(N1 >= N,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Max. leaf index (%" PetscInt_FMT ") out of layout range [0,%" PetscInt_FMT ")", N1, N); 673 } 674 #endif 675 /* Reduce: owners -> buffer */ 676 CHKERRQ(PetscMalloc1(n, &buffer)); 677 CHKERRQ(PetscSFCreate(comm, &sf1)); 678 CHKERRQ(PetscSFSetFromOptions(sf1)); 679 CHKERRQ(PetscSFSetGraphLayout(sf1, layout, numRootIndices, NULL, PETSC_OWN_POINTER, rootIndices)); 680 CHKERRQ(PetscMalloc1(numRootIndices, &owners)); 681 for (i = 0; i < numRootIndices; ++i) { 682 owners[i].rank = rank; 683 owners[i].index = rootLocalOffset + (rootLocalIndices ? rootLocalIndices[i] : i); 684 } 685 for (i = 0; i < n; ++i) { 686 buffer[i].index = -1; 687 buffer[i].rank = -1; 688 } 689 CHKERRQ(PetscSFReduceBegin(sf1, MPIU_2INT, owners, buffer, MPI_MAXLOC)); 690 CHKERRQ(PetscSFReduceEnd(sf1, MPIU_2INT, owners, buffer, MPI_MAXLOC)); 691 /* Bcast: buffer -> owners */ 692 if (!flag) { 693 /* leafIndices is different from rootIndices */ 694 CHKERRQ(PetscFree(owners)); 695 CHKERRQ(PetscSFSetGraphLayout(sf1, layout, numLeafIndices, NULL, PETSC_OWN_POINTER, leafIndices)); 696 CHKERRQ(PetscMalloc1(numLeafIndices, &owners)); 697 } 698 CHKERRQ(PetscSFBcastBegin(sf1, MPIU_2INT, buffer, owners, MPI_REPLACE)); 699 CHKERRQ(PetscSFBcastEnd(sf1, MPIU_2INT, buffer, owners, MPI_REPLACE)); 700 for (i = 0; i < numLeafIndices; ++i) PetscCheckFalse(owners[i].rank < 0,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Global point %" PetscInt_FMT " was unclaimed", leafIndices[i]); 701 CHKERRQ(PetscFree(buffer)); 702 if (sfA) {*sfA = sf1;} 703 else CHKERRQ(PetscSFDestroy(&sf1)); 704 /* Create sf */ 705 if (flag && rootLocalIndices == leafLocalIndices && leafLocalOffset == rootLocalOffset) { 706 /* leaf space == root space */ 707 for (i = 0, nleaves = 0; i < numLeafIndices; ++i) if (owners[i].rank != rank) ++nleaves; 708 CHKERRQ(PetscMalloc1(nleaves, &ilocal)); 709 CHKERRQ(PetscMalloc1(nleaves, &iremote)); 710 for (i = 0, nleaves = 0; i < numLeafIndices; ++i) { 711 if (owners[i].rank != rank) { 712 ilocal[nleaves] = leafLocalOffset + i; 713 iremote[nleaves].rank = owners[i].rank; 714 iremote[nleaves].index = owners[i].index; 715 ++nleaves; 716 } 717 } 718 CHKERRQ(PetscFree(owners)); 719 } else { 720 nleaves = numLeafIndices; 721 CHKERRQ(PetscMalloc1(nleaves, &ilocal)); 722 for (i = 0; i < nleaves; ++i) {ilocal[i] = leafLocalOffset + (leafLocalIndices ? leafLocalIndices[i] : i);} 723 iremote = owners; 724 } 725 CHKERRQ(PetscSFCreate(comm, sf)); 726 CHKERRQ(PetscSFSetFromOptions(*sf)); 727 CHKERRQ(PetscSFSetGraph(*sf, rootLocalOffset + numRootIndices, nleaves, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 728 PetscFunctionReturn(0); 729 } 730