1b0c7db22SLisandro Dalcin #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 2b0c7db22SLisandro Dalcin #include <petsc/private/sectionimpl.h> 3b0c7db22SLisandro Dalcin 4b0c7db22SLisandro Dalcin /*@C 5b0c7db22SLisandro Dalcin PetscSFSetGraphLayout - Set a parallel star forest via global indices and a PetscLayout 6b0c7db22SLisandro Dalcin 7b0c7db22SLisandro Dalcin Collective 8b0c7db22SLisandro Dalcin 9b0c7db22SLisandro Dalcin Input Arguments: 10b0c7db22SLisandro Dalcin + sf - star forest 11b0c7db22SLisandro Dalcin . layout - PetscLayout defining the global space 12b0c7db22SLisandro Dalcin . nleaves - number of leaf vertices on the current process, each of these references a root on any process 13b0c7db22SLisandro Dalcin . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage 14b0c7db22SLisandro Dalcin . localmode - copy mode for ilocal 15b0c7db22SLisandro Dalcin - iremote - remote locations of root vertices for each leaf on the current process 16b0c7db22SLisandro Dalcin 17b0c7db22SLisandro Dalcin Level: intermediate 18b0c7db22SLisandro Dalcin 19b0c7db22SLisandro Dalcin Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they 20b0c7db22SLisandro Dalcin encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not 21b0c7db22SLisandro Dalcin needed 22b0c7db22SLisandro Dalcin 23b0c7db22SLisandro Dalcin .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph() 24b0c7db22SLisandro Dalcin @*/ 25b0c7db22SLisandro Dalcin PetscErrorCode PetscSFSetGraphLayout(PetscSF sf,PetscLayout layout,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscInt *iremote) 26b0c7db22SLisandro Dalcin { 27b0c7db22SLisandro Dalcin PetscErrorCode ierr; 2838a25198SStefano Zampini const PetscInt *range; 2938a25198SStefano Zampini PetscInt i, nroots, ls = -1, ln = -1; 3038a25198SStefano Zampini PetscMPIInt lr = -1; 31b0c7db22SLisandro Dalcin PetscSFNode *remote; 32b0c7db22SLisandro Dalcin 33b0c7db22SLisandro Dalcin PetscFunctionBegin; 34b0c7db22SLisandro Dalcin ierr = PetscLayoutGetLocalSize(layout,&nroots);CHKERRQ(ierr); 3538a25198SStefano Zampini ierr = PetscLayoutGetRanges(layout,&range);CHKERRQ(ierr); 36b0c7db22SLisandro Dalcin ierr = PetscMalloc1(nleaves,&remote);CHKERRQ(ierr); 3738a25198SStefano Zampini if (nleaves) { ls = iremote[0] + 1; } 38b0c7db22SLisandro Dalcin for (i=0; i<nleaves; i++) { 3938a25198SStefano Zampini const PetscInt idx = iremote[i] - ls; 4038a25198SStefano Zampini if (idx < 0 || idx >= ln) { /* short-circuit the search */ 4138a25198SStefano Zampini ierr = PetscLayoutFindOwnerIndex(layout,iremote[i],&lr,&remote[i].index);CHKERRQ(ierr); 4238a25198SStefano Zampini remote[i].rank = lr; 4338a25198SStefano Zampini ls = range[lr]; 4438a25198SStefano Zampini ln = range[lr+1] - ls; 4538a25198SStefano Zampini } else { 4638a25198SStefano Zampini remote[i].rank = lr; 4738a25198SStefano Zampini remote[i].index = idx; 4838a25198SStefano Zampini } 49b0c7db22SLisandro Dalcin } 50b0c7db22SLisandro Dalcin ierr = PetscSFSetGraph(sf,nroots,nleaves,ilocal,localmode,remote,PETSC_OWN_POINTER);CHKERRQ(ierr); 51b0c7db22SLisandro Dalcin PetscFunctionReturn(0); 52b0c7db22SLisandro Dalcin } 53b0c7db22SLisandro Dalcin 54b0c7db22SLisandro Dalcin /*@ 55b0c7db22SLisandro Dalcin PetscSFSetGraphSection - Sets the PetscSF graph encoding the parallel dof overlap based upon the PetscSections 56b0c7db22SLisandro Dalcin describing the data layout. 57b0c7db22SLisandro Dalcin 58b0c7db22SLisandro Dalcin Input Parameters: 59b0c7db22SLisandro Dalcin + sf - The SF 60b0c7db22SLisandro Dalcin . localSection - PetscSection describing the local data layout 61b0c7db22SLisandro Dalcin - globalSection - PetscSection describing the global data layout 62b0c7db22SLisandro Dalcin 63b0c7db22SLisandro Dalcin Level: developer 64b0c7db22SLisandro Dalcin 65b0c7db22SLisandro Dalcin .seealso: PetscSFSetGraph(), PetscSFSetGraphLayout() 66b0c7db22SLisandro Dalcin @*/ 67b0c7db22SLisandro Dalcin PetscErrorCode PetscSFSetGraphSection(PetscSF sf, PetscSection localSection, PetscSection globalSection) 68b0c7db22SLisandro Dalcin { 69b0c7db22SLisandro Dalcin MPI_Comm comm; 70b0c7db22SLisandro Dalcin PetscLayout layout; 71b0c7db22SLisandro Dalcin const PetscInt *ranges; 72b0c7db22SLisandro Dalcin PetscInt *local; 73b0c7db22SLisandro Dalcin PetscSFNode *remote; 74b0c7db22SLisandro Dalcin PetscInt pStart, pEnd, p, nroots, nleaves = 0, l; 75b0c7db22SLisandro Dalcin PetscMPIInt size, rank; 76b0c7db22SLisandro Dalcin PetscErrorCode ierr; 77b0c7db22SLisandro Dalcin 78b0c7db22SLisandro Dalcin PetscFunctionBegin; 79b0c7db22SLisandro Dalcin PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 80b0c7db22SLisandro Dalcin PetscValidHeaderSpecific(localSection, PETSC_SECTION_CLASSID, 2); 81b0c7db22SLisandro Dalcin PetscValidHeaderSpecific(globalSection, PETSC_SECTION_CLASSID, 3); 82b0c7db22SLisandro Dalcin 83b0c7db22SLisandro Dalcin ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 8455b25c41SPierre Jolivet ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr); 8555b25c41SPierre Jolivet ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr); 86b0c7db22SLisandro Dalcin ierr = PetscSectionGetChart(globalSection, &pStart, &pEnd);CHKERRQ(ierr); 87b0c7db22SLisandro Dalcin ierr = PetscSectionGetConstrainedStorageSize(globalSection, &nroots);CHKERRQ(ierr); 88b0c7db22SLisandro Dalcin ierr = PetscLayoutCreate(comm, &layout);CHKERRQ(ierr); 89b0c7db22SLisandro Dalcin ierr = PetscLayoutSetBlockSize(layout, 1);CHKERRQ(ierr); 90b0c7db22SLisandro Dalcin ierr = PetscLayoutSetLocalSize(layout, nroots);CHKERRQ(ierr); 91b0c7db22SLisandro Dalcin ierr = PetscLayoutSetUp(layout);CHKERRQ(ierr); 92b0c7db22SLisandro Dalcin ierr = PetscLayoutGetRanges(layout, &ranges);CHKERRQ(ierr); 93b0c7db22SLisandro Dalcin for (p = pStart; p < pEnd; ++p) { 94b0c7db22SLisandro Dalcin PetscInt gdof, gcdof; 95b0c7db22SLisandro Dalcin 96b0c7db22SLisandro Dalcin ierr = PetscSectionGetDof(globalSection, p, &gdof);CHKERRQ(ierr); 97b0c7db22SLisandro Dalcin ierr = PetscSectionGetConstraintDof(globalSection, p, &gcdof);CHKERRQ(ierr); 98b0c7db22SLisandro Dalcin if (gcdof > (gdof < 0 ? -(gdof+1) : gdof)) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Point %D has %D constraints > %D dof", p, gcdof, (gdof < 0 ? -(gdof+1) : gdof)); 99b0c7db22SLisandro Dalcin nleaves += gdof < 0 ? -(gdof+1)-gcdof : gdof-gcdof; 100b0c7db22SLisandro Dalcin } 101b0c7db22SLisandro Dalcin ierr = PetscMalloc1(nleaves, &local);CHKERRQ(ierr); 102b0c7db22SLisandro Dalcin ierr = PetscMalloc1(nleaves, &remote);CHKERRQ(ierr); 103b0c7db22SLisandro Dalcin for (p = pStart, l = 0; p < pEnd; ++p) { 104b0c7db22SLisandro Dalcin const PetscInt *cind; 105b0c7db22SLisandro Dalcin PetscInt dof, cdof, off, gdof, gcdof, goff, gsize, d, c; 106b0c7db22SLisandro Dalcin 107b0c7db22SLisandro Dalcin ierr = PetscSectionGetDof(localSection, p, &dof);CHKERRQ(ierr); 108b0c7db22SLisandro Dalcin ierr = PetscSectionGetOffset(localSection, p, &off);CHKERRQ(ierr); 109b0c7db22SLisandro Dalcin ierr = PetscSectionGetConstraintDof(localSection, p, &cdof);CHKERRQ(ierr); 110b0c7db22SLisandro Dalcin ierr = PetscSectionGetConstraintIndices(localSection, p, &cind);CHKERRQ(ierr); 111b0c7db22SLisandro Dalcin ierr = PetscSectionGetDof(globalSection, p, &gdof);CHKERRQ(ierr); 112b0c7db22SLisandro Dalcin ierr = PetscSectionGetConstraintDof(globalSection, p, &gcdof);CHKERRQ(ierr); 113b0c7db22SLisandro Dalcin ierr = PetscSectionGetOffset(globalSection, p, &goff);CHKERRQ(ierr); 114b0c7db22SLisandro Dalcin if (!gdof) continue; /* Censored point */ 115b0c7db22SLisandro Dalcin gsize = gdof < 0 ? -(gdof+1)-gcdof : gdof-gcdof; 116b0c7db22SLisandro Dalcin if (gsize != dof-cdof) { 117b0c7db22SLisandro Dalcin if (gsize != dof) SETERRQ4(comm, PETSC_ERR_ARG_WRONG, "Global dof %D for point %D is neither the constrained size %D, nor the unconstrained %D", gsize, p, dof-cdof, dof); 118b0c7db22SLisandro Dalcin cdof = 0; /* Ignore constraints */ 119b0c7db22SLisandro Dalcin } 120b0c7db22SLisandro Dalcin for (d = 0, c = 0; d < dof; ++d) { 121b0c7db22SLisandro Dalcin if ((c < cdof) && (cind[c] == d)) {++c; continue;} 122b0c7db22SLisandro Dalcin local[l+d-c] = off+d; 123b0c7db22SLisandro Dalcin } 124b0c7db22SLisandro Dalcin if (gdof < 0) { 125b0c7db22SLisandro Dalcin for (d = 0; d < gsize; ++d, ++l) { 126b0c7db22SLisandro Dalcin PetscInt offset = -(goff+1) + d, r; 127b0c7db22SLisandro Dalcin 128b0c7db22SLisandro Dalcin ierr = PetscFindInt(offset,size+1,ranges,&r);CHKERRQ(ierr); 129b0c7db22SLisandro Dalcin if (r < 0) r = -(r+2); 130b0c7db22SLisandro Dalcin if ((r < 0) || (r >= size)) SETERRQ4(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Point %D mapped to invalid process %D (%D, %D)", p, r, gdof, goff); 131b0c7db22SLisandro Dalcin remote[l].rank = r; 132b0c7db22SLisandro Dalcin remote[l].index = offset - ranges[r]; 133b0c7db22SLisandro Dalcin } 134b0c7db22SLisandro Dalcin } else { 135b0c7db22SLisandro Dalcin for (d = 0; d < gsize; ++d, ++l) { 136b0c7db22SLisandro Dalcin remote[l].rank = rank; 137b0c7db22SLisandro Dalcin remote[l].index = goff+d - ranges[rank]; 138b0c7db22SLisandro Dalcin } 139b0c7db22SLisandro Dalcin } 140b0c7db22SLisandro Dalcin } 141b0c7db22SLisandro Dalcin if (l != nleaves) SETERRQ2(comm, PETSC_ERR_PLIB, "Iteration error, l %D != nleaves %D", l, nleaves); 142b0c7db22SLisandro Dalcin ierr = PetscLayoutDestroy(&layout);CHKERRQ(ierr); 143b0c7db22SLisandro Dalcin ierr = PetscSFSetGraph(sf, nroots, nleaves, local, PETSC_OWN_POINTER, remote, PETSC_OWN_POINTER);CHKERRQ(ierr); 144b0c7db22SLisandro Dalcin PetscFunctionReturn(0); 145b0c7db22SLisandro Dalcin } 146b0c7db22SLisandro Dalcin 147b0c7db22SLisandro Dalcin static PetscErrorCode PetscSectionCheckConstraints_Static(PetscSection s) 148b0c7db22SLisandro Dalcin { 149b0c7db22SLisandro Dalcin PetscErrorCode ierr; 150b0c7db22SLisandro Dalcin 151b0c7db22SLisandro Dalcin PetscFunctionBegin; 152b0c7db22SLisandro Dalcin if (!s->bc) { 153b0c7db22SLisandro Dalcin ierr = PetscSectionCreate(PETSC_COMM_SELF, &s->bc);CHKERRQ(ierr); 154b0c7db22SLisandro Dalcin ierr = PetscSectionSetChart(s->bc, s->pStart, s->pEnd);CHKERRQ(ierr); 155b0c7db22SLisandro Dalcin } 156b0c7db22SLisandro Dalcin PetscFunctionReturn(0); 157b0c7db22SLisandro Dalcin } 158b0c7db22SLisandro Dalcin 159b0c7db22SLisandro Dalcin /*@C 160b0c7db22SLisandro Dalcin PetscSFDistributeSection - Create a new PetscSection reorganized, moving from the root to the leaves of the SF 161b0c7db22SLisandro Dalcin 162b0c7db22SLisandro Dalcin Collective on sf 163b0c7db22SLisandro Dalcin 164b0c7db22SLisandro Dalcin Input Parameters: 165b0c7db22SLisandro Dalcin + sf - The SF 166b0c7db22SLisandro Dalcin - rootSection - Section defined on root space 167b0c7db22SLisandro Dalcin 168b0c7db22SLisandro Dalcin Output Parameters: 169b0c7db22SLisandro Dalcin + remoteOffsets - root offsets in leaf storage, or NULL 170b0c7db22SLisandro Dalcin - leafSection - Section defined on the leaf space 171b0c7db22SLisandro Dalcin 172b0c7db22SLisandro Dalcin Level: advanced 173b0c7db22SLisandro Dalcin 174b0c7db22SLisandro Dalcin .seealso: PetscSFCreate() 175b0c7db22SLisandro Dalcin @*/ 176b0c7db22SLisandro Dalcin PetscErrorCode PetscSFDistributeSection(PetscSF sf, PetscSection rootSection, PetscInt **remoteOffsets, PetscSection leafSection) 177b0c7db22SLisandro Dalcin { 178b0c7db22SLisandro Dalcin PetscSF embedSF; 179b0c7db22SLisandro Dalcin const PetscInt *indices; 180b0c7db22SLisandro Dalcin IS selected; 181b0c7db22SLisandro Dalcin PetscInt numFields, nroots, rpStart, rpEnd, lpStart = PETSC_MAX_INT, lpEnd = -1, f, c; 182b0c7db22SLisandro Dalcin PetscBool *sub, hasc; 183b0c7db22SLisandro Dalcin PetscErrorCode ierr; 184b0c7db22SLisandro Dalcin 185b0c7db22SLisandro Dalcin PetscFunctionBegin; 186b0c7db22SLisandro Dalcin ierr = PetscLogEventBegin(PETSCSF_DistSect,sf,0,0,0);CHKERRQ(ierr); 187b0c7db22SLisandro Dalcin ierr = PetscSectionGetNumFields(rootSection, &numFields);CHKERRQ(ierr); 188b0c7db22SLisandro Dalcin if (numFields) {ierr = PetscSectionSetNumFields(leafSection, numFields);CHKERRQ(ierr);} 189b0c7db22SLisandro Dalcin ierr = PetscMalloc1(numFields+2, &sub);CHKERRQ(ierr); 190b0c7db22SLisandro Dalcin sub[1] = rootSection->bc ? PETSC_TRUE : PETSC_FALSE; 191b0c7db22SLisandro Dalcin for (f = 0; f < numFields; ++f) { 192b0c7db22SLisandro Dalcin PetscSectionSym sym; 193b0c7db22SLisandro Dalcin const char *name = NULL; 194b0c7db22SLisandro Dalcin PetscInt numComp = 0; 195b0c7db22SLisandro Dalcin 196b0c7db22SLisandro Dalcin sub[2 + f] = rootSection->field[f]->bc ? PETSC_TRUE : PETSC_FALSE; 197b0c7db22SLisandro Dalcin ierr = PetscSectionGetFieldComponents(rootSection, f, &numComp);CHKERRQ(ierr); 198b0c7db22SLisandro Dalcin ierr = PetscSectionGetFieldName(rootSection, f, &name);CHKERRQ(ierr); 199b0c7db22SLisandro Dalcin ierr = PetscSectionGetFieldSym(rootSection, f, &sym);CHKERRQ(ierr); 200b0c7db22SLisandro Dalcin ierr = PetscSectionSetFieldComponents(leafSection, f, numComp);CHKERRQ(ierr); 201b0c7db22SLisandro Dalcin ierr = PetscSectionSetFieldName(leafSection, f, name);CHKERRQ(ierr); 202b0c7db22SLisandro Dalcin ierr = PetscSectionSetFieldSym(leafSection, f, sym);CHKERRQ(ierr); 203b0c7db22SLisandro Dalcin for (c = 0; c < rootSection->numFieldComponents[f]; ++c) { 204b0c7db22SLisandro Dalcin ierr = PetscSectionGetComponentName(rootSection, f, c, &name);CHKERRQ(ierr); 205b0c7db22SLisandro Dalcin ierr = PetscSectionSetComponentName(leafSection, f, c, name);CHKERRQ(ierr); 206b0c7db22SLisandro Dalcin } 207b0c7db22SLisandro Dalcin } 208b0c7db22SLisandro Dalcin ierr = PetscSectionGetChart(rootSection, &rpStart, &rpEnd);CHKERRQ(ierr); 209b0c7db22SLisandro Dalcin ierr = PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);CHKERRQ(ierr); 210b0c7db22SLisandro Dalcin rpEnd = PetscMin(rpEnd,nroots); 211b0c7db22SLisandro Dalcin rpEnd = PetscMax(rpStart,rpEnd); 212b0c7db22SLisandro Dalcin /* see if we can avoid creating the embedded SF, since it can cost more than an allreduce */ 213b0c7db22SLisandro Dalcin sub[0] = (PetscBool)(nroots != rpEnd - rpStart); 214b0c7db22SLisandro Dalcin ierr = MPIU_Allreduce(MPI_IN_PLACE, sub, 2+numFields, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)sf));CHKERRQ(ierr); 215b0c7db22SLisandro Dalcin if (sub[0]) { 216b0c7db22SLisandro Dalcin ierr = ISCreateStride(PETSC_COMM_SELF, rpEnd - rpStart, rpStart, 1, &selected);CHKERRQ(ierr); 217b0c7db22SLisandro Dalcin ierr = ISGetIndices(selected, &indices);CHKERRQ(ierr); 21872502a1fSJunchao Zhang ierr = PetscSFCreateEmbeddedRootSF(sf, rpEnd - rpStart, indices, &embedSF);CHKERRQ(ierr); 219b0c7db22SLisandro Dalcin ierr = ISRestoreIndices(selected, &indices);CHKERRQ(ierr); 220b0c7db22SLisandro Dalcin ierr = ISDestroy(&selected);CHKERRQ(ierr); 221b0c7db22SLisandro Dalcin } else { 222b0c7db22SLisandro Dalcin ierr = PetscObjectReference((PetscObject)sf);CHKERRQ(ierr); 223b0c7db22SLisandro Dalcin embedSF = sf; 224b0c7db22SLisandro Dalcin } 225b0c7db22SLisandro Dalcin ierr = PetscSFGetLeafRange(embedSF, &lpStart, &lpEnd);CHKERRQ(ierr); 226b0c7db22SLisandro Dalcin lpEnd++; 227b0c7db22SLisandro Dalcin 228b0c7db22SLisandro Dalcin ierr = PetscSectionSetChart(leafSection, lpStart, lpEnd);CHKERRQ(ierr); 229b0c7db22SLisandro Dalcin 230b0c7db22SLisandro Dalcin /* Constrained dof section */ 231b0c7db22SLisandro Dalcin hasc = sub[1]; 232b0c7db22SLisandro Dalcin for (f = 0; f < numFields; ++f) hasc = (PetscBool)(hasc || sub[2+f]); 233b0c7db22SLisandro Dalcin 234b0c7db22SLisandro Dalcin /* Could fuse these at the cost of copies and extra allocation */ 235ad227feaSJunchao Zhang ierr = PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->atlasDof[-rpStart], &leafSection->atlasDof[-lpStart],MPI_REPLACE);CHKERRQ(ierr); 236ad227feaSJunchao Zhang ierr = PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->atlasDof[-rpStart], &leafSection->atlasDof[-lpStart],MPI_REPLACE);CHKERRQ(ierr); 237b0c7db22SLisandro Dalcin if (sub[1]) { 238b0c7db22SLisandro Dalcin ierr = PetscSectionCheckConstraints_Static(rootSection);CHKERRQ(ierr); 239b0c7db22SLisandro Dalcin ierr = PetscSectionCheckConstraints_Static(leafSection);CHKERRQ(ierr); 240ad227feaSJunchao Zhang ierr = PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->bc->atlasDof[-rpStart], &leafSection->bc->atlasDof[-lpStart],MPI_REPLACE);CHKERRQ(ierr); 241ad227feaSJunchao Zhang ierr = PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->bc->atlasDof[-rpStart], &leafSection->bc->atlasDof[-lpStart],MPI_REPLACE);CHKERRQ(ierr); 242b0c7db22SLisandro Dalcin } 243b0c7db22SLisandro Dalcin for (f = 0; f < numFields; ++f) { 244ad227feaSJunchao Zhang ierr = PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->field[f]->atlasDof[-rpStart], &leafSection->field[f]->atlasDof[-lpStart],MPI_REPLACE);CHKERRQ(ierr); 245ad227feaSJunchao Zhang ierr = PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->field[f]->atlasDof[-rpStart], &leafSection->field[f]->atlasDof[-lpStart],MPI_REPLACE);CHKERRQ(ierr); 246b0c7db22SLisandro Dalcin if (sub[2+f]) { 247b0c7db22SLisandro Dalcin ierr = PetscSectionCheckConstraints_Static(rootSection->field[f]);CHKERRQ(ierr); 248b0c7db22SLisandro Dalcin ierr = PetscSectionCheckConstraints_Static(leafSection->field[f]);CHKERRQ(ierr); 249ad227feaSJunchao Zhang ierr = PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasDof[-rpStart], &leafSection->field[f]->bc->atlasDof[-lpStart],MPI_REPLACE);CHKERRQ(ierr); 250ad227feaSJunchao Zhang ierr = PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasDof[-rpStart], &leafSection->field[f]->bc->atlasDof[-lpStart],MPI_REPLACE);CHKERRQ(ierr); 251b0c7db22SLisandro Dalcin } 252b0c7db22SLisandro Dalcin } 253b0c7db22SLisandro Dalcin if (remoteOffsets) { 254b0c7db22SLisandro Dalcin ierr = PetscMalloc1(lpEnd - lpStart, remoteOffsets);CHKERRQ(ierr); 255ad227feaSJunchao Zhang ierr = PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->atlasOff[-rpStart], &(*remoteOffsets)[-lpStart],MPI_REPLACE);CHKERRQ(ierr); 256ad227feaSJunchao Zhang ierr = PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->atlasOff[-rpStart], &(*remoteOffsets)[-lpStart],MPI_REPLACE);CHKERRQ(ierr); 257b0c7db22SLisandro Dalcin } 258b0c7db22SLisandro Dalcin ierr = PetscSectionSetUp(leafSection);CHKERRQ(ierr); 259b0c7db22SLisandro Dalcin if (hasc) { /* need to communicate bcIndices */ 260b0c7db22SLisandro Dalcin PetscSF bcSF; 261b0c7db22SLisandro Dalcin PetscInt *rOffBc; 262b0c7db22SLisandro Dalcin 263b0c7db22SLisandro Dalcin ierr = PetscMalloc1(lpEnd - lpStart, &rOffBc);CHKERRQ(ierr); 264b0c7db22SLisandro Dalcin if (sub[1]) { 265ad227feaSJunchao Zhang ierr = PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->bc->atlasOff[-rpStart], &rOffBc[-lpStart],MPI_REPLACE);CHKERRQ(ierr); 266ad227feaSJunchao Zhang ierr = PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->bc->atlasOff[-rpStart], &rOffBc[-lpStart],MPI_REPLACE);CHKERRQ(ierr); 267b0c7db22SLisandro Dalcin ierr = PetscSFCreateSectionSF(embedSF, rootSection->bc, rOffBc, leafSection->bc, &bcSF);CHKERRQ(ierr); 268ad227feaSJunchao Zhang ierr = PetscSFBcastBegin(bcSF, MPIU_INT, rootSection->bcIndices, leafSection->bcIndices,MPI_REPLACE);CHKERRQ(ierr); 269ad227feaSJunchao Zhang ierr = PetscSFBcastEnd(bcSF, MPIU_INT, rootSection->bcIndices, leafSection->bcIndices,MPI_REPLACE);CHKERRQ(ierr); 270b0c7db22SLisandro Dalcin ierr = PetscSFDestroy(&bcSF);CHKERRQ(ierr); 271b0c7db22SLisandro Dalcin } 272b0c7db22SLisandro Dalcin for (f = 0; f < numFields; ++f) { 273b0c7db22SLisandro Dalcin if (sub[2+f]) { 274ad227feaSJunchao Zhang ierr = PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasOff[-rpStart], &rOffBc[-lpStart],MPI_REPLACE);CHKERRQ(ierr); 275ad227feaSJunchao Zhang ierr = PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasOff[-rpStart], &rOffBc[-lpStart],MPI_REPLACE);CHKERRQ(ierr); 276b0c7db22SLisandro Dalcin ierr = PetscSFCreateSectionSF(embedSF, rootSection->field[f]->bc, rOffBc, leafSection->field[f]->bc, &bcSF);CHKERRQ(ierr); 277ad227feaSJunchao Zhang ierr = PetscSFBcastBegin(bcSF, MPIU_INT, rootSection->field[f]->bcIndices, leafSection->field[f]->bcIndices,MPI_REPLACE);CHKERRQ(ierr); 278ad227feaSJunchao Zhang ierr = PetscSFBcastEnd(bcSF, MPIU_INT, rootSection->field[f]->bcIndices, leafSection->field[f]->bcIndices,MPI_REPLACE);CHKERRQ(ierr); 279b0c7db22SLisandro Dalcin ierr = PetscSFDestroy(&bcSF);CHKERRQ(ierr); 280b0c7db22SLisandro Dalcin } 281b0c7db22SLisandro Dalcin } 282b0c7db22SLisandro Dalcin ierr = PetscFree(rOffBc);CHKERRQ(ierr); 283b0c7db22SLisandro Dalcin } 284b0c7db22SLisandro Dalcin ierr = PetscSFDestroy(&embedSF);CHKERRQ(ierr); 285b0c7db22SLisandro Dalcin ierr = PetscFree(sub);CHKERRQ(ierr); 286b0c7db22SLisandro Dalcin ierr = PetscLogEventEnd(PETSCSF_DistSect,sf,0,0,0);CHKERRQ(ierr); 287b0c7db22SLisandro Dalcin PetscFunctionReturn(0); 288b0c7db22SLisandro Dalcin } 289b0c7db22SLisandro Dalcin 290b0c7db22SLisandro Dalcin /*@C 291b0c7db22SLisandro Dalcin PetscSFCreateRemoteOffsets - Create offsets for point data on remote processes 292b0c7db22SLisandro Dalcin 293b0c7db22SLisandro Dalcin Collective on sf 294b0c7db22SLisandro Dalcin 295b0c7db22SLisandro Dalcin Input Parameters: 296b0c7db22SLisandro Dalcin + sf - The SF 297b0c7db22SLisandro Dalcin . rootSection - Data layout of remote points for outgoing data (this is layout for SF roots) 298b0c7db22SLisandro Dalcin - leafSection - Data layout of local points for incoming data (this is layout for SF leaves) 299b0c7db22SLisandro Dalcin 300b0c7db22SLisandro Dalcin Output Parameter: 301b0c7db22SLisandro Dalcin . remoteOffsets - Offsets for point data on remote processes (these are offsets from the root section), or NULL 302b0c7db22SLisandro Dalcin 303b0c7db22SLisandro Dalcin Level: developer 304b0c7db22SLisandro Dalcin 305b0c7db22SLisandro Dalcin .seealso: PetscSFCreate() 306b0c7db22SLisandro Dalcin @*/ 307b0c7db22SLisandro Dalcin PetscErrorCode PetscSFCreateRemoteOffsets(PetscSF sf, PetscSection rootSection, PetscSection leafSection, PetscInt **remoteOffsets) 308b0c7db22SLisandro Dalcin { 309b0c7db22SLisandro Dalcin PetscSF embedSF; 310b0c7db22SLisandro Dalcin const PetscInt *indices; 311b0c7db22SLisandro Dalcin IS selected; 312b0c7db22SLisandro Dalcin PetscInt numRoots, rpStart = 0, rpEnd = 0, lpStart = 0, lpEnd = 0; 313b0c7db22SLisandro Dalcin PetscErrorCode ierr; 314b0c7db22SLisandro Dalcin 315b0c7db22SLisandro Dalcin PetscFunctionBegin; 316b0c7db22SLisandro Dalcin *remoteOffsets = NULL; 317b0c7db22SLisandro Dalcin ierr = PetscSFGetGraph(sf, &numRoots, NULL, NULL, NULL);CHKERRQ(ierr); 318b0c7db22SLisandro Dalcin if (numRoots < 0) PetscFunctionReturn(0); 319b0c7db22SLisandro Dalcin ierr = PetscLogEventBegin(PETSCSF_RemoteOff,sf,0,0,0);CHKERRQ(ierr); 320b0c7db22SLisandro Dalcin ierr = PetscSectionGetChart(rootSection, &rpStart, &rpEnd);CHKERRQ(ierr); 321b0c7db22SLisandro Dalcin ierr = PetscSectionGetChart(leafSection, &lpStart, &lpEnd);CHKERRQ(ierr); 322b0c7db22SLisandro Dalcin ierr = ISCreateStride(PETSC_COMM_SELF, rpEnd - rpStart, rpStart, 1, &selected);CHKERRQ(ierr); 323b0c7db22SLisandro Dalcin ierr = ISGetIndices(selected, &indices);CHKERRQ(ierr); 32472502a1fSJunchao Zhang ierr = PetscSFCreateEmbeddedRootSF(sf, rpEnd - rpStart, indices, &embedSF);CHKERRQ(ierr); 325b0c7db22SLisandro Dalcin ierr = ISRestoreIndices(selected, &indices);CHKERRQ(ierr); 326b0c7db22SLisandro Dalcin ierr = ISDestroy(&selected);CHKERRQ(ierr); 327b0c7db22SLisandro Dalcin ierr = PetscCalloc1(lpEnd - lpStart, remoteOffsets);CHKERRQ(ierr); 328ad227feaSJunchao Zhang ierr = PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->atlasOff[-rpStart], &(*remoteOffsets)[-lpStart],MPI_REPLACE);CHKERRQ(ierr); 329ad227feaSJunchao Zhang ierr = PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->atlasOff[-rpStart], &(*remoteOffsets)[-lpStart],MPI_REPLACE);CHKERRQ(ierr); 330b0c7db22SLisandro Dalcin ierr = PetscSFDestroy(&embedSF);CHKERRQ(ierr); 331b0c7db22SLisandro Dalcin ierr = PetscLogEventEnd(PETSCSF_RemoteOff,sf,0,0,0);CHKERRQ(ierr); 332b0c7db22SLisandro Dalcin PetscFunctionReturn(0); 333b0c7db22SLisandro Dalcin } 334b0c7db22SLisandro Dalcin 335b0c7db22SLisandro Dalcin /*@C 336b0c7db22SLisandro Dalcin PetscSFCreateSectionSF - Create an expanded SF of dofs, assuming the input SF relates points 337b0c7db22SLisandro Dalcin 338b0c7db22SLisandro Dalcin Collective on sf 339b0c7db22SLisandro Dalcin 340b0c7db22SLisandro Dalcin Input Parameters: 341b0c7db22SLisandro Dalcin + sf - The SF 342b0c7db22SLisandro Dalcin . rootSection - Data layout of remote points for outgoing data (this is usually the serial section) 343b0c7db22SLisandro Dalcin . remoteOffsets - Offsets for point data on remote processes (these are offsets from the root section), or NULL 344b0c7db22SLisandro Dalcin - leafSection - Data layout of local points for incoming data (this is the distributed section) 345b0c7db22SLisandro Dalcin 346b0c7db22SLisandro Dalcin Output Parameters: 347b0c7db22SLisandro Dalcin - sectionSF - The new SF 348b0c7db22SLisandro Dalcin 349b0c7db22SLisandro Dalcin Note: Either rootSection or remoteOffsets can be specified 350b0c7db22SLisandro Dalcin 351b0c7db22SLisandro Dalcin Level: advanced 352b0c7db22SLisandro Dalcin 353b0c7db22SLisandro Dalcin .seealso: PetscSFCreate() 354b0c7db22SLisandro Dalcin @*/ 355b0c7db22SLisandro Dalcin PetscErrorCode PetscSFCreateSectionSF(PetscSF sf, PetscSection rootSection, PetscInt remoteOffsets[], PetscSection leafSection, PetscSF *sectionSF) 356b0c7db22SLisandro Dalcin { 357b0c7db22SLisandro Dalcin MPI_Comm comm; 358b0c7db22SLisandro Dalcin const PetscInt *localPoints; 359b0c7db22SLisandro Dalcin const PetscSFNode *remotePoints; 360b0c7db22SLisandro Dalcin PetscInt lpStart, lpEnd; 361b0c7db22SLisandro Dalcin PetscInt numRoots, numSectionRoots, numPoints, numIndices = 0; 362b0c7db22SLisandro Dalcin PetscInt *localIndices; 363b0c7db22SLisandro Dalcin PetscSFNode *remoteIndices; 364b0c7db22SLisandro Dalcin PetscInt i, ind; 365b0c7db22SLisandro Dalcin PetscErrorCode ierr; 366b0c7db22SLisandro Dalcin 367b0c7db22SLisandro Dalcin PetscFunctionBegin; 368b0c7db22SLisandro Dalcin PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 369b0c7db22SLisandro Dalcin PetscValidPointer(rootSection,2); 370b0c7db22SLisandro Dalcin /* Cannot check PetscValidIntPointer(remoteOffsets,3) because it can be NULL if sf does not reference any points in leafSection */ 371b0c7db22SLisandro Dalcin PetscValidPointer(leafSection,4); 372b0c7db22SLisandro Dalcin PetscValidPointer(sectionSF,5); 373b0c7db22SLisandro Dalcin ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 374b0c7db22SLisandro Dalcin ierr = PetscSFCreate(comm, sectionSF);CHKERRQ(ierr); 375b0c7db22SLisandro Dalcin ierr = PetscSectionGetChart(leafSection, &lpStart, &lpEnd);CHKERRQ(ierr); 376b0c7db22SLisandro Dalcin ierr = PetscSectionGetStorageSize(rootSection, &numSectionRoots);CHKERRQ(ierr); 377b0c7db22SLisandro Dalcin ierr = PetscSFGetGraph(sf, &numRoots, &numPoints, &localPoints, &remotePoints);CHKERRQ(ierr); 378b0c7db22SLisandro Dalcin if (numRoots < 0) PetscFunctionReturn(0); 379b0c7db22SLisandro Dalcin ierr = PetscLogEventBegin(PETSCSF_SectSF,sf,0,0,0);CHKERRQ(ierr); 380b0c7db22SLisandro Dalcin for (i = 0; i < numPoints; ++i) { 381b0c7db22SLisandro Dalcin PetscInt localPoint = localPoints ? localPoints[i] : i; 382b0c7db22SLisandro Dalcin PetscInt dof; 383b0c7db22SLisandro Dalcin 384b0c7db22SLisandro Dalcin if ((localPoint >= lpStart) && (localPoint < lpEnd)) { 385b0c7db22SLisandro Dalcin ierr = PetscSectionGetDof(leafSection, localPoint, &dof);CHKERRQ(ierr); 386b0c7db22SLisandro Dalcin numIndices += dof; 387b0c7db22SLisandro Dalcin } 388b0c7db22SLisandro Dalcin } 389b0c7db22SLisandro Dalcin ierr = PetscMalloc1(numIndices, &localIndices);CHKERRQ(ierr); 390b0c7db22SLisandro Dalcin ierr = PetscMalloc1(numIndices, &remoteIndices);CHKERRQ(ierr); 391b0c7db22SLisandro Dalcin /* Create new index graph */ 392b0c7db22SLisandro Dalcin for (i = 0, ind = 0; i < numPoints; ++i) { 393b0c7db22SLisandro Dalcin PetscInt localPoint = localPoints ? localPoints[i] : i; 394b0c7db22SLisandro Dalcin PetscInt rank = remotePoints[i].rank; 395b0c7db22SLisandro Dalcin 396b0c7db22SLisandro Dalcin if ((localPoint >= lpStart) && (localPoint < lpEnd)) { 397b0c7db22SLisandro Dalcin PetscInt remoteOffset = remoteOffsets[localPoint-lpStart]; 398b0c7db22SLisandro Dalcin PetscInt loff, dof, d; 399b0c7db22SLisandro Dalcin 400b0c7db22SLisandro Dalcin ierr = PetscSectionGetOffset(leafSection, localPoint, &loff);CHKERRQ(ierr); 401b0c7db22SLisandro Dalcin ierr = PetscSectionGetDof(leafSection, localPoint, &dof);CHKERRQ(ierr); 402b0c7db22SLisandro Dalcin for (d = 0; d < dof; ++d, ++ind) { 403b0c7db22SLisandro Dalcin localIndices[ind] = loff+d; 404b0c7db22SLisandro Dalcin remoteIndices[ind].rank = rank; 405b0c7db22SLisandro Dalcin remoteIndices[ind].index = remoteOffset+d; 406b0c7db22SLisandro Dalcin } 407b0c7db22SLisandro Dalcin } 408b0c7db22SLisandro Dalcin } 409b0c7db22SLisandro Dalcin if (numIndices != ind) SETERRQ2(comm, PETSC_ERR_PLIB, "Inconsistency in indices, %D should be %D", ind, numIndices); 410b0c7db22SLisandro Dalcin ierr = PetscSFSetGraph(*sectionSF, numSectionRoots, numIndices, localIndices, PETSC_OWN_POINTER, remoteIndices, PETSC_OWN_POINTER);CHKERRQ(ierr); 411b0c7db22SLisandro Dalcin ierr = PetscSFSetUp(*sectionSF);CHKERRQ(ierr); 412b0c7db22SLisandro Dalcin ierr = PetscLogEventEnd(PETSCSF_SectSF,sf,0,0,0);CHKERRQ(ierr); 413b0c7db22SLisandro Dalcin PetscFunctionReturn(0); 414b0c7db22SLisandro Dalcin } 4158fa9e22eSVaclav Hapla 4168fa9e22eSVaclav Hapla /*@C 4178fa9e22eSVaclav Hapla PetscSFCreateFromLayouts - Creates a parallel star forest mapping two PetscLayout objects 4188fa9e22eSVaclav Hapla 4198fa9e22eSVaclav Hapla Collective 4208fa9e22eSVaclav Hapla 4218fa9e22eSVaclav Hapla Input Arguments: 4228fa9e22eSVaclav Hapla + rmap - PetscLayout defining the global root space 4238fa9e22eSVaclav Hapla - lmap - PetscLayout defining the global leaf space 4248fa9e22eSVaclav Hapla 4258fa9e22eSVaclav Hapla Output Arguments: 4268fa9e22eSVaclav Hapla . sf - The parallel star forest 4278fa9e22eSVaclav Hapla 4288fa9e22eSVaclav Hapla Level: intermediate 4298fa9e22eSVaclav Hapla 4308fa9e22eSVaclav Hapla .seealso: PetscSFCreate(), PetscLayoutCreate(), PetscSFSetGraphLayout() 4318fa9e22eSVaclav Hapla @*/ 4328fa9e22eSVaclav Hapla PetscErrorCode PetscSFCreateFromLayouts(PetscLayout rmap, PetscLayout lmap, PetscSF* sf) 4338fa9e22eSVaclav Hapla { 4348fa9e22eSVaclav Hapla PetscErrorCode ierr; 4358fa9e22eSVaclav Hapla PetscInt i,nroots,nleaves = 0; 4368fa9e22eSVaclav Hapla PetscInt rN, lst, len; 4378fa9e22eSVaclav Hapla PetscMPIInt owner = -1; 4388fa9e22eSVaclav Hapla PetscSFNode *remote; 4398fa9e22eSVaclav Hapla MPI_Comm rcomm = rmap->comm; 4408fa9e22eSVaclav Hapla MPI_Comm lcomm = lmap->comm; 4418fa9e22eSVaclav Hapla PetscMPIInt flg; 4428fa9e22eSVaclav Hapla 4438fa9e22eSVaclav Hapla PetscFunctionBegin; 4448fa9e22eSVaclav Hapla PetscValidPointer(sf,3); 4458fa9e22eSVaclav Hapla if (!rmap->setupcalled) SETERRQ(rcomm,PETSC_ERR_ARG_WRONGSTATE,"Root layout not setup"); 4468fa9e22eSVaclav Hapla if (!lmap->setupcalled) SETERRQ(lcomm,PETSC_ERR_ARG_WRONGSTATE,"Leaf layout not setup"); 4478fa9e22eSVaclav Hapla ierr = MPI_Comm_compare(rcomm,lcomm,&flg);CHKERRMPI(ierr); 4488fa9e22eSVaclav Hapla if (flg != MPI_CONGRUENT && flg != MPI_IDENT) SETERRQ(rcomm,PETSC_ERR_SUP,"cannot map two layouts with non-matching communicators"); 4498fa9e22eSVaclav Hapla ierr = PetscSFCreate(rcomm,sf);CHKERRQ(ierr); 4508fa9e22eSVaclav Hapla ierr = PetscLayoutGetLocalSize(rmap,&nroots);CHKERRQ(ierr); 4518fa9e22eSVaclav Hapla ierr = PetscLayoutGetSize(rmap,&rN);CHKERRQ(ierr); 4528fa9e22eSVaclav Hapla ierr = PetscLayoutGetRange(lmap,&lst,&len);CHKERRQ(ierr); 4538fa9e22eSVaclav Hapla ierr = PetscMalloc1(len-lst,&remote);CHKERRQ(ierr); 4548fa9e22eSVaclav Hapla for (i = lst; i < len && i < rN; i++) { 4558fa9e22eSVaclav Hapla if (owner < -1 || i >= rmap->range[owner+1]) { 4568fa9e22eSVaclav Hapla ierr = PetscLayoutFindOwner(rmap,i,&owner);CHKERRQ(ierr); 4578fa9e22eSVaclav Hapla } 4588fa9e22eSVaclav Hapla remote[nleaves].rank = owner; 4598fa9e22eSVaclav Hapla remote[nleaves].index = i - rmap->range[owner]; 4608fa9e22eSVaclav Hapla nleaves++; 4618fa9e22eSVaclav Hapla } 4628fa9e22eSVaclav Hapla ierr = PetscSFSetGraph(*sf,nroots,nleaves,NULL,PETSC_OWN_POINTER,remote,PETSC_COPY_VALUES);CHKERRQ(ierr); 4638fa9e22eSVaclav Hapla ierr = PetscFree(remote);CHKERRQ(ierr); 4648fa9e22eSVaclav Hapla PetscFunctionReturn(0); 4658fa9e22eSVaclav Hapla } 4668fa9e22eSVaclav Hapla 4678fa9e22eSVaclav Hapla /* TODO: handle nooffprocentries like MatZeroRowsMapLocal_Private, since this code is the same */ 4688fa9e22eSVaclav Hapla PetscErrorCode PetscLayoutMapLocal(PetscLayout map,PetscInt N,const PetscInt idxs[], PetscInt *on,PetscInt **oidxs,PetscInt **ogidxs) 4698fa9e22eSVaclav Hapla { 4708fa9e22eSVaclav Hapla PetscInt *owners = map->range; 4718fa9e22eSVaclav Hapla PetscInt n = map->n; 4728fa9e22eSVaclav Hapla PetscSF sf; 4738fa9e22eSVaclav Hapla PetscInt *lidxs,*work = NULL; 4748fa9e22eSVaclav Hapla PetscSFNode *ridxs; 4758fa9e22eSVaclav Hapla PetscMPIInt rank, p = 0; 4768fa9e22eSVaclav Hapla PetscInt r, len = 0; 4778fa9e22eSVaclav Hapla PetscErrorCode ierr; 4788fa9e22eSVaclav Hapla 4798fa9e22eSVaclav Hapla PetscFunctionBegin; 4808fa9e22eSVaclav Hapla if (on) *on = 0; /* squelch -Wmaybe-uninitialized */ 4818fa9e22eSVaclav Hapla /* Create SF where leaves are input idxs and roots are owned idxs */ 4828fa9e22eSVaclav Hapla ierr = MPI_Comm_rank(map->comm,&rank);CHKERRMPI(ierr); 4838fa9e22eSVaclav Hapla ierr = PetscMalloc1(n,&lidxs);CHKERRQ(ierr); 4848fa9e22eSVaclav Hapla for (r = 0; r < n; ++r) lidxs[r] = -1; 4858fa9e22eSVaclav Hapla ierr = PetscMalloc1(N,&ridxs);CHKERRQ(ierr); 4868fa9e22eSVaclav Hapla for (r = 0; r < N; ++r) { 4878fa9e22eSVaclav Hapla const PetscInt idx = idxs[r]; 4888fa9e22eSVaclav Hapla if (idx < 0 || map->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range [0,%D)",idx,map->N); 4898fa9e22eSVaclav Hapla if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this idx too */ 4908fa9e22eSVaclav Hapla ierr = PetscLayoutFindOwner(map,idx,&p);CHKERRQ(ierr); 4918fa9e22eSVaclav Hapla } 4928fa9e22eSVaclav Hapla ridxs[r].rank = p; 4938fa9e22eSVaclav Hapla ridxs[r].index = idxs[r] - owners[p]; 4948fa9e22eSVaclav Hapla } 4958fa9e22eSVaclav Hapla ierr = PetscSFCreate(map->comm,&sf);CHKERRQ(ierr); 4968fa9e22eSVaclav Hapla ierr = PetscSFSetGraph(sf,n,N,NULL,PETSC_OWN_POINTER,ridxs,PETSC_OWN_POINTER);CHKERRQ(ierr); 4978fa9e22eSVaclav Hapla ierr = PetscSFReduceBegin(sf,MPIU_INT,(PetscInt*)idxs,lidxs,MPI_LOR);CHKERRQ(ierr); 4988fa9e22eSVaclav Hapla ierr = PetscSFReduceEnd(sf,MPIU_INT,(PetscInt*)idxs,lidxs,MPI_LOR);CHKERRQ(ierr); 4998fa9e22eSVaclav Hapla if (ogidxs) { /* communicate global idxs */ 5008fa9e22eSVaclav Hapla PetscInt cum = 0,start,*work2; 5018fa9e22eSVaclav Hapla 5028fa9e22eSVaclav Hapla ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 5038fa9e22eSVaclav Hapla ierr = PetscCalloc1(N,&work2);CHKERRQ(ierr); 5048fa9e22eSVaclav Hapla for (r = 0; r < N; ++r) if (idxs[r] >=0) cum++; 5058fa9e22eSVaclav Hapla ierr = MPI_Scan(&cum,&start,1,MPIU_INT,MPI_SUM,map->comm);CHKERRMPI(ierr); 5068fa9e22eSVaclav Hapla start -= cum; 5078fa9e22eSVaclav Hapla cum = 0; 5088fa9e22eSVaclav Hapla for (r = 0; r < N; ++r) if (idxs[r] >=0) work2[r] = start+cum++; 50983df288dSJunchao Zhang ierr = PetscSFReduceBegin(sf,MPIU_INT,work2,work,MPI_REPLACE);CHKERRQ(ierr); 51083df288dSJunchao Zhang ierr = PetscSFReduceEnd(sf,MPIU_INT,work2,work,MPI_REPLACE);CHKERRQ(ierr); 5118fa9e22eSVaclav Hapla ierr = PetscFree(work2);CHKERRQ(ierr); 5128fa9e22eSVaclav Hapla } 5138fa9e22eSVaclav Hapla ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5148fa9e22eSVaclav Hapla /* Compress and put in indices */ 5158fa9e22eSVaclav Hapla for (r = 0; r < n; ++r) 5168fa9e22eSVaclav Hapla if (lidxs[r] >= 0) { 5178fa9e22eSVaclav Hapla if (work) work[len] = work[r]; 5188fa9e22eSVaclav Hapla lidxs[len++] = r; 5198fa9e22eSVaclav Hapla } 5208fa9e22eSVaclav Hapla if (on) *on = len; 5218fa9e22eSVaclav Hapla if (oidxs) *oidxs = lidxs; 5228fa9e22eSVaclav Hapla if (ogidxs) *ogidxs = work; 5238fa9e22eSVaclav Hapla PetscFunctionReturn(0); 5248fa9e22eSVaclav Hapla } 525*deffd5ebSksagiyam 526*deffd5ebSksagiyam /*@ 527*deffd5ebSksagiyam PetscSFCreateByMatchingIndices - Create SF by matching root and leaf indices 528*deffd5ebSksagiyam 529*deffd5ebSksagiyam Collective 530*deffd5ebSksagiyam 531*deffd5ebSksagiyam Input Parameters: 532*deffd5ebSksagiyam + layout - PetscLayout defining the global index space and the rank that brokers each index 533*deffd5ebSksagiyam . numRootIndices - size of rootIndices 534*deffd5ebSksagiyam . rootIndices - PetscInt array of global indices of which this process requests ownership 535*deffd5ebSksagiyam . rootLocalIndices - root local index permutation (NULL if no permutation) 536*deffd5ebSksagiyam . rootLocalOffset - offset to be added to root local indices 537*deffd5ebSksagiyam . numLeafIndices - size of leafIndices 538*deffd5ebSksagiyam . leafIndices - PetscInt array of global indices with which this process requires data associated 539*deffd5ebSksagiyam . leafLocalIndices - leaf local index permutation (NULL if no permutation) 540*deffd5ebSksagiyam - leafLocalOffset - offset to be added to leaf local indices 541*deffd5ebSksagiyam 542*deffd5ebSksagiyam Output Parameter: 543*deffd5ebSksagiyam + sfA - star forest representing the communication pattern from the layout space to the leaf space (NULL if not needed) 544*deffd5ebSksagiyam - sf - star forest representing the communication pattern from the root space to the leaf space 545*deffd5ebSksagiyam 546*deffd5ebSksagiyam Example 1: 547*deffd5ebSksagiyam $ 548*deffd5ebSksagiyam $ rank : 0 1 2 549*deffd5ebSksagiyam $ rootIndices : [1 0 2] [3] [3] 550*deffd5ebSksagiyam $ rootLocalOffset : 100 200 300 551*deffd5ebSksagiyam $ layout : [0 1] [2] [3] 552*deffd5ebSksagiyam $ leafIndices : [0] [2] [0 3] 553*deffd5ebSksagiyam $ leafLocalOffset : 400 500 600 554*deffd5ebSksagiyam $ 555*deffd5ebSksagiyam would build the following SF: 556*deffd5ebSksagiyam $ 557*deffd5ebSksagiyam $ [0] 400 <- (0,101) 558*deffd5ebSksagiyam $ [1] 500 <- (0,102) 559*deffd5ebSksagiyam $ [2] 600 <- (0,101) 560*deffd5ebSksagiyam $ [2] 601 <- (2,300) 561*deffd5ebSksagiyam $ 562*deffd5ebSksagiyam Example 2: 563*deffd5ebSksagiyam $ 564*deffd5ebSksagiyam $ rank : 0 1 2 565*deffd5ebSksagiyam $ rootIndices : [1 0 2] [3] [3] 566*deffd5ebSksagiyam $ rootLocalOffset : 100 200 300 567*deffd5ebSksagiyam $ layout : [0 1] [2] [3] 568*deffd5ebSksagiyam $ leafIndices : rootIndices rootIndices rootIndices 569*deffd5ebSksagiyam $ leafLocalOffset : rootLocalOffset rootLocalOffset rootLocalOffset 570*deffd5ebSksagiyam $ 571*deffd5ebSksagiyam would build the following SF: 572*deffd5ebSksagiyam $ 573*deffd5ebSksagiyam $ [1] 200 <- (2,300) 574*deffd5ebSksagiyam $ 575*deffd5ebSksagiyam Example 3: 576*deffd5ebSksagiyam $ 577*deffd5ebSksagiyam $ No process requests ownership of global index 1, but no process needs it. 578*deffd5ebSksagiyam $ 579*deffd5ebSksagiyam $ rank : 0 1 2 580*deffd5ebSksagiyam $ numRootIndices : 2 1 1 581*deffd5ebSksagiyam $ rootIndices : [0 2] [3] [3] 582*deffd5ebSksagiyam $ rootLocalOffset : 100 200 300 583*deffd5ebSksagiyam $ layout : [0 1] [2] [3] 584*deffd5ebSksagiyam $ numLeafIndices : 1 1 2 585*deffd5ebSksagiyam $ leafIndices : [0] [2] [0 3] 586*deffd5ebSksagiyam $ leafLocalOffset : 400 500 600 587*deffd5ebSksagiyam $ 588*deffd5ebSksagiyam would build the following SF: 589*deffd5ebSksagiyam $ 590*deffd5ebSksagiyam $ [0] 400 <- (0,100) 591*deffd5ebSksagiyam $ [1] 500 <- (0,101) 592*deffd5ebSksagiyam $ [2] 600 <- (0,100) 593*deffd5ebSksagiyam $ [2] 601 <- (2,300) 594*deffd5ebSksagiyam $ 595*deffd5ebSksagiyam 596*deffd5ebSksagiyam Notes: 597*deffd5ebSksagiyam The layout parameter represents any partitioning of [0, N), where N is the total number of global indices, and its 598*deffd5ebSksagiyam local size can be set to PETSC_DECIDE. 599*deffd5ebSksagiyam If a global index x lies in the partition owned by process i, each process whose rootIndices contains x requests 600*deffd5ebSksagiyam ownership of x and sends its own rank and the local index of x to process i. 601*deffd5ebSksagiyam If multiple processes request ownership of x, the one with the highest rank is to own x. 602*deffd5ebSksagiyam Process i then broadcasts the ownership information, so that each process whose leafIndices contains x knows the 603*deffd5ebSksagiyam ownership information of x. 604*deffd5ebSksagiyam The output sf is constructed by associating each leaf point to a root point in this way. 605*deffd5ebSksagiyam 606*deffd5ebSksagiyam Suppose there is point data ordered according to the global indices and partitioned according to the given layout. 607*deffd5ebSksagiyam The optional output PetscSF object sfA can be used to push such data to leaf points. 608*deffd5ebSksagiyam 609*deffd5ebSksagiyam All indices in rootIndices and leafIndices must lie in the layout range. The union (over all processes) of rootIndices 610*deffd5ebSksagiyam must cover that of leafIndices, but need not cover the entire layout. 611*deffd5ebSksagiyam 612*deffd5ebSksagiyam If (leafIndices, leafLocalIndices, leafLocalOffset) == (rootIndices, rootLocalIndices, rootLocalOffset), the output 613*deffd5ebSksagiyam star forest is almost identity, so will only include non-trivial part of the map. 614*deffd5ebSksagiyam 615*deffd5ebSksagiyam Developer Notes: 616*deffd5ebSksagiyam Current approach of a process of the highest rank gaining the ownership may cause load imbalance; consider using 617*deffd5ebSksagiyam hash(rank, root_local_index) as the bid for the ownership determination. 618*deffd5ebSksagiyam 619*deffd5ebSksagiyam Level: advanced 620*deffd5ebSksagiyam 621*deffd5ebSksagiyam .seealso: PetscSFCreate() 622*deffd5ebSksagiyam @*/ 623*deffd5ebSksagiyam PetscErrorCode PetscSFCreateByMatchingIndices(PetscLayout layout, PetscInt numRootIndices, const PetscInt *rootIndices, const PetscInt *rootLocalIndices, PetscInt rootLocalOffset, PetscInt numLeafIndices, const PetscInt *leafIndices, const PetscInt *leafLocalIndices, PetscInt leafLocalOffset, PetscSF *sfA, PetscSF *sf) 624*deffd5ebSksagiyam { 625*deffd5ebSksagiyam MPI_Comm comm = layout->comm; 626*deffd5ebSksagiyam PetscMPIInt size, rank; 627*deffd5ebSksagiyam PetscSF sf1; 628*deffd5ebSksagiyam PetscSFNode *owners, *buffer, *iremote; 629*deffd5ebSksagiyam PetscInt *ilocal, nleaves, N, n, i; 630*deffd5ebSksagiyam #if defined(PETSC_USE_DEBUG) 631*deffd5ebSksagiyam PetscInt N1; 632*deffd5ebSksagiyam #endif 633*deffd5ebSksagiyam PetscBool flag; 634*deffd5ebSksagiyam PetscErrorCode ierr; 635*deffd5ebSksagiyam 636*deffd5ebSksagiyam PetscFunctionBegin; 637*deffd5ebSksagiyam if (rootIndices) PetscValidIntPointer(rootIndices,3); 638*deffd5ebSksagiyam if (rootLocalIndices) PetscValidIntPointer(rootLocalIndices,4); 639*deffd5ebSksagiyam if (leafIndices) PetscValidIntPointer(leafIndices,7); 640*deffd5ebSksagiyam if (leafLocalIndices) PetscValidIntPointer(leafLocalIndices,8); 641*deffd5ebSksagiyam if (sfA) PetscValidPointer(sfA,10); 642*deffd5ebSksagiyam PetscValidPointer(sf,11); 643*deffd5ebSksagiyam if (numRootIndices < 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "numRootIndices (%D) must be non-negative", numRootIndices); 644*deffd5ebSksagiyam if (numLeafIndices < 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "numLeafIndices (%D) must be non-negative", numLeafIndices); 645*deffd5ebSksagiyam ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr); 646*deffd5ebSksagiyam ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr); 647*deffd5ebSksagiyam ierr = PetscLayoutSetUp(layout);CHKERRQ(ierr); 648*deffd5ebSksagiyam ierr = PetscLayoutGetSize(layout, &N);CHKERRQ(ierr); 649*deffd5ebSksagiyam ierr = PetscLayoutGetLocalSize(layout, &n);CHKERRQ(ierr); 650*deffd5ebSksagiyam flag = (PetscBool)(leafIndices == rootIndices); 651*deffd5ebSksagiyam ierr = MPIU_Allreduce(MPI_IN_PLACE, &flag, 1, MPIU_BOOL, MPI_LAND, comm);CHKERRQ(ierr); 652*deffd5ebSksagiyam if (flag && numLeafIndices != numRootIndices) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "leafIndices == rootIndices, but numLeafIndices (%D) != numRootIndices(%D)", numLeafIndices, numRootIndices); 653*deffd5ebSksagiyam #if defined(PETSC_USE_DEBUG) 654*deffd5ebSksagiyam N1 = PETSC_MIN_INT; 655*deffd5ebSksagiyam for (i = 0; i < numRootIndices; i++) if (rootIndices[i] > N1) N1 = rootIndices[i]; 656*deffd5ebSksagiyam ierr = MPI_Allreduce(MPI_IN_PLACE, &N1, 1, MPIU_INT, MPI_MAX, comm);CHKERRMPI(ierr); 657*deffd5ebSksagiyam if (N1 >= N) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Max. root index (%D) out of layout range [0,%D)", N1, N); 658*deffd5ebSksagiyam if (!flag) { 659*deffd5ebSksagiyam N1 = PETSC_MIN_INT; 660*deffd5ebSksagiyam for (i = 0; i < numLeafIndices; i++) if (leafIndices[i] > N1) N1 = leafIndices[i]; 661*deffd5ebSksagiyam ierr = MPI_Allreduce(MPI_IN_PLACE, &N1, 1, MPIU_INT, MPI_MAX, comm);CHKERRMPI(ierr); 662*deffd5ebSksagiyam if (N1 >= N) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Max. leaf index (%D) out of layout range [0,%D)", N1, N); 663*deffd5ebSksagiyam } 664*deffd5ebSksagiyam #endif 665*deffd5ebSksagiyam /* Reduce: owners -> buffer */ 666*deffd5ebSksagiyam ierr = PetscMalloc1(n, &buffer);CHKERRQ(ierr); 667*deffd5ebSksagiyam ierr = PetscSFCreate(comm, &sf1);CHKERRQ(ierr); 668*deffd5ebSksagiyam ierr = PetscSFSetFromOptions(sf1);CHKERRQ(ierr); 669*deffd5ebSksagiyam ierr = PetscSFSetGraphLayout(sf1, layout, numRootIndices, NULL, PETSC_OWN_POINTER, rootIndices);CHKERRQ(ierr); 670*deffd5ebSksagiyam ierr = PetscMalloc1(numRootIndices, &owners);CHKERRQ(ierr); 671*deffd5ebSksagiyam for (i = 0; i < numRootIndices; ++i) { 672*deffd5ebSksagiyam owners[i].rank = rank; 673*deffd5ebSksagiyam owners[i].index = rootLocalOffset + (rootLocalIndices ? rootLocalIndices[i] : i); 674*deffd5ebSksagiyam } 675*deffd5ebSksagiyam for (i = 0; i < n; ++i) { 676*deffd5ebSksagiyam buffer[i].index = -1; 677*deffd5ebSksagiyam buffer[i].rank = -1; 678*deffd5ebSksagiyam } 679*deffd5ebSksagiyam ierr = PetscSFReduceBegin(sf1, MPIU_2INT, owners, buffer, MPI_MAXLOC);CHKERRQ(ierr); 680*deffd5ebSksagiyam ierr = PetscSFReduceEnd(sf1, MPIU_2INT, owners, buffer, MPI_MAXLOC);CHKERRQ(ierr); 681*deffd5ebSksagiyam /* Bcast: buffer -> owners */ 682*deffd5ebSksagiyam if (!flag) { 683*deffd5ebSksagiyam /* leafIndices is different from rootIndices */ 684*deffd5ebSksagiyam ierr = PetscFree(owners);CHKERRQ(ierr); 685*deffd5ebSksagiyam ierr = PetscSFSetGraphLayout(sf1, layout, numLeafIndices, NULL, PETSC_OWN_POINTER, leafIndices);CHKERRQ(ierr); 686*deffd5ebSksagiyam ierr = PetscMalloc1(numLeafIndices, &owners);CHKERRQ(ierr); 687*deffd5ebSksagiyam } 688*deffd5ebSksagiyam ierr = PetscSFBcastBegin(sf1, MPIU_2INT, buffer, owners, MPI_REPLACE);CHKERRQ(ierr); 689*deffd5ebSksagiyam ierr = PetscSFBcastEnd(sf1, MPIU_2INT, buffer, owners, MPI_REPLACE);CHKERRQ(ierr); 690*deffd5ebSksagiyam for (i = 0; i < numLeafIndices; ++i) if (owners[i].rank < 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Global point %D was unclaimed", leafIndices[i]); 691*deffd5ebSksagiyam ierr = PetscFree(buffer);CHKERRQ(ierr); 692*deffd5ebSksagiyam if (sfA) {*sfA = sf1;} 693*deffd5ebSksagiyam else {ierr = PetscSFDestroy(&sf1);CHKERRQ(ierr);} 694*deffd5ebSksagiyam /* Create sf */ 695*deffd5ebSksagiyam if (flag && rootLocalIndices == leafLocalIndices && leafLocalOffset == rootLocalOffset) { 696*deffd5ebSksagiyam /* leaf space == root space */ 697*deffd5ebSksagiyam for (i = 0, nleaves = 0; i < numLeafIndices; ++i) if (owners[i].rank != rank) ++nleaves; 698*deffd5ebSksagiyam ierr = PetscMalloc1(nleaves, &ilocal);CHKERRQ(ierr); 699*deffd5ebSksagiyam ierr = PetscMalloc1(nleaves, &iremote);CHKERRQ(ierr); 700*deffd5ebSksagiyam for (i = 0, nleaves = 0; i < numLeafIndices; ++i) { 701*deffd5ebSksagiyam if (owners[i].rank != rank) { 702*deffd5ebSksagiyam ilocal[nleaves] = leafLocalOffset + i; 703*deffd5ebSksagiyam iremote[nleaves].rank = owners[i].rank; 704*deffd5ebSksagiyam iremote[nleaves].index = owners[i].index; 705*deffd5ebSksagiyam ++nleaves; 706*deffd5ebSksagiyam } 707*deffd5ebSksagiyam } 708*deffd5ebSksagiyam ierr = PetscFree(owners);CHKERRQ(ierr); 709*deffd5ebSksagiyam } else { 710*deffd5ebSksagiyam nleaves = numLeafIndices; 711*deffd5ebSksagiyam ierr = PetscMalloc1(nleaves, &ilocal);CHKERRQ(ierr); 712*deffd5ebSksagiyam for (i = 0; i < nleaves; ++i) {ilocal[i] = leafLocalOffset + (leafLocalIndices ? leafLocalIndices[i] : i);} 713*deffd5ebSksagiyam iremote = owners; 714*deffd5ebSksagiyam } 715*deffd5ebSksagiyam ierr = PetscSFCreate(comm, sf);CHKERRQ(ierr); 716*deffd5ebSksagiyam ierr = PetscSFSetFromOptions(*sf);CHKERRQ(ierr); 717*deffd5ebSksagiyam ierr = PetscSFSetGraph(*sf, rootLocalOffset + numRootIndices, nleaves, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER);CHKERRQ(ierr); 718*deffd5ebSksagiyam PetscFunctionReturn(0); 719*deffd5ebSksagiyam } 720