140e23c03SJunchao Zhang #include <../src/vec/is/sf/impls/basic/sfbasic.h> 2cd620004SJunchao Zhang #include <../src/vec/is/sf/impls/basic/sfpack.h> 353dd6d7dSJunchao Zhang #include <petsc/private/viewerimpl.h> 4b23bfdefSJunchao Zhang 540e23c03SJunchao Zhang /*===================================================================================*/ 640e23c03SJunchao Zhang /* SF public interface implementations */ 740e23c03SJunchao Zhang /*===================================================================================*/ 89371c9d4SSatish Balay PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf) { 9b23bfdefSJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic *)sf->data; 1071438e86SJunchao Zhang PetscInt *rlengths, *ilengths, i, nRemoteRootRanks, nRemoteLeafRanks; 1140e23c03SJunchao Zhang PetscMPIInt rank, niranks, *iranks, tag; 1295fce210SBarry Smith MPI_Comm comm; 13b5a8e515SJed Brown MPI_Group group; 1440e23c03SJunchao Zhang MPI_Request *rootreqs, *leafreqs; 1595fce210SBarry Smith 1695fce210SBarry Smith PetscFunctionBegin; 179566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_group(PETSC_COMM_SELF, &group)); 189566063dSJacob Faibussowitsch PetscCall(PetscSFSetUpRanks(sf, group)); 199566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_free(&group)); 209566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 219566063dSJacob Faibussowitsch PetscCall(PetscObjectGetNewTag((PetscObject)sf, &tag)); 229566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2395fce210SBarry Smith /* 2495fce210SBarry Smith * Inform roots about how many leaves and from which ranks 2595fce210SBarry Smith */ 269566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sf->nranks, &rlengths)); 27cd620004SJunchao Zhang /* Determine number, sending ranks and length of incoming */ 289371c9d4SSatish Balay for (i = 0; i < sf->nranks; i++) { rlengths[i] = sf->roffset[i + 1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */ } 2971438e86SJunchao Zhang nRemoteRootRanks = sf->nranks - sf->ndranks; 309566063dSJacob Faibussowitsch PetscCall(PetscCommBuildTwoSided(comm, 1, MPIU_INT, nRemoteRootRanks, sf->ranks + sf->ndranks, rlengths + sf->ndranks, &niranks, &iranks, (void **)&ilengths)); 31c943f53fSJed Brown 320b899082SJunchao Zhang /* Sort iranks. See use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() on why. 330b899082SJunchao Zhang We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is 340b899082SJunchao Zhang small and the sorting is cheap. 350b899082SJunchao Zhang */ 369566063dSJacob Faibussowitsch PetscCall(PetscSortMPIIntWithIntArray(niranks, iranks, ilengths)); 370b899082SJunchao Zhang 38c943f53fSJed Brown /* Partition into distinguished and non-distinguished incoming ranks */ 39c943f53fSJed Brown bas->ndiranks = sf->ndranks; 40c943f53fSJed Brown bas->niranks = bas->ndiranks + niranks; 419566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(bas->niranks, &bas->iranks, bas->niranks + 1, &bas->ioffset)); 42c943f53fSJed Brown bas->ioffset[0] = 0; 43c943f53fSJed Brown for (i = 0; i < bas->ndiranks; i++) { 44c943f53fSJed Brown bas->iranks[i] = sf->ranks[i]; 45c943f53fSJed Brown bas->ioffset[i + 1] = bas->ioffset[i] + rlengths[i]; 46c943f53fSJed Brown } 47c9cc58a2SBarry Smith PetscCheck(bas->ndiranks <= 1 && (bas->ndiranks != 1 || bas->iranks[0] == rank), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Broken setup for shared ranks"); 4840e23c03SJunchao Zhang for (; i < bas->niranks; i++) { 49c943f53fSJed Brown bas->iranks[i] = iranks[i - bas->ndiranks]; 50c943f53fSJed Brown bas->ioffset[i + 1] = bas->ioffset[i] + ilengths[i - bas->ndiranks]; 51c943f53fSJed Brown } 52c943f53fSJed Brown bas->itotal = bas->ioffset[i]; 539566063dSJacob Faibussowitsch PetscCall(PetscFree(rlengths)); 549566063dSJacob Faibussowitsch PetscCall(PetscFree(iranks)); 559566063dSJacob Faibussowitsch PetscCall(PetscFree(ilengths)); 5695fce210SBarry Smith 5795fce210SBarry Smith /* Send leaf identities to roots */ 5871438e86SJunchao Zhang nRemoteLeafRanks = bas->niranks - bas->ndiranks; 599566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bas->itotal, &bas->irootloc)); 609566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nRemoteLeafRanks, &rootreqs, nRemoteRootRanks, &leafreqs)); 6148a46eb9SPierre Jolivet for (i = bas->ndiranks; i < bas->niranks; i++) PetscCallMPI(MPIU_Irecv(bas->irootloc + bas->ioffset[i], bas->ioffset[i + 1] - bas->ioffset[i], MPIU_INT, bas->iranks[i], tag, comm, &rootreqs[i - bas->ndiranks])); 6240e23c03SJunchao Zhang for (i = 0; i < sf->nranks; i++) { 63c87b50c4SJunchao Zhang PetscInt npoints = sf->roffset[i + 1] - sf->roffset[i]; 6440e23c03SJunchao Zhang if (i < sf->ndranks) { 6508401ef6SPierre Jolivet PetscCheck(sf->ranks[i] == rank, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot interpret distinguished leaf rank"); 6608401ef6SPierre Jolivet PetscCheck(bas->iranks[0] == rank, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot interpret distinguished root rank"); 6708401ef6SPierre Jolivet PetscCheck(npoints == bas->ioffset[1] - bas->ioffset[0], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Distinguished rank exchange has mismatched lengths"); 689566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(bas->irootloc + bas->ioffset[0], sf->rremote + sf->roffset[i], npoints)); 69c943f53fSJed Brown continue; 70c943f53fSJed Brown } 719566063dSJacob Faibussowitsch PetscCallMPI(MPIU_Isend(sf->rremote + sf->roffset[i], npoints, MPIU_INT, sf->ranks[i], tag, comm, &leafreqs[i - sf->ndranks])); 72bf39f1bfSJed Brown } 739566063dSJacob Faibussowitsch PetscCallMPI(MPI_Waitall(nRemoteLeafRanks, rootreqs, MPI_STATUSES_IGNORE)); 749566063dSJacob Faibussowitsch PetscCallMPI(MPI_Waitall(nRemoteRootRanks, leafreqs, MPI_STATUSES_IGNORE)); 7595fce210SBarry Smith 7671438e86SJunchao Zhang sf->nleafreqs = nRemoteRootRanks; 7771438e86SJunchao Zhang bas->nrootreqs = nRemoteLeafRanks; 78cd620004SJunchao Zhang sf->persistent = PETSC_TRUE; 79eb02082bSJunchao Zhang 8071438e86SJunchao Zhang /* Setup fields related to packing, such as rootbuflen[] */ 819566063dSJacob Faibussowitsch PetscCall(PetscSFSetUpPackFields(sf)); 829566063dSJacob Faibussowitsch PetscCall(PetscFree2(rootreqs, leafreqs)); 8395fce210SBarry Smith PetscFunctionReturn(0); 8495fce210SBarry Smith } 8595fce210SBarry Smith 869371c9d4SSatish Balay PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf) { 87cd620004SJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic *)sf->data; 8871438e86SJunchao Zhang PetscSFLink link = bas->avail, next; 8995fce210SBarry Smith 9095fce210SBarry Smith PetscFunctionBegin; 9128b400f6SJacob Faibussowitsch PetscCheck(!bas->inuse, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Outstanding operation has not been completed"); 929566063dSJacob Faibussowitsch PetscCall(PetscFree2(bas->iranks, bas->ioffset)); 939566063dSJacob Faibussowitsch PetscCall(PetscFree(bas->irootloc)); 9471438e86SJunchao Zhang 957fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_DEVICE) 969566063dSJacob Faibussowitsch for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, bas->irootloc_d[i])); 97eb02082bSJunchao Zhang #endif 9871438e86SJunchao Zhang 9971438e86SJunchao Zhang #if defined(PETSC_HAVE_NVSHMEM) 1009566063dSJacob Faibussowitsch PetscCall(PetscSFReset_Basic_NVSHMEM(sf)); 10171438e86SJunchao Zhang #endif 10271438e86SJunchao Zhang 1039371c9d4SSatish Balay for (; link; link = next) { 1049371c9d4SSatish Balay next = link->next; 1059371c9d4SSatish Balay PetscCall(PetscSFLinkDestroy(sf, link)); 1069371c9d4SSatish Balay } 10771438e86SJunchao Zhang bas->avail = NULL; 1089566063dSJacob Faibussowitsch PetscCall(PetscSFResetPackFields(sf)); 10995fce210SBarry Smith PetscFunctionReturn(0); 11095fce210SBarry Smith } 11195fce210SBarry Smith 1129371c9d4SSatish Balay PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf) { 11395fce210SBarry Smith PetscFunctionBegin; 1149566063dSJacob Faibussowitsch PetscCall(PetscSFReset_Basic(sf)); 1159566063dSJacob Faibussowitsch PetscCall(PetscFree(sf->data)); 11695fce210SBarry Smith PetscFunctionReturn(0); 11795fce210SBarry Smith } 11895fce210SBarry Smith 11962152dedSBarry Smith #if defined(PETSC_USE_SINGLE_LIBRARY) 12062152dedSBarry Smith #include <petscmat.h> 12162152dedSBarry Smith 1229371c9d4SSatish Balay PETSC_INTERN PetscErrorCode PetscSFView_Basic_PatternAndSizes(PetscSF sf, PetscViewer viewer) { 12362152dedSBarry Smith PetscSF_Basic *bas = (PetscSF_Basic *)sf->data; 12453dd6d7dSJunchao Zhang PetscInt i, nrootranks, ndrootranks; 12562152dedSBarry Smith const PetscInt *rootoffset; 12662152dedSBarry Smith PetscMPIInt rank, size; 12753dd6d7dSJunchao Zhang const PetscMPIInt *rootranks; 12862152dedSBarry Smith MPI_Comm comm = PetscObjectComm((PetscObject)sf); 12953dd6d7dSJunchao Zhang PetscScalar unitbytes; 13062152dedSBarry Smith Mat A; 13162152dedSBarry Smith 13262152dedSBarry Smith PetscFunctionBegin; 1339566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 1349566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 13553dd6d7dSJunchao Zhang /* PetscSFView is most useful for the SF used in VecScatterBegin/End in MatMult etc, where we do 13653dd6d7dSJunchao Zhang PetscSFBcast, i.e., roots send data to leaves. We dump the communication pattern into a matrix 13753dd6d7dSJunchao Zhang in senders' view point: how many bytes I will send to my neighbors. 13853dd6d7dSJunchao Zhang 13953dd6d7dSJunchao Zhang Looking at a column of the matrix, one can also know how many bytes the rank will receive from others. 14053dd6d7dSJunchao Zhang 14153dd6d7dSJunchao Zhang If PetscSFLink bas->inuse is available, we can use that to get tree vertex size. But that would give 14253dd6d7dSJunchao Zhang different interpretations for the same SF for different data types. Since we most care about VecScatter, 14353dd6d7dSJunchao Zhang we uniformly treat each vertex as a PetscScalar. 14453dd6d7dSJunchao Zhang */ 14553dd6d7dSJunchao Zhang unitbytes = (PetscScalar)sizeof(PetscScalar); 14653dd6d7dSJunchao Zhang 1479566063dSJacob Faibussowitsch PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, &rootranks, &rootoffset, NULL)); 1489566063dSJacob Faibussowitsch PetscCall(MatCreateAIJ(comm, 1, 1, size, size, 1, NULL, nrootranks - ndrootranks, NULL, &A)); 1499566063dSJacob Faibussowitsch PetscCall(MatSetOptionsPrefix(A, "__petsc_internal__")); /* To prevent the internal A from taking any command line options */ 15048a46eb9SPierre Jolivet for (i = 0; i < nrootranks; i++) PetscCall(MatSetValue(A, (PetscInt)rank, bas->iranks[i], (rootoffset[i + 1] - rootoffset[i]) * unitbytes, INSERT_VALUES)); 1519566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1529566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1539566063dSJacob Faibussowitsch PetscCall(MatView(A, viewer)); 1549566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 15562152dedSBarry Smith PetscFunctionReturn(0); 15662152dedSBarry Smith } 15762152dedSBarry Smith #endif 15862152dedSBarry Smith 1599371c9d4SSatish Balay PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf, PetscViewer viewer) { 16053dd6d7dSJunchao Zhang PetscBool isascii; 16195fce210SBarry Smith 16295fce210SBarry Smith PetscFunctionBegin; 1639566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 1649566063dSJacob Faibussowitsch if (isascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) PetscCall(PetscViewerASCIIPrintf(viewer, " MultiSF sort=%s\n", sf->rankorder ? "rank-order" : "unordered")); 16562152dedSBarry Smith #if defined(PETSC_USE_SINGLE_LIBRARY) 16653dd6d7dSJunchao Zhang else { 16753dd6d7dSJunchao Zhang PetscBool isdraw, isbinary; 1689566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1699566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 17048a46eb9SPierre Jolivet if ((isascii && viewer->format == PETSC_VIEWER_ASCII_MATLAB) || isdraw || isbinary) PetscCall(PetscSFView_Basic_PatternAndSizes(sf, viewer)); 17162152dedSBarry Smith } 17262152dedSBarry Smith #endif 17395fce210SBarry Smith PetscFunctionReturn(0); 17495fce210SBarry Smith } 17595fce210SBarry Smith 1769371c9d4SSatish Balay static PetscErrorCode PetscSFBcastBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op) { 177cd620004SJunchao Zhang PetscSFLink link = NULL; 17895fce210SBarry Smith 17995fce210SBarry Smith PetscFunctionBegin; 18071438e86SJunchao Zhang /* Create a communication link, which provides buffers, MPI requests etc (if MPI is used) */ 1819566063dSJacob Faibussowitsch PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, PETSCSF_BCAST, &link)); 18271438e86SJunchao Zhang /* Pack rootdata to rootbuf for remote communication */ 1839566063dSJacob Faibussowitsch PetscCall(PetscSFLinkPackRootData(sf, link, PETSCSF_REMOTE, rootdata)); 18471438e86SJunchao Zhang /* Start communcation, e.g., post MPI_Isend */ 1859566063dSJacob Faibussowitsch PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_ROOT2LEAF)); 18671438e86SJunchao Zhang /* Do local scatter (i.e., self to self communication), which overlaps with the remote communication above */ 1879566063dSJacob Faibussowitsch PetscCall(PetscSFLinkScatterLocal(sf, link, PETSCSF_ROOT2LEAF, (void *)rootdata, leafdata, op)); 18895fce210SBarry Smith PetscFunctionReturn(0); 18995fce210SBarry Smith } 19095fce210SBarry Smith 1919371c9d4SSatish Balay PETSC_INTERN PetscErrorCode PetscSFBcastEnd_Basic(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) { 192cd620004SJunchao Zhang PetscSFLink link = NULL; 19395fce210SBarry Smith 19495fce210SBarry Smith PetscFunctionBegin; 195cd620004SJunchao Zhang /* Retrieve the link used in XxxBegin() with root/leafdata as key */ 1969566063dSJacob Faibussowitsch PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link)); 19771438e86SJunchao Zhang /* Finish remote communication, e.g., post MPI_Waitall */ 1989566063dSJacob Faibussowitsch PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_ROOT2LEAF)); 19971438e86SJunchao Zhang /* Unpack data in leafbuf to leafdata for remote communication */ 2009566063dSJacob Faibussowitsch PetscCall(PetscSFLinkUnpackLeafData(sf, link, PETSCSF_REMOTE, leafdata, op)); 20171438e86SJunchao Zhang /* Recycle the link */ 2029566063dSJacob Faibussowitsch PetscCall(PetscSFLinkReclaim(sf, &link)); 203cd620004SJunchao Zhang PetscFunctionReturn(0); 204cd620004SJunchao Zhang } 205cd620004SJunchao Zhang 206cd620004SJunchao Zhang /* Shared by ReduceBegin and FetchAndOpBegin */ 2079371c9d4SSatish Balay static inline PetscErrorCode PetscSFLeafToRootBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op, PetscSFOperation sfop, PetscSFLink *out) { 20871438e86SJunchao Zhang PetscSFLink link = NULL; 209cd620004SJunchao Zhang 210cd620004SJunchao Zhang PetscFunctionBegin; 2119566063dSJacob Faibussowitsch PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, sfop, &link)); 2129566063dSJacob Faibussowitsch PetscCall(PetscSFLinkPackLeafData(sf, link, PETSCSF_REMOTE, leafdata)); 2139566063dSJacob Faibussowitsch PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_LEAF2ROOT)); 214cd620004SJunchao Zhang *out = link; 21595fce210SBarry Smith PetscFunctionReturn(0); 21695fce210SBarry Smith } 21795fce210SBarry Smith 21895fce210SBarry Smith /* leaf -> root with reduction */ 2199371c9d4SSatish Balay static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op) { 220cd620004SJunchao Zhang PetscSFLink link = NULL; 22195fce210SBarry Smith 22295fce210SBarry Smith PetscFunctionBegin; 2239566063dSJacob Faibussowitsch PetscCall(PetscSFLeafToRootBegin_Basic(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op, PETSCSF_REDUCE, &link)); 2249566063dSJacob Faibussowitsch PetscCall(PetscSFLinkScatterLocal(sf, link, PETSCSF_LEAF2ROOT, rootdata, (void *)leafdata, op)); 22595fce210SBarry Smith PetscFunctionReturn(0); 22695fce210SBarry Smith } 22795fce210SBarry Smith 2289371c9d4SSatish Balay PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) { 229cd620004SJunchao Zhang PetscSFLink link = NULL; 23095fce210SBarry Smith 23195fce210SBarry Smith PetscFunctionBegin; 2329566063dSJacob Faibussowitsch PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link)); 2339566063dSJacob Faibussowitsch PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_LEAF2ROOT)); 2349566063dSJacob Faibussowitsch PetscCall(PetscSFLinkUnpackRootData(sf, link, PETSCSF_REMOTE, rootdata, op)); 2359566063dSJacob Faibussowitsch PetscCall(PetscSFLinkReclaim(sf, &link)); 23695fce210SBarry Smith PetscFunctionReturn(0); 23795fce210SBarry Smith } 23895fce210SBarry Smith 2399371c9d4SSatish Balay PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, void *leafupdate, MPI_Op op) { 240cd620004SJunchao Zhang PetscSFLink link = NULL; 24195fce210SBarry Smith 24295fce210SBarry Smith PetscFunctionBegin; 2439566063dSJacob Faibussowitsch PetscCall(PetscSFLeafToRootBegin_Basic(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op, PETSCSF_FETCH, &link)); 2449566063dSJacob Faibussowitsch PetscCall(PetscSFLinkFetchAndOpLocal(sf, link, rootdata, leafdata, leafupdate, op)); 24595fce210SBarry Smith PetscFunctionReturn(0); 24695fce210SBarry Smith } 24795fce210SBarry Smith 2489371c9d4SSatish Balay static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) { 249cd620004SJunchao Zhang PetscSFLink link = NULL; 25095fce210SBarry Smith 25195fce210SBarry Smith PetscFunctionBegin; 2529566063dSJacob Faibussowitsch PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link)); 25395fce210SBarry Smith /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */ 2549566063dSJacob Faibussowitsch PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_LEAF2ROOT)); 255cd620004SJunchao Zhang /* Do fetch-and-op, the (remote) update results are in rootbuf */ 2569566063dSJacob Faibussowitsch PetscCall(PetscSFLinkFetchAndOpRemote(sf, link, rootdata, op)); 257cd620004SJunchao Zhang /* Bcast rootbuf to leafupdate */ 2589566063dSJacob Faibussowitsch PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_ROOT2LEAF)); 2599566063dSJacob Faibussowitsch PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_ROOT2LEAF)); 260b23bfdefSJunchao Zhang /* Unpack and insert fetched data into leaves */ 2619566063dSJacob Faibussowitsch PetscCall(PetscSFLinkUnpackLeafData(sf, link, PETSCSF_REMOTE, leafupdate, MPI_REPLACE)); 2629566063dSJacob Faibussowitsch PetscCall(PetscSFLinkReclaim(sf, &link)); 26395fce210SBarry Smith PetscFunctionReturn(0); 26495fce210SBarry Smith } 26595fce210SBarry Smith 2669371c9d4SSatish Balay PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf, PetscInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc) { 2678750ddebSJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic *)sf->data; 2688750ddebSJunchao Zhang 2698750ddebSJunchao Zhang PetscFunctionBegin; 2708750ddebSJunchao Zhang if (niranks) *niranks = bas->niranks; 2718750ddebSJunchao Zhang if (iranks) *iranks = bas->iranks; 2728750ddebSJunchao Zhang if (ioffset) *ioffset = bas->ioffset; 2738750ddebSJunchao Zhang if (irootloc) *irootloc = bas->irootloc; 2748750ddebSJunchao Zhang PetscFunctionReturn(0); 2758750ddebSJunchao Zhang } 2768750ddebSJunchao Zhang 27772502a1fSJunchao Zhang /* An optimized PetscSFCreateEmbeddedRootSF. We aggresively make use of the established communication on sf. 278f659e5c7SJunchao Zhang We need one bcast on sf, and no communication anymore to build the embedded sf. Note that selected[] 279f659e5c7SJunchao Zhang was sorted before calling the routine. 280f659e5c7SJunchao Zhang */ 2819371c9d4SSatish Balay PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedRootSF_Basic(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf) { 282f659e5c7SJunchao Zhang PetscSF esf; 283cd620004SJunchao Zhang PetscInt esf_nranks, esf_ndranks, *esf_roffset, *esf_rmine, *esf_rremote; 284cd620004SJunchao Zhang PetscInt i, j, p, q, nroots, esf_nleaves, *new_ilocal, nranks, ndranks, niranks, ndiranks, minleaf, maxleaf, maxlocal; 285cd620004SJunchao Zhang char *rootdata, *leafdata, *leafmem; /* Only stores 0 or 1, so we can save memory with char */ 286f659e5c7SJunchao Zhang PetscMPIInt *esf_ranks; 287f659e5c7SJunchao Zhang const PetscMPIInt *ranks, *iranks; 288cd620004SJunchao Zhang const PetscInt *roffset, *rmine, *rremote, *ioffset, *irootloc; 289f659e5c7SJunchao Zhang PetscBool connected; 290f659e5c7SJunchao Zhang PetscSFNode *new_iremote; 291f659e5c7SJunchao Zhang PetscSF_Basic *bas; 292f659e5c7SJunchao Zhang 293f659e5c7SJunchao Zhang PetscFunctionBegin; 2949566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), &esf)); 2959566063dSJacob Faibussowitsch PetscCall(PetscSFSetFromOptions(esf)); 2969566063dSJacob Faibussowitsch PetscCall(PetscSFSetType(esf, PETSCSFBASIC)); /* This optimized routine can only create a basic sf */ 297f659e5c7SJunchao Zhang 298cd620004SJunchao Zhang /* Find out which leaves are still connected to roots in the embedded sf by doing a Bcast */ 2999566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL)); 3009566063dSJacob Faibussowitsch PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf)); 301cd620004SJunchao Zhang maxlocal = maxleaf - minleaf + 1; 3029566063dSJacob Faibussowitsch PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem)); 303cd620004SJunchao Zhang leafdata = leafmem - minleaf; 304f659e5c7SJunchao Zhang /* Tag selected roots */ 305f659e5c7SJunchao Zhang for (i = 0; i < nselected; ++i) rootdata[selected[i]] = 1; 306f659e5c7SJunchao Zhang 3079566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(sf, MPI_CHAR, rootdata, leafdata, MPI_REPLACE)); 3089566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(sf, MPI_CHAR, rootdata, leafdata, MPI_REPLACE)); 3099566063dSJacob Faibussowitsch PetscCall(PetscSFGetLeafInfo_Basic(sf, &nranks, &ndranks, &ranks, &roffset, &rmine, &rremote)); /* Get send info */ 310cd620004SJunchao Zhang esf_nranks = esf_ndranks = esf_nleaves = 0; 311b23bfdefSJunchao Zhang for (i = 0; i < nranks; i++) { 312cd620004SJunchao Zhang connected = PETSC_FALSE; /* Is this process still connected to this remote root rank? */ 3139371c9d4SSatish Balay for (j = roffset[i]; j < roffset[i + 1]; j++) { 3149371c9d4SSatish Balay if (leafdata[rmine[j]]) { 3159371c9d4SSatish Balay esf_nleaves++; 3169371c9d4SSatish Balay connected = PETSC_TRUE; 3179371c9d4SSatish Balay } 3189371c9d4SSatish Balay } 3199371c9d4SSatish Balay if (connected) { 3209371c9d4SSatish Balay esf_nranks++; 3219371c9d4SSatish Balay if (i < ndranks) esf_ndranks++; 3229371c9d4SSatish Balay } 323f659e5c7SJunchao Zhang } 324f659e5c7SJunchao Zhang 325f659e5c7SJunchao Zhang /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */ 3269566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal)); 3279566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(esf_nleaves, &new_iremote)); 3289566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(esf_nranks, &esf_ranks, esf_nranks + 1, &esf_roffset, esf_nleaves, &esf_rmine, esf_nleaves, &esf_rremote)); 329f659e5c7SJunchao Zhang p = 0; /* Counter for connected root ranks */ 330f659e5c7SJunchao Zhang q = 0; /* Counter for connected leaves */ 331f659e5c7SJunchao Zhang esf_roffset[0] = 0; 332f659e5c7SJunchao Zhang for (i = 0; i < nranks; i++) { /* Scan leaf data again to fill esf arrays */ 333f659e5c7SJunchao Zhang connected = PETSC_FALSE; 334cd620004SJunchao Zhang for (j = roffset[i]; j < roffset[i + 1]; j++) { 335cd620004SJunchao Zhang if (leafdata[rmine[j]]) { 336f659e5c7SJunchao Zhang esf_rmine[q] = new_ilocal[q] = rmine[j]; 337f659e5c7SJunchao Zhang esf_rremote[q] = rremote[j]; 338f659e5c7SJunchao Zhang new_iremote[q].index = rremote[j]; 339f659e5c7SJunchao Zhang new_iremote[q].rank = ranks[i]; 340f659e5c7SJunchao Zhang connected = PETSC_TRUE; 341f659e5c7SJunchao Zhang q++; 342f659e5c7SJunchao Zhang } 343f659e5c7SJunchao Zhang } 344f659e5c7SJunchao Zhang if (connected) { 345f659e5c7SJunchao Zhang esf_ranks[p] = ranks[i]; 346f659e5c7SJunchao Zhang esf_roffset[p + 1] = q; 347f659e5c7SJunchao Zhang p++; 348f659e5c7SJunchao Zhang } 349f659e5c7SJunchao Zhang } 350f659e5c7SJunchao Zhang 351f659e5c7SJunchao Zhang /* SetGraph internally resets the SF, so we only set its fields after the call */ 3529566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 353f659e5c7SJunchao Zhang esf->nranks = esf_nranks; 354f659e5c7SJunchao Zhang esf->ndranks = esf_ndranks; 355f659e5c7SJunchao Zhang esf->ranks = esf_ranks; 356f659e5c7SJunchao Zhang esf->roffset = esf_roffset; 357f659e5c7SJunchao Zhang esf->rmine = esf_rmine; 358f659e5c7SJunchao Zhang esf->rremote = esf_rremote; 359cd620004SJunchao Zhang esf->nleafreqs = esf_nranks - esf_ndranks; 360f659e5c7SJunchao Zhang 361f659e5c7SJunchao Zhang /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */ 362f659e5c7SJunchao Zhang bas = (PetscSF_Basic *)esf->data; 3639566063dSJacob Faibussowitsch PetscCall(PetscSFGetRootInfo_Basic(sf, &niranks, &ndiranks, &iranks, &ioffset, &irootloc)); /* Get recv info */ 364f659e5c7SJunchao Zhang /* Embedded sf always has simpler communication than the original one. We might allocate longer arrays than needed here. But we 365cd620004SJunchao Zhang we do not care since these arrays are usually short. The benefit is we can fill these arrays by just parsing irootloc once. 366f659e5c7SJunchao Zhang */ 3679566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(niranks, &bas->iranks, niranks + 1, &bas->ioffset)); 3689566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ioffset[niranks], &bas->irootloc)); 369f659e5c7SJunchao Zhang bas->niranks = bas->ndiranks = bas->ioffset[0] = 0; 370f659e5c7SJunchao Zhang p = 0; /* Counter for connected leaf ranks */ 371f659e5c7SJunchao Zhang q = 0; /* Counter for connected roots */ 372f659e5c7SJunchao Zhang for (i = 0; i < niranks; i++) { 373f659e5c7SJunchao Zhang connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */ 374f659e5c7SJunchao Zhang for (j = ioffset[i]; j < ioffset[i + 1]; j++) { 375cd620004SJunchao Zhang if (rootdata[irootloc[j]]) { 376f659e5c7SJunchao Zhang bas->irootloc[q++] = irootloc[j]; 377f659e5c7SJunchao Zhang connected = PETSC_TRUE; 378f659e5c7SJunchao Zhang } 379f659e5c7SJunchao Zhang } 380f659e5c7SJunchao Zhang if (connected) { 381f659e5c7SJunchao Zhang bas->niranks++; 382f659e5c7SJunchao Zhang if (i < ndiranks) bas->ndiranks++; /* Note that order of ranks (including distinguished ranks) is kept */ 383f659e5c7SJunchao Zhang bas->iranks[p] = iranks[i]; 384f659e5c7SJunchao Zhang bas->ioffset[p + 1] = q; 385f659e5c7SJunchao Zhang p++; 386f659e5c7SJunchao Zhang } 387f659e5c7SJunchao Zhang } 388f659e5c7SJunchao Zhang bas->itotal = q; 389cd620004SJunchao Zhang bas->nrootreqs = bas->niranks - bas->ndiranks; 390cd620004SJunchao Zhang esf->persistent = PETSC_TRUE; 391cd620004SJunchao Zhang /* Setup packing related fields */ 3929566063dSJacob Faibussowitsch PetscCall(PetscSFSetUpPackFields(esf)); 393f659e5c7SJunchao Zhang 39420c24465SJunchao Zhang /* Copy from PetscSFSetUp(), since this method wants to skip PetscSFSetUp(). */ 39520c24465SJunchao Zhang #if defined(PETSC_HAVE_CUDA) 39620c24465SJunchao Zhang if (esf->backend == PETSCSF_BACKEND_CUDA) { 39771438e86SJunchao Zhang esf->ops->Malloc = PetscSFMalloc_CUDA; 39871438e86SJunchao Zhang esf->ops->Free = PetscSFFree_CUDA; 39920c24465SJunchao Zhang } 40020c24465SJunchao Zhang #endif 40120c24465SJunchao Zhang 40259af0bd3SScott Kruger #if defined(PETSC_HAVE_HIP) 40359af0bd3SScott Kruger /* TODO: Needs debugging */ 40459af0bd3SScott Kruger if (esf->backend == PETSCSF_BACKEND_HIP) { 40559af0bd3SScott Kruger esf->ops->Malloc = PetscSFMalloc_HIP; 40659af0bd3SScott Kruger esf->ops->Free = PetscSFFree_HIP; 40759af0bd3SScott Kruger } 40859af0bd3SScott Kruger #endif 40959af0bd3SScott Kruger 41020c24465SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS) 41120c24465SJunchao Zhang if (esf->backend == PETSCSF_BACKEND_KOKKOS) { 41220c24465SJunchao Zhang esf->ops->Malloc = PetscSFMalloc_Kokkos; 41320c24465SJunchao Zhang esf->ops->Free = PetscSFFree_Kokkos; 41420c24465SJunchao Zhang } 41520c24465SJunchao Zhang #endif 416f659e5c7SJunchao Zhang esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */ 4179566063dSJacob Faibussowitsch PetscCall(PetscFree2(rootdata, leafmem)); 418f659e5c7SJunchao Zhang *newsf = esf; 419f659e5c7SJunchao Zhang PetscFunctionReturn(0); 420f659e5c7SJunchao Zhang } 421f659e5c7SJunchao Zhang 4229371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf) { 42340e23c03SJunchao Zhang PetscSF_Basic *dat; 42495fce210SBarry Smith 42595fce210SBarry Smith PetscFunctionBegin; 42695fce210SBarry Smith sf->ops->SetUp = PetscSFSetUp_Basic; 42795fce210SBarry Smith sf->ops->Reset = PetscSFReset_Basic; 42895fce210SBarry Smith sf->ops->Destroy = PetscSFDestroy_Basic; 42995fce210SBarry Smith sf->ops->View = PetscSFView_Basic; 430ad227feaSJunchao Zhang sf->ops->BcastBegin = PetscSFBcastBegin_Basic; 431ad227feaSJunchao Zhang sf->ops->BcastEnd = PetscSFBcastEnd_Basic; 43295fce210SBarry Smith sf->ops->ReduceBegin = PetscSFReduceBegin_Basic; 43395fce210SBarry Smith sf->ops->ReduceEnd = PetscSFReduceEnd_Basic; 43495fce210SBarry Smith sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Basic; 43595fce210SBarry Smith sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Basic; 4368750ddebSJunchao Zhang sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Basic; 43772502a1fSJunchao Zhang sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Basic; 43895fce210SBarry Smith 439*4dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&dat)); 44040e23c03SJunchao Zhang sf->data = (void *)dat; 44195fce210SBarry Smith PetscFunctionReturn(0); 44295fce210SBarry Smith } 443