140e23c03SJunchao Zhang #include <../src/vec/is/sf/impls/basic/sfbasic.h> 2cd620004SJunchao Zhang #include <../src/vec/is/sf/impls/basic/sfpack.h> 353dd6d7dSJunchao Zhang #include <petsc/private/viewerimpl.h> 4b23bfdefSJunchao Zhang 540e23c03SJunchao Zhang /*===================================================================================*/ 640e23c03SJunchao Zhang /* SF public interface implementations */ 740e23c03SJunchao Zhang /*===================================================================================*/ 8d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf) 9d71ae5a4SJacob Faibussowitsch { 10b23bfdefSJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic *)sf->data; 1171438e86SJunchao Zhang PetscInt *rlengths, *ilengths, i, nRemoteRootRanks, nRemoteLeafRanks; 1240e23c03SJunchao Zhang PetscMPIInt rank, niranks, *iranks, tag; 1395fce210SBarry Smith MPI_Comm comm; 14b5a8e515SJed Brown MPI_Group group; 1540e23c03SJunchao Zhang MPI_Request *rootreqs, *leafreqs; 1695fce210SBarry Smith 1795fce210SBarry Smith PetscFunctionBegin; 189566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_group(PETSC_COMM_SELF, &group)); 199566063dSJacob Faibussowitsch PetscCall(PetscSFSetUpRanks(sf, group)); 209566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_free(&group)); 219566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 229566063dSJacob Faibussowitsch PetscCall(PetscObjectGetNewTag((PetscObject)sf, &tag)); 239566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2495fce210SBarry Smith /* 2595fce210SBarry Smith * Inform roots about how many leaves and from which ranks 2695fce210SBarry Smith */ 279566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sf->nranks, &rlengths)); 28cd620004SJunchao Zhang /* Determine number, sending ranks and length of incoming */ 299371c9d4SSatish Balay for (i = 0; i < sf->nranks; i++) { rlengths[i] = sf->roffset[i + 1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */ } 3071438e86SJunchao Zhang nRemoteRootRanks = sf->nranks - sf->ndranks; 319566063dSJacob Faibussowitsch PetscCall(PetscCommBuildTwoSided(comm, 1, MPIU_INT, nRemoteRootRanks, sf->ranks + sf->ndranks, rlengths + sf->ndranks, &niranks, &iranks, (void **)&ilengths)); 32c943f53fSJed Brown 330b899082SJunchao Zhang /* Sort iranks. See use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() on why. 340b899082SJunchao Zhang We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is 350b899082SJunchao Zhang small and the sorting is cheap. 360b899082SJunchao Zhang */ 379566063dSJacob Faibussowitsch PetscCall(PetscSortMPIIntWithIntArray(niranks, iranks, ilengths)); 380b899082SJunchao Zhang 39c943f53fSJed Brown /* Partition into distinguished and non-distinguished incoming ranks */ 40c943f53fSJed Brown bas->ndiranks = sf->ndranks; 41c943f53fSJed Brown bas->niranks = bas->ndiranks + niranks; 429566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(bas->niranks, &bas->iranks, bas->niranks + 1, &bas->ioffset)); 43c943f53fSJed Brown bas->ioffset[0] = 0; 44c943f53fSJed Brown for (i = 0; i < bas->ndiranks; i++) { 45c943f53fSJed Brown bas->iranks[i] = sf->ranks[i]; 46c943f53fSJed Brown bas->ioffset[i + 1] = bas->ioffset[i] + rlengths[i]; 47c943f53fSJed Brown } 48c9cc58a2SBarry Smith PetscCheck(bas->ndiranks <= 1 && (bas->ndiranks != 1 || bas->iranks[0] == rank), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Broken setup for shared ranks"); 4940e23c03SJunchao Zhang for (; i < bas->niranks; i++) { 50c943f53fSJed Brown bas->iranks[i] = iranks[i - bas->ndiranks]; 51c943f53fSJed Brown bas->ioffset[i + 1] = bas->ioffset[i] + ilengths[i - bas->ndiranks]; 52c943f53fSJed Brown } 53c943f53fSJed Brown bas->itotal = bas->ioffset[i]; 549566063dSJacob Faibussowitsch PetscCall(PetscFree(rlengths)); 559566063dSJacob Faibussowitsch PetscCall(PetscFree(iranks)); 569566063dSJacob Faibussowitsch PetscCall(PetscFree(ilengths)); 5795fce210SBarry Smith 5895fce210SBarry Smith /* Send leaf identities to roots */ 5971438e86SJunchao Zhang nRemoteLeafRanks = bas->niranks - bas->ndiranks; 609566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bas->itotal, &bas->irootloc)); 619566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nRemoteLeafRanks, &rootreqs, nRemoteRootRanks, &leafreqs)); 6248a46eb9SPierre Jolivet for (i = bas->ndiranks; i < bas->niranks; i++) PetscCallMPI(MPIU_Irecv(bas->irootloc + bas->ioffset[i], bas->ioffset[i + 1] - bas->ioffset[i], MPIU_INT, bas->iranks[i], tag, comm, &rootreqs[i - bas->ndiranks])); 6340e23c03SJunchao Zhang for (i = 0; i < sf->nranks; i++) { 64c87b50c4SJunchao Zhang PetscInt npoints = sf->roffset[i + 1] - sf->roffset[i]; 6540e23c03SJunchao Zhang if (i < sf->ndranks) { 6608401ef6SPierre Jolivet PetscCheck(sf->ranks[i] == rank, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot interpret distinguished leaf rank"); 6708401ef6SPierre Jolivet PetscCheck(bas->iranks[0] == rank, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot interpret distinguished root rank"); 6808401ef6SPierre Jolivet PetscCheck(npoints == bas->ioffset[1] - bas->ioffset[0], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Distinguished rank exchange has mismatched lengths"); 699566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(bas->irootloc + bas->ioffset[0], sf->rremote + sf->roffset[i], npoints)); 70c943f53fSJed Brown continue; 71c943f53fSJed Brown } 729566063dSJacob Faibussowitsch PetscCallMPI(MPIU_Isend(sf->rremote + sf->roffset[i], npoints, MPIU_INT, sf->ranks[i], tag, comm, &leafreqs[i - sf->ndranks])); 73bf39f1bfSJed Brown } 749566063dSJacob Faibussowitsch PetscCallMPI(MPI_Waitall(nRemoteLeafRanks, rootreqs, MPI_STATUSES_IGNORE)); 759566063dSJacob Faibussowitsch PetscCallMPI(MPI_Waitall(nRemoteRootRanks, leafreqs, MPI_STATUSES_IGNORE)); 7695fce210SBarry Smith 7771438e86SJunchao Zhang sf->nleafreqs = nRemoteRootRanks; 7871438e86SJunchao Zhang bas->nrootreqs = nRemoteLeafRanks; 79cd620004SJunchao Zhang sf->persistent = PETSC_TRUE; 80eb02082bSJunchao Zhang 8171438e86SJunchao Zhang /* Setup fields related to packing, such as rootbuflen[] */ 829566063dSJacob Faibussowitsch PetscCall(PetscSFSetUpPackFields(sf)); 839566063dSJacob Faibussowitsch PetscCall(PetscFree2(rootreqs, leafreqs)); 8495fce210SBarry Smith PetscFunctionReturn(0); 8595fce210SBarry Smith } 8695fce210SBarry Smith 87d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf) 88d71ae5a4SJacob Faibussowitsch { 89cd620004SJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic *)sf->data; 9071438e86SJunchao Zhang PetscSFLink link = bas->avail, next; 9195fce210SBarry Smith 9295fce210SBarry Smith PetscFunctionBegin; 9328b400f6SJacob Faibussowitsch PetscCheck(!bas->inuse, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Outstanding operation has not been completed"); 949566063dSJacob Faibussowitsch PetscCall(PetscFree2(bas->iranks, bas->ioffset)); 959566063dSJacob Faibussowitsch PetscCall(PetscFree(bas->irootloc)); 9671438e86SJunchao Zhang 977fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_DEVICE) 989566063dSJacob Faibussowitsch for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, bas->irootloc_d[i])); 99eb02082bSJunchao Zhang #endif 10071438e86SJunchao Zhang 10171438e86SJunchao Zhang #if defined(PETSC_HAVE_NVSHMEM) 1029566063dSJacob Faibussowitsch PetscCall(PetscSFReset_Basic_NVSHMEM(sf)); 10371438e86SJunchao Zhang #endif 10471438e86SJunchao Zhang 1059371c9d4SSatish Balay for (; link; link = next) { 1069371c9d4SSatish Balay next = link->next; 1079371c9d4SSatish Balay PetscCall(PetscSFLinkDestroy(sf, link)); 1089371c9d4SSatish Balay } 10971438e86SJunchao Zhang bas->avail = NULL; 1109566063dSJacob Faibussowitsch PetscCall(PetscSFResetPackFields(sf)); 11195fce210SBarry Smith PetscFunctionReturn(0); 11295fce210SBarry Smith } 11395fce210SBarry Smith 114d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf) 115d71ae5a4SJacob Faibussowitsch { 11695fce210SBarry Smith PetscFunctionBegin; 1179566063dSJacob Faibussowitsch PetscCall(PetscSFReset_Basic(sf)); 1189566063dSJacob Faibussowitsch PetscCall(PetscFree(sf->data)); 11995fce210SBarry Smith PetscFunctionReturn(0); 12095fce210SBarry Smith } 12195fce210SBarry Smith 12262152dedSBarry Smith #if defined(PETSC_USE_SINGLE_LIBRARY) 12362152dedSBarry Smith #include <petscmat.h> 12462152dedSBarry Smith 125d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFView_Basic_PatternAndSizes(PetscSF sf, PetscViewer viewer) 126d71ae5a4SJacob Faibussowitsch { 12762152dedSBarry Smith PetscSF_Basic *bas = (PetscSF_Basic *)sf->data; 12853dd6d7dSJunchao Zhang PetscInt i, nrootranks, ndrootranks; 12962152dedSBarry Smith const PetscInt *rootoffset; 13062152dedSBarry Smith PetscMPIInt rank, size; 13153dd6d7dSJunchao Zhang const PetscMPIInt *rootranks; 13262152dedSBarry Smith MPI_Comm comm = PetscObjectComm((PetscObject)sf); 13353dd6d7dSJunchao Zhang PetscScalar unitbytes; 13462152dedSBarry Smith Mat A; 13562152dedSBarry Smith 13662152dedSBarry Smith PetscFunctionBegin; 1379566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 1389566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 13953dd6d7dSJunchao Zhang /* PetscSFView is most useful for the SF used in VecScatterBegin/End in MatMult etc, where we do 14053dd6d7dSJunchao Zhang PetscSFBcast, i.e., roots send data to leaves. We dump the communication pattern into a matrix 14153dd6d7dSJunchao Zhang in senders' view point: how many bytes I will send to my neighbors. 14253dd6d7dSJunchao Zhang 14353dd6d7dSJunchao Zhang Looking at a column of the matrix, one can also know how many bytes the rank will receive from others. 14453dd6d7dSJunchao Zhang 14553dd6d7dSJunchao Zhang If PetscSFLink bas->inuse is available, we can use that to get tree vertex size. But that would give 14653dd6d7dSJunchao Zhang different interpretations for the same SF for different data types. Since we most care about VecScatter, 14753dd6d7dSJunchao Zhang we uniformly treat each vertex as a PetscScalar. 14853dd6d7dSJunchao Zhang */ 14953dd6d7dSJunchao Zhang unitbytes = (PetscScalar)sizeof(PetscScalar); 15053dd6d7dSJunchao Zhang 1519566063dSJacob Faibussowitsch PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, &rootranks, &rootoffset, NULL)); 1529566063dSJacob Faibussowitsch PetscCall(MatCreateAIJ(comm, 1, 1, size, size, 1, NULL, nrootranks - ndrootranks, NULL, &A)); 1539566063dSJacob Faibussowitsch PetscCall(MatSetOptionsPrefix(A, "__petsc_internal__")); /* To prevent the internal A from taking any command line options */ 15448a46eb9SPierre Jolivet for (i = 0; i < nrootranks; i++) PetscCall(MatSetValue(A, (PetscInt)rank, bas->iranks[i], (rootoffset[i + 1] - rootoffset[i]) * unitbytes, INSERT_VALUES)); 1559566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1569566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1579566063dSJacob Faibussowitsch PetscCall(MatView(A, viewer)); 1589566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 15962152dedSBarry Smith PetscFunctionReturn(0); 16062152dedSBarry Smith } 16162152dedSBarry Smith #endif 16262152dedSBarry Smith 163d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf, PetscViewer viewer) 164d71ae5a4SJacob Faibussowitsch { 16553dd6d7dSJunchao Zhang PetscBool isascii; 16695fce210SBarry Smith 16795fce210SBarry Smith PetscFunctionBegin; 1689566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 1699566063dSJacob Faibussowitsch if (isascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) PetscCall(PetscViewerASCIIPrintf(viewer, " MultiSF sort=%s\n", sf->rankorder ? "rank-order" : "unordered")); 17062152dedSBarry Smith #if defined(PETSC_USE_SINGLE_LIBRARY) 17153dd6d7dSJunchao Zhang else { 17253dd6d7dSJunchao Zhang PetscBool isdraw, isbinary; 1739566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1749566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 17548a46eb9SPierre Jolivet if ((isascii && viewer->format == PETSC_VIEWER_ASCII_MATLAB) || isdraw || isbinary) PetscCall(PetscSFView_Basic_PatternAndSizes(sf, viewer)); 17662152dedSBarry Smith } 17762152dedSBarry Smith #endif 17895fce210SBarry Smith PetscFunctionReturn(0); 17995fce210SBarry Smith } 18095fce210SBarry Smith 181d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFBcastBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op) 182d71ae5a4SJacob Faibussowitsch { 183cd620004SJunchao Zhang PetscSFLink link = NULL; 18495fce210SBarry Smith 18595fce210SBarry Smith PetscFunctionBegin; 18671438e86SJunchao Zhang /* Create a communication link, which provides buffers, MPI requests etc (if MPI is used) */ 1879566063dSJacob Faibussowitsch PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, PETSCSF_BCAST, &link)); 18871438e86SJunchao Zhang /* Pack rootdata to rootbuf for remote communication */ 1899566063dSJacob Faibussowitsch PetscCall(PetscSFLinkPackRootData(sf, link, PETSCSF_REMOTE, rootdata)); 190*da81f932SPierre Jolivet /* Start communication, e.g., post MPI_Isend */ 1919566063dSJacob Faibussowitsch PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_ROOT2LEAF)); 19271438e86SJunchao Zhang /* Do local scatter (i.e., self to self communication), which overlaps with the remote communication above */ 1939566063dSJacob Faibussowitsch PetscCall(PetscSFLinkScatterLocal(sf, link, PETSCSF_ROOT2LEAF, (void *)rootdata, leafdata, op)); 19495fce210SBarry Smith PetscFunctionReturn(0); 19595fce210SBarry Smith } 19695fce210SBarry Smith 197d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFBcastEnd_Basic(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) 198d71ae5a4SJacob Faibussowitsch { 199cd620004SJunchao Zhang PetscSFLink link = NULL; 20095fce210SBarry Smith 20195fce210SBarry Smith PetscFunctionBegin; 202cd620004SJunchao Zhang /* Retrieve the link used in XxxBegin() with root/leafdata as key */ 2039566063dSJacob Faibussowitsch PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link)); 20471438e86SJunchao Zhang /* Finish remote communication, e.g., post MPI_Waitall */ 2059566063dSJacob Faibussowitsch PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_ROOT2LEAF)); 20671438e86SJunchao Zhang /* Unpack data in leafbuf to leafdata for remote communication */ 2079566063dSJacob Faibussowitsch PetscCall(PetscSFLinkUnpackLeafData(sf, link, PETSCSF_REMOTE, leafdata, op)); 20871438e86SJunchao Zhang /* Recycle the link */ 2099566063dSJacob Faibussowitsch PetscCall(PetscSFLinkReclaim(sf, &link)); 210cd620004SJunchao Zhang PetscFunctionReturn(0); 211cd620004SJunchao Zhang } 212cd620004SJunchao Zhang 213cd620004SJunchao Zhang /* Shared by ReduceBegin and FetchAndOpBegin */ 214d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode PetscSFLeafToRootBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op, PetscSFOperation sfop, PetscSFLink *out) 215d71ae5a4SJacob Faibussowitsch { 21671438e86SJunchao Zhang PetscSFLink link = NULL; 217cd620004SJunchao Zhang 218cd620004SJunchao Zhang PetscFunctionBegin; 2199566063dSJacob Faibussowitsch PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, sfop, &link)); 2209566063dSJacob Faibussowitsch PetscCall(PetscSFLinkPackLeafData(sf, link, PETSCSF_REMOTE, leafdata)); 2219566063dSJacob Faibussowitsch PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_LEAF2ROOT)); 222cd620004SJunchao Zhang *out = link; 22395fce210SBarry Smith PetscFunctionReturn(0); 22495fce210SBarry Smith } 22595fce210SBarry Smith 22695fce210SBarry Smith /* leaf -> root with reduction */ 227d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op) 228d71ae5a4SJacob Faibussowitsch { 229cd620004SJunchao Zhang PetscSFLink link = NULL; 23095fce210SBarry Smith 23195fce210SBarry Smith PetscFunctionBegin; 2329566063dSJacob Faibussowitsch PetscCall(PetscSFLeafToRootBegin_Basic(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op, PETSCSF_REDUCE, &link)); 2339566063dSJacob Faibussowitsch PetscCall(PetscSFLinkScatterLocal(sf, link, PETSCSF_LEAF2ROOT, rootdata, (void *)leafdata, op)); 23495fce210SBarry Smith PetscFunctionReturn(0); 23595fce210SBarry Smith } 23695fce210SBarry Smith 237d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) 238d71ae5a4SJacob Faibussowitsch { 239cd620004SJunchao Zhang PetscSFLink link = NULL; 24095fce210SBarry Smith 24195fce210SBarry Smith PetscFunctionBegin; 2429566063dSJacob Faibussowitsch PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link)); 2439566063dSJacob Faibussowitsch PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_LEAF2ROOT)); 2449566063dSJacob Faibussowitsch PetscCall(PetscSFLinkUnpackRootData(sf, link, PETSCSF_REMOTE, rootdata, op)); 2459566063dSJacob Faibussowitsch PetscCall(PetscSFLinkReclaim(sf, &link)); 24695fce210SBarry Smith PetscFunctionReturn(0); 24795fce210SBarry Smith } 24895fce210SBarry Smith 249d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, void *leafupdate, MPI_Op op) 250d71ae5a4SJacob Faibussowitsch { 251cd620004SJunchao Zhang PetscSFLink link = NULL; 25295fce210SBarry Smith 25395fce210SBarry Smith PetscFunctionBegin; 2549566063dSJacob Faibussowitsch PetscCall(PetscSFLeafToRootBegin_Basic(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op, PETSCSF_FETCH, &link)); 2559566063dSJacob Faibussowitsch PetscCall(PetscSFLinkFetchAndOpLocal(sf, link, rootdata, leafdata, leafupdate, op)); 25695fce210SBarry Smith PetscFunctionReturn(0); 25795fce210SBarry Smith } 25895fce210SBarry Smith 259d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) 260d71ae5a4SJacob Faibussowitsch { 261cd620004SJunchao Zhang PetscSFLink link = NULL; 26295fce210SBarry Smith 26395fce210SBarry Smith PetscFunctionBegin; 2649566063dSJacob Faibussowitsch PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link)); 26595fce210SBarry Smith /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */ 2669566063dSJacob Faibussowitsch PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_LEAF2ROOT)); 267cd620004SJunchao Zhang /* Do fetch-and-op, the (remote) update results are in rootbuf */ 2689566063dSJacob Faibussowitsch PetscCall(PetscSFLinkFetchAndOpRemote(sf, link, rootdata, op)); 269cd620004SJunchao Zhang /* Bcast rootbuf to leafupdate */ 2709566063dSJacob Faibussowitsch PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_ROOT2LEAF)); 2719566063dSJacob Faibussowitsch PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_ROOT2LEAF)); 272b23bfdefSJunchao Zhang /* Unpack and insert fetched data into leaves */ 2739566063dSJacob Faibussowitsch PetscCall(PetscSFLinkUnpackLeafData(sf, link, PETSCSF_REMOTE, leafupdate, MPI_REPLACE)); 2749566063dSJacob Faibussowitsch PetscCall(PetscSFLinkReclaim(sf, &link)); 27595fce210SBarry Smith PetscFunctionReturn(0); 27695fce210SBarry Smith } 27795fce210SBarry Smith 278d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf, PetscInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc) 279d71ae5a4SJacob Faibussowitsch { 2808750ddebSJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic *)sf->data; 2818750ddebSJunchao Zhang 2828750ddebSJunchao Zhang PetscFunctionBegin; 2838750ddebSJunchao Zhang if (niranks) *niranks = bas->niranks; 2848750ddebSJunchao Zhang if (iranks) *iranks = bas->iranks; 2858750ddebSJunchao Zhang if (ioffset) *ioffset = bas->ioffset; 2868750ddebSJunchao Zhang if (irootloc) *irootloc = bas->irootloc; 2878750ddebSJunchao Zhang PetscFunctionReturn(0); 2888750ddebSJunchao Zhang } 2898750ddebSJunchao Zhang 290*da81f932SPierre Jolivet /* An optimized PetscSFCreateEmbeddedRootSF. We aggressively make use of the established communication on sf. 291f659e5c7SJunchao Zhang We need one bcast on sf, and no communication anymore to build the embedded sf. Note that selected[] 292f659e5c7SJunchao Zhang was sorted before calling the routine. 293f659e5c7SJunchao Zhang */ 294d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedRootSF_Basic(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf) 295d71ae5a4SJacob Faibussowitsch { 296f659e5c7SJunchao Zhang PetscSF esf; 297cd620004SJunchao Zhang PetscInt esf_nranks, esf_ndranks, *esf_roffset, *esf_rmine, *esf_rremote; 298cd620004SJunchao Zhang PetscInt i, j, p, q, nroots, esf_nleaves, *new_ilocal, nranks, ndranks, niranks, ndiranks, minleaf, maxleaf, maxlocal; 299cd620004SJunchao Zhang char *rootdata, *leafdata, *leafmem; /* Only stores 0 or 1, so we can save memory with char */ 300f659e5c7SJunchao Zhang PetscMPIInt *esf_ranks; 301f659e5c7SJunchao Zhang const PetscMPIInt *ranks, *iranks; 302cd620004SJunchao Zhang const PetscInt *roffset, *rmine, *rremote, *ioffset, *irootloc; 303f659e5c7SJunchao Zhang PetscBool connected; 304f659e5c7SJunchao Zhang PetscSFNode *new_iremote; 305f659e5c7SJunchao Zhang PetscSF_Basic *bas; 306f659e5c7SJunchao Zhang 307f659e5c7SJunchao Zhang PetscFunctionBegin; 3089566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), &esf)); 3099566063dSJacob Faibussowitsch PetscCall(PetscSFSetFromOptions(esf)); 3109566063dSJacob Faibussowitsch PetscCall(PetscSFSetType(esf, PETSCSFBASIC)); /* This optimized routine can only create a basic sf */ 311f659e5c7SJunchao Zhang 312cd620004SJunchao Zhang /* Find out which leaves are still connected to roots in the embedded sf by doing a Bcast */ 3139566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL)); 3149566063dSJacob Faibussowitsch PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf)); 315cd620004SJunchao Zhang maxlocal = maxleaf - minleaf + 1; 3169566063dSJacob Faibussowitsch PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem)); 317cd620004SJunchao Zhang leafdata = leafmem - minleaf; 318f659e5c7SJunchao Zhang /* Tag selected roots */ 319f659e5c7SJunchao Zhang for (i = 0; i < nselected; ++i) rootdata[selected[i]] = 1; 320f659e5c7SJunchao Zhang 3219566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(sf, MPI_CHAR, rootdata, leafdata, MPI_REPLACE)); 3229566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(sf, MPI_CHAR, rootdata, leafdata, MPI_REPLACE)); 3239566063dSJacob Faibussowitsch PetscCall(PetscSFGetLeafInfo_Basic(sf, &nranks, &ndranks, &ranks, &roffset, &rmine, &rremote)); /* Get send info */ 324cd620004SJunchao Zhang esf_nranks = esf_ndranks = esf_nleaves = 0; 325b23bfdefSJunchao Zhang for (i = 0; i < nranks; i++) { 326cd620004SJunchao Zhang connected = PETSC_FALSE; /* Is this process still connected to this remote root rank? */ 3279371c9d4SSatish Balay for (j = roffset[i]; j < roffset[i + 1]; j++) { 3289371c9d4SSatish Balay if (leafdata[rmine[j]]) { 3299371c9d4SSatish Balay esf_nleaves++; 3309371c9d4SSatish Balay connected = PETSC_TRUE; 3319371c9d4SSatish Balay } 3329371c9d4SSatish Balay } 3339371c9d4SSatish Balay if (connected) { 3349371c9d4SSatish Balay esf_nranks++; 3359371c9d4SSatish Balay if (i < ndranks) esf_ndranks++; 3369371c9d4SSatish Balay } 337f659e5c7SJunchao Zhang } 338f659e5c7SJunchao Zhang 339f659e5c7SJunchao Zhang /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */ 3409566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal)); 3419566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(esf_nleaves, &new_iremote)); 3429566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(esf_nranks, &esf_ranks, esf_nranks + 1, &esf_roffset, esf_nleaves, &esf_rmine, esf_nleaves, &esf_rremote)); 343f659e5c7SJunchao Zhang p = 0; /* Counter for connected root ranks */ 344f659e5c7SJunchao Zhang q = 0; /* Counter for connected leaves */ 345f659e5c7SJunchao Zhang esf_roffset[0] = 0; 346f659e5c7SJunchao Zhang for (i = 0; i < nranks; i++) { /* Scan leaf data again to fill esf arrays */ 347f659e5c7SJunchao Zhang connected = PETSC_FALSE; 348cd620004SJunchao Zhang for (j = roffset[i]; j < roffset[i + 1]; j++) { 349cd620004SJunchao Zhang if (leafdata[rmine[j]]) { 350f659e5c7SJunchao Zhang esf_rmine[q] = new_ilocal[q] = rmine[j]; 351f659e5c7SJunchao Zhang esf_rremote[q] = rremote[j]; 352f659e5c7SJunchao Zhang new_iremote[q].index = rremote[j]; 353f659e5c7SJunchao Zhang new_iremote[q].rank = ranks[i]; 354f659e5c7SJunchao Zhang connected = PETSC_TRUE; 355f659e5c7SJunchao Zhang q++; 356f659e5c7SJunchao Zhang } 357f659e5c7SJunchao Zhang } 358f659e5c7SJunchao Zhang if (connected) { 359f659e5c7SJunchao Zhang esf_ranks[p] = ranks[i]; 360f659e5c7SJunchao Zhang esf_roffset[p + 1] = q; 361f659e5c7SJunchao Zhang p++; 362f659e5c7SJunchao Zhang } 363f659e5c7SJunchao Zhang } 364f659e5c7SJunchao Zhang 365f659e5c7SJunchao Zhang /* SetGraph internally resets the SF, so we only set its fields after the call */ 3669566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 367f659e5c7SJunchao Zhang esf->nranks = esf_nranks; 368f659e5c7SJunchao Zhang esf->ndranks = esf_ndranks; 369f659e5c7SJunchao Zhang esf->ranks = esf_ranks; 370f659e5c7SJunchao Zhang esf->roffset = esf_roffset; 371f659e5c7SJunchao Zhang esf->rmine = esf_rmine; 372f659e5c7SJunchao Zhang esf->rremote = esf_rremote; 373cd620004SJunchao Zhang esf->nleafreqs = esf_nranks - esf_ndranks; 374f659e5c7SJunchao Zhang 375f659e5c7SJunchao Zhang /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */ 376f659e5c7SJunchao Zhang bas = (PetscSF_Basic *)esf->data; 3779566063dSJacob Faibussowitsch PetscCall(PetscSFGetRootInfo_Basic(sf, &niranks, &ndiranks, &iranks, &ioffset, &irootloc)); /* Get recv info */ 378f659e5c7SJunchao Zhang /* Embedded sf always has simpler communication than the original one. We might allocate longer arrays than needed here. But we 379cd620004SJunchao Zhang we do not care since these arrays are usually short. The benefit is we can fill these arrays by just parsing irootloc once. 380f659e5c7SJunchao Zhang */ 3819566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(niranks, &bas->iranks, niranks + 1, &bas->ioffset)); 3829566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ioffset[niranks], &bas->irootloc)); 383f659e5c7SJunchao Zhang bas->niranks = bas->ndiranks = bas->ioffset[0] = 0; 384f659e5c7SJunchao Zhang p = 0; /* Counter for connected leaf ranks */ 385f659e5c7SJunchao Zhang q = 0; /* Counter for connected roots */ 386f659e5c7SJunchao Zhang for (i = 0; i < niranks; i++) { 387f659e5c7SJunchao Zhang connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */ 388f659e5c7SJunchao Zhang for (j = ioffset[i]; j < ioffset[i + 1]; j++) { 389cd620004SJunchao Zhang if (rootdata[irootloc[j]]) { 390f659e5c7SJunchao Zhang bas->irootloc[q++] = irootloc[j]; 391f659e5c7SJunchao Zhang connected = PETSC_TRUE; 392f659e5c7SJunchao Zhang } 393f659e5c7SJunchao Zhang } 394f659e5c7SJunchao Zhang if (connected) { 395f659e5c7SJunchao Zhang bas->niranks++; 396f659e5c7SJunchao Zhang if (i < ndiranks) bas->ndiranks++; /* Note that order of ranks (including distinguished ranks) is kept */ 397f659e5c7SJunchao Zhang bas->iranks[p] = iranks[i]; 398f659e5c7SJunchao Zhang bas->ioffset[p + 1] = q; 399f659e5c7SJunchao Zhang p++; 400f659e5c7SJunchao Zhang } 401f659e5c7SJunchao Zhang } 402f659e5c7SJunchao Zhang bas->itotal = q; 403cd620004SJunchao Zhang bas->nrootreqs = bas->niranks - bas->ndiranks; 404cd620004SJunchao Zhang esf->persistent = PETSC_TRUE; 405cd620004SJunchao Zhang /* Setup packing related fields */ 4069566063dSJacob Faibussowitsch PetscCall(PetscSFSetUpPackFields(esf)); 407f659e5c7SJunchao Zhang 40820c24465SJunchao Zhang /* Copy from PetscSFSetUp(), since this method wants to skip PetscSFSetUp(). */ 40920c24465SJunchao Zhang #if defined(PETSC_HAVE_CUDA) 41020c24465SJunchao Zhang if (esf->backend == PETSCSF_BACKEND_CUDA) { 41171438e86SJunchao Zhang esf->ops->Malloc = PetscSFMalloc_CUDA; 41271438e86SJunchao Zhang esf->ops->Free = PetscSFFree_CUDA; 41320c24465SJunchao Zhang } 41420c24465SJunchao Zhang #endif 41520c24465SJunchao Zhang 41659af0bd3SScott Kruger #if defined(PETSC_HAVE_HIP) 41759af0bd3SScott Kruger /* TODO: Needs debugging */ 41859af0bd3SScott Kruger if (esf->backend == PETSCSF_BACKEND_HIP) { 41959af0bd3SScott Kruger esf->ops->Malloc = PetscSFMalloc_HIP; 42059af0bd3SScott Kruger esf->ops->Free = PetscSFFree_HIP; 42159af0bd3SScott Kruger } 42259af0bd3SScott Kruger #endif 42359af0bd3SScott Kruger 42420c24465SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS) 42520c24465SJunchao Zhang if (esf->backend == PETSCSF_BACKEND_KOKKOS) { 42620c24465SJunchao Zhang esf->ops->Malloc = PetscSFMalloc_Kokkos; 42720c24465SJunchao Zhang esf->ops->Free = PetscSFFree_Kokkos; 42820c24465SJunchao Zhang } 42920c24465SJunchao Zhang #endif 430f659e5c7SJunchao Zhang esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */ 4319566063dSJacob Faibussowitsch PetscCall(PetscFree2(rootdata, leafmem)); 432f659e5c7SJunchao Zhang *newsf = esf; 433f659e5c7SJunchao Zhang PetscFunctionReturn(0); 434f659e5c7SJunchao Zhang } 435f659e5c7SJunchao Zhang 436d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf) 437d71ae5a4SJacob Faibussowitsch { 43840e23c03SJunchao Zhang PetscSF_Basic *dat; 43995fce210SBarry Smith 44095fce210SBarry Smith PetscFunctionBegin; 44195fce210SBarry Smith sf->ops->SetUp = PetscSFSetUp_Basic; 44295fce210SBarry Smith sf->ops->Reset = PetscSFReset_Basic; 44395fce210SBarry Smith sf->ops->Destroy = PetscSFDestroy_Basic; 44495fce210SBarry Smith sf->ops->View = PetscSFView_Basic; 445ad227feaSJunchao Zhang sf->ops->BcastBegin = PetscSFBcastBegin_Basic; 446ad227feaSJunchao Zhang sf->ops->BcastEnd = PetscSFBcastEnd_Basic; 44795fce210SBarry Smith sf->ops->ReduceBegin = PetscSFReduceBegin_Basic; 44895fce210SBarry Smith sf->ops->ReduceEnd = PetscSFReduceEnd_Basic; 44995fce210SBarry Smith sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Basic; 45095fce210SBarry Smith sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Basic; 4518750ddebSJunchao Zhang sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Basic; 45272502a1fSJunchao Zhang sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Basic; 45395fce210SBarry Smith 4544dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&dat)); 45540e23c03SJunchao Zhang sf->data = (void *)dat; 45695fce210SBarry Smith PetscFunctionReturn(0); 45795fce210SBarry Smith } 458