140e23c03SJunchao Zhang #include <../src/vec/is/sf/impls/basic/sfbasic.h> 2cd620004SJunchao Zhang #include <../src/vec/is/sf/impls/basic/sfpack.h> 353dd6d7dSJunchao Zhang #include <petsc/private/viewerimpl.h> 4b23bfdefSJunchao Zhang 5f5d27ee7SJunchao Zhang // Init persistent MPI send/recv requests 6f5d27ee7SJunchao Zhang static PetscErrorCode PetscSFLinkInitMPIRequests_Persistent_Basic(PetscSF sf, PetscSFLink link, PetscSFDirection direction) 7f5d27ee7SJunchao Zhang { 8f5d27ee7SJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic *)sf->data; 9*6497c311SBarry Smith PetscInt cnt; 10*6497c311SBarry Smith PetscMPIInt nrootranks, ndrootranks, nleafranks, ndleafranks; 11f5d27ee7SJunchao Zhang const PetscInt *rootoffset, *leafoffset; 12f5d27ee7SJunchao Zhang MPI_Aint disp; 13f5d27ee7SJunchao Zhang MPI_Comm comm = PetscObjectComm((PetscObject)sf); 14f5d27ee7SJunchao Zhang MPI_Datatype unit = link->unit; 15f5d27ee7SJunchao Zhang const PetscMemType rootmtype_mpi = link->rootmtype_mpi, leafmtype_mpi = link->leafmtype_mpi; /* Used to select buffers passed to MPI */ 16f5d27ee7SJunchao Zhang const PetscInt rootdirect_mpi = link->rootdirect_mpi, leafdirect_mpi = link->leafdirect_mpi; 17f5d27ee7SJunchao Zhang 18f5d27ee7SJunchao Zhang PetscFunctionBegin; 19f5d27ee7SJunchao Zhang if (bas->rootbuflen[PETSCSF_REMOTE] && !link->rootreqsinited[direction][rootmtype_mpi][rootdirect_mpi]) { 20f5d27ee7SJunchao Zhang PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, NULL, &rootoffset, NULL)); 21f5d27ee7SJunchao Zhang if (direction == PETSCSF_LEAF2ROOT) { 22*6497c311SBarry Smith for (PetscMPIInt i = ndrootranks, j = 0; i < nrootranks; i++, j++) { 23f5d27ee7SJunchao Zhang disp = (rootoffset[i] - rootoffset[ndrootranks]) * link->unitbytes; 24f5d27ee7SJunchao Zhang cnt = rootoffset[i + 1] - rootoffset[i]; 25f5d27ee7SJunchao Zhang PetscCallMPI(MPIU_Recv_init(link->rootbuf[PETSCSF_REMOTE][rootmtype_mpi] + disp, cnt, unit, bas->iranks[i], link->tag, comm, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi] + j)); 26f5d27ee7SJunchao Zhang } 27f5d27ee7SJunchao Zhang } else { /* PETSCSF_ROOT2LEAF */ 28*6497c311SBarry Smith for (PetscMPIInt i = ndrootranks, j = 0; i < nrootranks; i++, j++) { 29f5d27ee7SJunchao Zhang disp = (rootoffset[i] - rootoffset[ndrootranks]) * link->unitbytes; 30f5d27ee7SJunchao Zhang cnt = rootoffset[i + 1] - rootoffset[i]; 31f5d27ee7SJunchao Zhang PetscCallMPI(MPIU_Send_init(link->rootbuf[PETSCSF_REMOTE][rootmtype_mpi] + disp, cnt, unit, bas->iranks[i], link->tag, comm, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi] + j)); 32f5d27ee7SJunchao Zhang } 33f5d27ee7SJunchao Zhang } 34f5d27ee7SJunchao Zhang link->rootreqsinited[direction][rootmtype_mpi][rootdirect_mpi] = PETSC_TRUE; 35f5d27ee7SJunchao Zhang } 36f5d27ee7SJunchao Zhang 37f5d27ee7SJunchao Zhang if (sf->leafbuflen[PETSCSF_REMOTE] && !link->leafreqsinited[direction][leafmtype_mpi][leafdirect_mpi]) { 38f5d27ee7SJunchao Zhang PetscCall(PetscSFGetLeafInfo_Basic(sf, &nleafranks, &ndleafranks, NULL, &leafoffset, NULL, NULL)); 39f5d27ee7SJunchao Zhang if (direction == PETSCSF_LEAF2ROOT) { 40*6497c311SBarry Smith for (PetscMPIInt i = ndleafranks, j = 0; i < nleafranks; i++, j++) { 41f5d27ee7SJunchao Zhang disp = (leafoffset[i] - leafoffset[ndleafranks]) * link->unitbytes; 42f5d27ee7SJunchao Zhang cnt = leafoffset[i + 1] - leafoffset[i]; 43f5d27ee7SJunchao Zhang PetscCallMPI(MPIU_Send_init(link->leafbuf[PETSCSF_REMOTE][leafmtype_mpi] + disp, cnt, unit, sf->ranks[i], link->tag, comm, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi] + j)); 44f5d27ee7SJunchao Zhang } 45f5d27ee7SJunchao Zhang } else { /* PETSCSF_ROOT2LEAF */ 46*6497c311SBarry Smith for (PetscMPIInt i = ndleafranks, j = 0; i < nleafranks; i++, j++) { 47f5d27ee7SJunchao Zhang disp = (leafoffset[i] - leafoffset[ndleafranks]) * link->unitbytes; 48f5d27ee7SJunchao Zhang cnt = leafoffset[i + 1] - leafoffset[i]; 49f5d27ee7SJunchao Zhang PetscCallMPI(MPIU_Recv_init(link->leafbuf[PETSCSF_REMOTE][leafmtype_mpi] + disp, cnt, unit, sf->ranks[i], link->tag, comm, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi] + j)); 50f5d27ee7SJunchao Zhang } 51f5d27ee7SJunchao Zhang } 52f5d27ee7SJunchao Zhang link->leafreqsinited[direction][leafmtype_mpi][leafdirect_mpi] = PETSC_TRUE; 53f5d27ee7SJunchao Zhang } 54f5d27ee7SJunchao Zhang PetscFunctionReturn(PETSC_SUCCESS); 55f5d27ee7SJunchao Zhang } 56f5d27ee7SJunchao Zhang 57f5d27ee7SJunchao Zhang // Start MPI requests. If use non-GPU aware MPI, we might need to copy data from device buf to host buf 58f5d27ee7SJunchao Zhang static PetscErrorCode PetscSFLinkStartCommunication_Persistent_Basic(PetscSF sf, PetscSFLink link, PetscSFDirection direction) 59f5d27ee7SJunchao Zhang { 60646b835dSJunchao Zhang PetscMPIInt nsreqs = 0, nrreqs = 0; 61646b835dSJunchao Zhang MPI_Request *sreqs = NULL, *rreqs = NULL; 62f5d27ee7SJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic *)sf->data; 63646b835dSJunchao Zhang PetscInt sbuflen, rbuflen; 64f5d27ee7SJunchao Zhang 65f5d27ee7SJunchao Zhang PetscFunctionBegin; 66646b835dSJunchao Zhang rbuflen = (direction == PETSCSF_ROOT2LEAF) ? sf->leafbuflen[PETSCSF_REMOTE] : bas->rootbuflen[PETSCSF_REMOTE]; 67646b835dSJunchao Zhang if (rbuflen) { 68f5d27ee7SJunchao Zhang if (direction == PETSCSF_ROOT2LEAF) { 69646b835dSJunchao Zhang nrreqs = sf->nleafreqs; 70646b835dSJunchao Zhang PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, NULL, NULL, NULL, &rreqs)); 71f5d27ee7SJunchao Zhang } else { /* leaf to root */ 72646b835dSJunchao Zhang nrreqs = bas->nrootreqs; 73646b835dSJunchao Zhang PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, NULL, NULL, &rreqs, NULL)); 74f5d27ee7SJunchao Zhang } 75f5d27ee7SJunchao Zhang } 76f5d27ee7SJunchao Zhang 77646b835dSJunchao Zhang sbuflen = (direction == PETSCSF_ROOT2LEAF) ? bas->rootbuflen[PETSCSF_REMOTE] : sf->leafbuflen[PETSCSF_REMOTE]; 78646b835dSJunchao Zhang if (sbuflen) { 79f5d27ee7SJunchao Zhang if (direction == PETSCSF_ROOT2LEAF) { 80646b835dSJunchao Zhang nsreqs = bas->nrootreqs; 81f5d27ee7SJunchao Zhang PetscCall(PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /*device2host before sending */)); 82646b835dSJunchao Zhang PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, NULL, NULL, &sreqs, NULL)); 83f5d27ee7SJunchao Zhang } else { /* leaf to root */ 84646b835dSJunchao Zhang nsreqs = sf->nleafreqs; 85f5d27ee7SJunchao Zhang PetscCall(PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE)); 86646b835dSJunchao Zhang PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, NULL, NULL, NULL, &sreqs)); 87f5d27ee7SJunchao Zhang } 88f5d27ee7SJunchao Zhang } 89646b835dSJunchao Zhang PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf, link)); // need to sync the stream to make BOTH sendbuf and recvbuf ready 90646b835dSJunchao Zhang if (rbuflen) PetscCallMPI(MPI_Startall_irecv(rbuflen, link->unit, nrreqs, rreqs)); 91646b835dSJunchao Zhang if (sbuflen) PetscCallMPI(MPI_Startall_isend(sbuflen, link->unit, nsreqs, sreqs)); 92f5d27ee7SJunchao Zhang PetscFunctionReturn(PETSC_SUCCESS); 93f5d27ee7SJunchao Zhang } 94f5d27ee7SJunchao Zhang 95f5d27ee7SJunchao Zhang #if defined(PETSC_HAVE_MPIX_STREAM) 96f5d27ee7SJunchao Zhang // issue MPIX_Isend/Irecv_enqueue() 97f5d27ee7SJunchao Zhang static PetscErrorCode PetscSFLinkStartCommunication_MPIX_Stream(PetscSF sf, PetscSFLink link, PetscSFDirection direction) 98f5d27ee7SJunchao Zhang { 99f5d27ee7SJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic *)sf->data; 100*6497c311SBarry Smith PetscInt i, j; 101*6497c311SBarry Smith PetscMPIInt nrootranks, ndrootranks, nleafranks, ndleafranks, cnt; 102f5d27ee7SJunchao Zhang const PetscInt *rootoffset, *leafoffset; 103f5d27ee7SJunchao Zhang MPI_Aint disp; 104f5d27ee7SJunchao Zhang MPI_Comm stream_comm = sf->stream_comm; 105f5d27ee7SJunchao Zhang MPI_Datatype unit = link->unit; 106f5d27ee7SJunchao Zhang const PetscMemType rootmtype_mpi = link->rootmtype_mpi, leafmtype_mpi = link->leafmtype_mpi; /* Used to select buffers passed to MPI */ 107f5d27ee7SJunchao Zhang const PetscInt rootdirect_mpi = link->rootdirect_mpi, leafdirect_mpi = link->leafdirect_mpi; 108f5d27ee7SJunchao Zhang 109f5d27ee7SJunchao Zhang PetscFunctionBegin; 110f5d27ee7SJunchao Zhang if (bas->rootbuflen[PETSCSF_REMOTE]) { 111f5d27ee7SJunchao Zhang PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, NULL, &rootoffset, NULL)); 112f5d27ee7SJunchao Zhang if (direction == PETSCSF_LEAF2ROOT) { 113f5d27ee7SJunchao Zhang for (i = ndrootranks, j = 0; i < nrootranks; i++, j++) { 114f5d27ee7SJunchao Zhang disp = (rootoffset[i] - rootoffset[ndrootranks]) * link->unitbytes; 115*6497c311SBarry Smith cnt = (PetscMPIInt)(rootoffset[i + 1] - rootoffset[i]); 116f5d27ee7SJunchao Zhang PetscCallMPI(MPIX_Irecv_enqueue(link->rootbuf[PETSCSF_REMOTE][rootmtype_mpi] + disp, cnt, unit, bas->iranks[i], link->tag, stream_comm, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi] + j)); 117f5d27ee7SJunchao Zhang } 118f5d27ee7SJunchao Zhang } else { // PETSCSF_ROOT2LEAF 119f5d27ee7SJunchao Zhang for (i = ndrootranks, j = 0; i < nrootranks; i++, j++) { 120f5d27ee7SJunchao Zhang disp = (rootoffset[i] - rootoffset[ndrootranks]) * link->unitbytes; 121*6497c311SBarry Smith cnt = (PetscMPIInt)(rootoffset[i + 1] - rootoffset[i]); 122f5d27ee7SJunchao Zhang // no need to sync the gpu stream! 123f5d27ee7SJunchao Zhang PetscCallMPI(MPIX_Isend_enqueue(link->rootbuf[PETSCSF_REMOTE][rootmtype_mpi] + disp, cnt, unit, bas->iranks[i], link->tag, stream_comm, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi] + j)); 124f5d27ee7SJunchao Zhang } 125f5d27ee7SJunchao Zhang } 126f5d27ee7SJunchao Zhang } 127f5d27ee7SJunchao Zhang 128f5d27ee7SJunchao Zhang if (sf->leafbuflen[PETSCSF_REMOTE]) { 129f5d27ee7SJunchao Zhang PetscCall(PetscSFGetLeafInfo_Basic(sf, &nleafranks, &ndleafranks, NULL, &leafoffset, NULL, NULL)); 130f5d27ee7SJunchao Zhang if (direction == PETSCSF_LEAF2ROOT) { 131f5d27ee7SJunchao Zhang for (i = ndleafranks, j = 0; i < nleafranks; i++, j++) { 132f5d27ee7SJunchao Zhang disp = (leafoffset[i] - leafoffset[ndleafranks]) * link->unitbytes; 133*6497c311SBarry Smith cnt = (PetscMPIInt)(leafoffset[i + 1] - leafoffset[i]); 134f5d27ee7SJunchao Zhang // no need to sync the gpu stream! 135f5d27ee7SJunchao Zhang PetscCallMPI(MPIX_Isend_enqueue(link->leafbuf[PETSCSF_REMOTE][leafmtype_mpi] + disp, cnt, unit, sf->ranks[i], link->tag, stream_comm, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi] + j)); 136f5d27ee7SJunchao Zhang } 137f5d27ee7SJunchao Zhang } else { // PETSCSF_ROOT2LEAF 138f5d27ee7SJunchao Zhang for (i = ndleafranks, j = 0; i < nleafranks; i++, j++) { 139f5d27ee7SJunchao Zhang disp = (leafoffset[i] - leafoffset[ndleafranks]) * link->unitbytes; 140*6497c311SBarry Smith cnt = (PetscMPIInt)(leafoffset[i + 1] - leafoffset[i]); 141f5d27ee7SJunchao Zhang PetscCallMPI(MPIX_Irecv_enqueue(link->leafbuf[PETSCSF_REMOTE][leafmtype_mpi] + disp, cnt, unit, sf->ranks[i], link->tag, stream_comm, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi] + j)); 142f5d27ee7SJunchao Zhang } 143f5d27ee7SJunchao Zhang } 144f5d27ee7SJunchao Zhang } 145f5d27ee7SJunchao Zhang PetscFunctionReturn(PETSC_SUCCESS); 146f5d27ee7SJunchao Zhang } 147f5d27ee7SJunchao Zhang 148f5d27ee7SJunchao Zhang static PetscErrorCode PetscSFLinkFinishCommunication_MPIX_Stream(PetscSF sf, PetscSFLink link, PetscSFDirection direction) 149f5d27ee7SJunchao Zhang { 150f5d27ee7SJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic *)sf->data; 151f5d27ee7SJunchao Zhang const PetscMemType rootmtype_mpi = link->rootmtype_mpi, leafmtype_mpi = link->leafmtype_mpi; 152f5d27ee7SJunchao Zhang const PetscInt rootdirect_mpi = link->rootdirect_mpi, leafdirect_mpi = link->leafdirect_mpi; 153f5d27ee7SJunchao Zhang 154f5d27ee7SJunchao Zhang PetscFunctionBegin; 155f5d27ee7SJunchao Zhang PetscCallMPI(MPIX_Waitall_enqueue(bas->nrootreqs, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi], MPI_STATUSES_IGNORE)); 156f5d27ee7SJunchao Zhang PetscCallMPI(MPIX_Waitall_enqueue(sf->nleafreqs, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi], MPI_STATUSES_IGNORE)); 157f5d27ee7SJunchao Zhang PetscFunctionReturn(PETSC_SUCCESS); 158f5d27ee7SJunchao Zhang } 159f5d27ee7SJunchao Zhang #endif 160f5d27ee7SJunchao Zhang 161f5d27ee7SJunchao Zhang static PetscErrorCode PetscSFSetCommunicationOps_Basic(PetscSF sf, PetscSFLink link) 162f5d27ee7SJunchao Zhang { 163f5d27ee7SJunchao Zhang PetscFunctionBegin; 164f5d27ee7SJunchao Zhang link->InitMPIRequests = PetscSFLinkInitMPIRequests_Persistent_Basic; 165f5d27ee7SJunchao Zhang link->StartCommunication = PetscSFLinkStartCommunication_Persistent_Basic; 166f5d27ee7SJunchao Zhang #if defined(PETSC_HAVE_MPIX_STREAM) 1676677b1c1SJunchao Zhang const PetscMemType rootmtype_mpi = link->rootmtype_mpi, leafmtype_mpi = link->leafmtype_mpi; 168f5d27ee7SJunchao Zhang if (sf->use_stream_aware_mpi && (PetscMemTypeDevice(rootmtype_mpi) || PetscMemTypeDevice(leafmtype_mpi))) { 169f5d27ee7SJunchao Zhang link->StartCommunication = PetscSFLinkStartCommunication_MPIX_Stream; 170f5d27ee7SJunchao Zhang link->FinishCommunication = PetscSFLinkFinishCommunication_MPIX_Stream; 171f5d27ee7SJunchao Zhang } 172f5d27ee7SJunchao Zhang #endif 173f5d27ee7SJunchao Zhang PetscFunctionReturn(PETSC_SUCCESS); 174f5d27ee7SJunchao Zhang } 175f5d27ee7SJunchao Zhang 17640e23c03SJunchao Zhang /*===================================================================================*/ 17740e23c03SJunchao Zhang /* SF public interface implementations */ 17840e23c03SJunchao Zhang /*===================================================================================*/ 179d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf) 180d71ae5a4SJacob Faibussowitsch { 181b23bfdefSJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic *)sf->data; 182*6497c311SBarry Smith PetscInt *rlengths, *ilengths; 183*6497c311SBarry Smith PetscMPIInt nRemoteRootRanks, nRemoteLeafRanks; 18440e23c03SJunchao Zhang PetscMPIInt rank, niranks, *iranks, tag; 18595fce210SBarry Smith MPI_Comm comm; 186b5a8e515SJed Brown MPI_Group group; 18740e23c03SJunchao Zhang MPI_Request *rootreqs, *leafreqs; 18895fce210SBarry Smith 18995fce210SBarry Smith PetscFunctionBegin; 1909566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_group(PETSC_COMM_SELF, &group)); 1919566063dSJacob Faibussowitsch PetscCall(PetscSFSetUpRanks(sf, group)); 1929566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_free(&group)); 1939566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 1949566063dSJacob Faibussowitsch PetscCall(PetscObjectGetNewTag((PetscObject)sf, &tag)); 1959566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 19695fce210SBarry Smith /* 19795fce210SBarry Smith * Inform roots about how many leaves and from which ranks 19895fce210SBarry Smith */ 1999566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sf->nranks, &rlengths)); 200cd620004SJunchao Zhang /* Determine number, sending ranks and length of incoming */ 201*6497c311SBarry Smith for (PetscMPIInt i = 0; i < sf->nranks; i++) { rlengths[i] = sf->roffset[i + 1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */ } 20271438e86SJunchao Zhang nRemoteRootRanks = sf->nranks - sf->ndranks; 20316cd844bSPierre Jolivet PetscCall(PetscCommBuildTwoSided(comm, 1, MPIU_INT, nRemoteRootRanks, PetscSafePointerPlusOffset(sf->ranks, sf->ndranks), PetscSafePointerPlusOffset(rlengths, sf->ndranks), &niranks, &iranks, (void **)&ilengths)); 204c943f53fSJed Brown 2050b899082SJunchao Zhang /* Sort iranks. See use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() on why. 2060b899082SJunchao Zhang We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is 2070b899082SJunchao Zhang small and the sorting is cheap. 2080b899082SJunchao Zhang */ 2099566063dSJacob Faibussowitsch PetscCall(PetscSortMPIIntWithIntArray(niranks, iranks, ilengths)); 2100b899082SJunchao Zhang 211c943f53fSJed Brown /* Partition into distinguished and non-distinguished incoming ranks */ 212c943f53fSJed Brown bas->ndiranks = sf->ndranks; 213c943f53fSJed Brown bas->niranks = bas->ndiranks + niranks; 2149566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(bas->niranks, &bas->iranks, bas->niranks + 1, &bas->ioffset)); 215c943f53fSJed Brown bas->ioffset[0] = 0; 216*6497c311SBarry Smith for (PetscMPIInt i = 0; i < bas->ndiranks; i++) { 217c943f53fSJed Brown bas->iranks[i] = sf->ranks[i]; 218c943f53fSJed Brown bas->ioffset[i + 1] = bas->ioffset[i] + rlengths[i]; 219c943f53fSJed Brown } 220c9cc58a2SBarry Smith PetscCheck(bas->ndiranks <= 1 && (bas->ndiranks != 1 || bas->iranks[0] == rank), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Broken setup for shared ranks"); 221*6497c311SBarry Smith for (PetscMPIInt i = bas->ndiranks; i < bas->niranks; i++) { 222c943f53fSJed Brown bas->iranks[i] = iranks[i - bas->ndiranks]; 223c943f53fSJed Brown bas->ioffset[i + 1] = bas->ioffset[i] + ilengths[i - bas->ndiranks]; 224c943f53fSJed Brown } 225*6497c311SBarry Smith bas->itotal = bas->ioffset[bas->niranks]; 2269566063dSJacob Faibussowitsch PetscCall(PetscFree(rlengths)); 2279566063dSJacob Faibussowitsch PetscCall(PetscFree(iranks)); 2289566063dSJacob Faibussowitsch PetscCall(PetscFree(ilengths)); 22995fce210SBarry Smith 23095fce210SBarry Smith /* Send leaf identities to roots */ 23171438e86SJunchao Zhang nRemoteLeafRanks = bas->niranks - bas->ndiranks; 2329566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bas->itotal, &bas->irootloc)); 2339566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nRemoteLeafRanks, &rootreqs, nRemoteRootRanks, &leafreqs)); 234*6497c311SBarry Smith for (PetscMPIInt i = bas->ndiranks; i < bas->niranks; i++) PetscCallMPI(MPIU_Irecv(bas->irootloc + bas->ioffset[i], bas->ioffset[i + 1] - bas->ioffset[i], MPIU_INT, bas->iranks[i], tag, comm, &rootreqs[i - bas->ndiranks])); 235*6497c311SBarry Smith for (PetscMPIInt i = 0; i < sf->nranks; i++) { 236c87b50c4SJunchao Zhang PetscInt npoints = sf->roffset[i + 1] - sf->roffset[i]; 23740e23c03SJunchao Zhang if (i < sf->ndranks) { 23808401ef6SPierre Jolivet PetscCheck(sf->ranks[i] == rank, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot interpret distinguished leaf rank"); 23908401ef6SPierre Jolivet PetscCheck(bas->iranks[0] == rank, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot interpret distinguished root rank"); 24008401ef6SPierre Jolivet PetscCheck(npoints == bas->ioffset[1] - bas->ioffset[0], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Distinguished rank exchange has mismatched lengths"); 2419566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(bas->irootloc + bas->ioffset[0], sf->rremote + sf->roffset[i], npoints)); 242c943f53fSJed Brown continue; 243c943f53fSJed Brown } 2449566063dSJacob Faibussowitsch PetscCallMPI(MPIU_Isend(sf->rremote + sf->roffset[i], npoints, MPIU_INT, sf->ranks[i], tag, comm, &leafreqs[i - sf->ndranks])); 245bf39f1bfSJed Brown } 2469566063dSJacob Faibussowitsch PetscCallMPI(MPI_Waitall(nRemoteLeafRanks, rootreqs, MPI_STATUSES_IGNORE)); 2479566063dSJacob Faibussowitsch PetscCallMPI(MPI_Waitall(nRemoteRootRanks, leafreqs, MPI_STATUSES_IGNORE)); 24895fce210SBarry Smith 24971438e86SJunchao Zhang sf->nleafreqs = nRemoteRootRanks; 25071438e86SJunchao Zhang bas->nrootreqs = nRemoteLeafRanks; 251eb02082bSJunchao Zhang 25271438e86SJunchao Zhang /* Setup fields related to packing, such as rootbuflen[] */ 2539566063dSJacob Faibussowitsch PetscCall(PetscSFSetUpPackFields(sf)); 2549566063dSJacob Faibussowitsch PetscCall(PetscFree2(rootreqs, leafreqs)); 2553ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 25695fce210SBarry Smith } 25795fce210SBarry Smith 258d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf) 259d71ae5a4SJacob Faibussowitsch { 260cd620004SJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic *)sf->data; 26171438e86SJunchao Zhang PetscSFLink link = bas->avail, next; 26295fce210SBarry Smith 26395fce210SBarry Smith PetscFunctionBegin; 26428b400f6SJacob Faibussowitsch PetscCheck(!bas->inuse, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Outstanding operation has not been completed"); 2659566063dSJacob Faibussowitsch PetscCall(PetscFree2(bas->iranks, bas->ioffset)); 2669566063dSJacob Faibussowitsch PetscCall(PetscFree(bas->irootloc)); 26771438e86SJunchao Zhang 2687fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_DEVICE) 269*6497c311SBarry Smith for (int i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, bas->irootloc_d[i])); 270eb02082bSJunchao Zhang #endif 27171438e86SJunchao Zhang 27271438e86SJunchao Zhang #if defined(PETSC_HAVE_NVSHMEM) 2739566063dSJacob Faibussowitsch PetscCall(PetscSFReset_Basic_NVSHMEM(sf)); 27471438e86SJunchao Zhang #endif 27571438e86SJunchao Zhang 2769371c9d4SSatish Balay for (; link; link = next) { 2779371c9d4SSatish Balay next = link->next; 2789371c9d4SSatish Balay PetscCall(PetscSFLinkDestroy(sf, link)); 2799371c9d4SSatish Balay } 28071438e86SJunchao Zhang bas->avail = NULL; 2819566063dSJacob Faibussowitsch PetscCall(PetscSFResetPackFields(sf)); 2823ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 28395fce210SBarry Smith } 28495fce210SBarry Smith 285d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf) 286d71ae5a4SJacob Faibussowitsch { 28795fce210SBarry Smith PetscFunctionBegin; 2889566063dSJacob Faibussowitsch PetscCall(PetscSFReset_Basic(sf)); 2899566063dSJacob Faibussowitsch PetscCall(PetscFree(sf->data)); 2903ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 29195fce210SBarry Smith } 29295fce210SBarry Smith 29362152dedSBarry Smith #if defined(PETSC_USE_SINGLE_LIBRARY) 29462152dedSBarry Smith #include <petscmat.h> 29562152dedSBarry Smith 296d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFView_Basic_PatternAndSizes(PetscSF sf, PetscViewer viewer) 297d71ae5a4SJacob Faibussowitsch { 29862152dedSBarry Smith PetscSF_Basic *bas = (PetscSF_Basic *)sf->data; 299*6497c311SBarry Smith PetscMPIInt nrootranks, ndrootranks; 30062152dedSBarry Smith const PetscInt *rootoffset; 30162152dedSBarry Smith PetscMPIInt rank, size; 30253dd6d7dSJunchao Zhang const PetscMPIInt *rootranks; 30362152dedSBarry Smith MPI_Comm comm = PetscObjectComm((PetscObject)sf); 30453dd6d7dSJunchao Zhang PetscScalar unitbytes; 30562152dedSBarry Smith Mat A; 30662152dedSBarry Smith 30762152dedSBarry Smith PetscFunctionBegin; 3089566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 3099566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 31053dd6d7dSJunchao Zhang /* PetscSFView is most useful for the SF used in VecScatterBegin/End in MatMult etc, where we do 31153dd6d7dSJunchao Zhang PetscSFBcast, i.e., roots send data to leaves. We dump the communication pattern into a matrix 31253dd6d7dSJunchao Zhang in senders' view point: how many bytes I will send to my neighbors. 31353dd6d7dSJunchao Zhang 31453dd6d7dSJunchao Zhang Looking at a column of the matrix, one can also know how many bytes the rank will receive from others. 31553dd6d7dSJunchao Zhang 31653dd6d7dSJunchao Zhang If PetscSFLink bas->inuse is available, we can use that to get tree vertex size. But that would give 31753dd6d7dSJunchao Zhang different interpretations for the same SF for different data types. Since we most care about VecScatter, 31853dd6d7dSJunchao Zhang we uniformly treat each vertex as a PetscScalar. 31953dd6d7dSJunchao Zhang */ 32053dd6d7dSJunchao Zhang unitbytes = (PetscScalar)sizeof(PetscScalar); 32153dd6d7dSJunchao Zhang 3229566063dSJacob Faibussowitsch PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, &rootranks, &rootoffset, NULL)); 3239566063dSJacob Faibussowitsch PetscCall(MatCreateAIJ(comm, 1, 1, size, size, 1, NULL, nrootranks - ndrootranks, NULL, &A)); 3249566063dSJacob Faibussowitsch PetscCall(MatSetOptionsPrefix(A, "__petsc_internal__")); /* To prevent the internal A from taking any command line options */ 325*6497c311SBarry Smith for (PetscMPIInt i = 0; i < nrootranks; i++) PetscCall(MatSetValue(A, (PetscInt)rank, bas->iranks[i], (rootoffset[i + 1] - rootoffset[i]) * unitbytes, INSERT_VALUES)); 3269566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 3279566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 3289566063dSJacob Faibussowitsch PetscCall(MatView(A, viewer)); 3299566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 3303ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 33162152dedSBarry Smith } 33262152dedSBarry Smith #endif 33362152dedSBarry Smith 334d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf, PetscViewer viewer) 335d71ae5a4SJacob Faibussowitsch { 33653dd6d7dSJunchao Zhang PetscBool isascii; 33795fce210SBarry Smith 33895fce210SBarry Smith PetscFunctionBegin; 3399566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 3409566063dSJacob Faibussowitsch if (isascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) PetscCall(PetscViewerASCIIPrintf(viewer, " MultiSF sort=%s\n", sf->rankorder ? "rank-order" : "unordered")); 34162152dedSBarry Smith #if defined(PETSC_USE_SINGLE_LIBRARY) 34253dd6d7dSJunchao Zhang else { 34353dd6d7dSJunchao Zhang PetscBool isdraw, isbinary; 3449566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 3459566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 34648a46eb9SPierre Jolivet if ((isascii && viewer->format == PETSC_VIEWER_ASCII_MATLAB) || isdraw || isbinary) PetscCall(PetscSFView_Basic_PatternAndSizes(sf, viewer)); 34762152dedSBarry Smith } 34862152dedSBarry Smith #endif 3493ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 35095fce210SBarry Smith } 35195fce210SBarry Smith 352f5d27ee7SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFBcastBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op) 353d71ae5a4SJacob Faibussowitsch { 354cd620004SJunchao Zhang PetscSFLink link = NULL; 35595fce210SBarry Smith 35695fce210SBarry Smith PetscFunctionBegin; 35771438e86SJunchao Zhang /* Create a communication link, which provides buffers, MPI requests etc (if MPI is used) */ 3589566063dSJacob Faibussowitsch PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, PETSCSF_BCAST, &link)); 35971438e86SJunchao Zhang /* Pack rootdata to rootbuf for remote communication */ 3609566063dSJacob Faibussowitsch PetscCall(PetscSFLinkPackRootData(sf, link, PETSCSF_REMOTE, rootdata)); 361*6497c311SBarry Smith /* Start communication, e.g., post MPIU_Isend */ 3629566063dSJacob Faibussowitsch PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_ROOT2LEAF)); 36371438e86SJunchao Zhang /* Do local scatter (i.e., self to self communication), which overlaps with the remote communication above */ 3649566063dSJacob Faibussowitsch PetscCall(PetscSFLinkScatterLocal(sf, link, PETSCSF_ROOT2LEAF, (void *)rootdata, leafdata, op)); 3653ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 36695fce210SBarry Smith } 36795fce210SBarry Smith 368d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFBcastEnd_Basic(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) 369d71ae5a4SJacob Faibussowitsch { 370cd620004SJunchao Zhang PetscSFLink link = NULL; 37195fce210SBarry Smith 37295fce210SBarry Smith PetscFunctionBegin; 373cd620004SJunchao Zhang /* Retrieve the link used in XxxBegin() with root/leafdata as key */ 3749566063dSJacob Faibussowitsch PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link)); 37571438e86SJunchao Zhang /* Finish remote communication, e.g., post MPI_Waitall */ 3769566063dSJacob Faibussowitsch PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_ROOT2LEAF)); 37771438e86SJunchao Zhang /* Unpack data in leafbuf to leafdata for remote communication */ 3789566063dSJacob Faibussowitsch PetscCall(PetscSFLinkUnpackLeafData(sf, link, PETSCSF_REMOTE, leafdata, op)); 37971438e86SJunchao Zhang /* Recycle the link */ 3809566063dSJacob Faibussowitsch PetscCall(PetscSFLinkReclaim(sf, &link)); 3813ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 382cd620004SJunchao Zhang } 383cd620004SJunchao Zhang 384cd620004SJunchao Zhang /* Shared by ReduceBegin and FetchAndOpBegin */ 385d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode PetscSFLeafToRootBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op, PetscSFOperation sfop, PetscSFLink *out) 386d71ae5a4SJacob Faibussowitsch { 38771438e86SJunchao Zhang PetscSFLink link = NULL; 388cd620004SJunchao Zhang 389cd620004SJunchao Zhang PetscFunctionBegin; 3909566063dSJacob Faibussowitsch PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, sfop, &link)); 3919566063dSJacob Faibussowitsch PetscCall(PetscSFLinkPackLeafData(sf, link, PETSCSF_REMOTE, leafdata)); 3929566063dSJacob Faibussowitsch PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_LEAF2ROOT)); 393cd620004SJunchao Zhang *out = link; 3943ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 39595fce210SBarry Smith } 39695fce210SBarry Smith 39795fce210SBarry Smith /* leaf -> root with reduction */ 398f5d27ee7SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op) 399d71ae5a4SJacob Faibussowitsch { 400cd620004SJunchao Zhang PetscSFLink link = NULL; 40195fce210SBarry Smith 40295fce210SBarry Smith PetscFunctionBegin; 4039566063dSJacob Faibussowitsch PetscCall(PetscSFLeafToRootBegin_Basic(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op, PETSCSF_REDUCE, &link)); 4049566063dSJacob Faibussowitsch PetscCall(PetscSFLinkScatterLocal(sf, link, PETSCSF_LEAF2ROOT, rootdata, (void *)leafdata, op)); 4053ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 40695fce210SBarry Smith } 40795fce210SBarry Smith 408d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) 409d71ae5a4SJacob Faibussowitsch { 410cd620004SJunchao Zhang PetscSFLink link = NULL; 41195fce210SBarry Smith 41295fce210SBarry Smith PetscFunctionBegin; 4139566063dSJacob Faibussowitsch PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link)); 4149566063dSJacob Faibussowitsch PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_LEAF2ROOT)); 4159566063dSJacob Faibussowitsch PetscCall(PetscSFLinkUnpackRootData(sf, link, PETSCSF_REMOTE, rootdata, op)); 4169566063dSJacob Faibussowitsch PetscCall(PetscSFLinkReclaim(sf, &link)); 4173ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 41895fce210SBarry Smith } 41995fce210SBarry Smith 420d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, void *leafupdate, MPI_Op op) 421d71ae5a4SJacob Faibussowitsch { 422cd620004SJunchao Zhang PetscSFLink link = NULL; 42395fce210SBarry Smith 42495fce210SBarry Smith PetscFunctionBegin; 4259566063dSJacob Faibussowitsch PetscCall(PetscSFLeafToRootBegin_Basic(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op, PETSCSF_FETCH, &link)); 4269566063dSJacob Faibussowitsch PetscCall(PetscSFLinkFetchAndOpLocal(sf, link, rootdata, leafdata, leafupdate, op)); 4273ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 42895fce210SBarry Smith } 42995fce210SBarry Smith 430f5d27ee7SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) 431d71ae5a4SJacob Faibussowitsch { 432cd620004SJunchao Zhang PetscSFLink link = NULL; 43395fce210SBarry Smith 43495fce210SBarry Smith PetscFunctionBegin; 4359566063dSJacob Faibussowitsch PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link)); 43695fce210SBarry Smith /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */ 4379566063dSJacob Faibussowitsch PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_LEAF2ROOT)); 438cd620004SJunchao Zhang /* Do fetch-and-op, the (remote) update results are in rootbuf */ 4399566063dSJacob Faibussowitsch PetscCall(PetscSFLinkFetchAndOpRemote(sf, link, rootdata, op)); 440cd620004SJunchao Zhang /* Bcast rootbuf to leafupdate */ 4419566063dSJacob Faibussowitsch PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_ROOT2LEAF)); 4429566063dSJacob Faibussowitsch PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_ROOT2LEAF)); 443b23bfdefSJunchao Zhang /* Unpack and insert fetched data into leaves */ 4449566063dSJacob Faibussowitsch PetscCall(PetscSFLinkUnpackLeafData(sf, link, PETSCSF_REMOTE, leafupdate, MPI_REPLACE)); 4459566063dSJacob Faibussowitsch PetscCall(PetscSFLinkReclaim(sf, &link)); 4463ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 44795fce210SBarry Smith } 44895fce210SBarry Smith 449*6497c311SBarry Smith PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf, PetscMPIInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc) 450d71ae5a4SJacob Faibussowitsch { 4518750ddebSJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic *)sf->data; 4528750ddebSJunchao Zhang 4538750ddebSJunchao Zhang PetscFunctionBegin; 4548750ddebSJunchao Zhang if (niranks) *niranks = bas->niranks; 4558750ddebSJunchao Zhang if (iranks) *iranks = bas->iranks; 4568750ddebSJunchao Zhang if (ioffset) *ioffset = bas->ioffset; 4578750ddebSJunchao Zhang if (irootloc) *irootloc = bas->irootloc; 4583ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4598750ddebSJunchao Zhang } 4608750ddebSJunchao Zhang 461da81f932SPierre Jolivet /* An optimized PetscSFCreateEmbeddedRootSF. We aggressively make use of the established communication on sf. 462f659e5c7SJunchao Zhang We need one bcast on sf, and no communication anymore to build the embedded sf. Note that selected[] 463f659e5c7SJunchao Zhang was sorted before calling the routine. 464f659e5c7SJunchao Zhang */ 465d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedRootSF_Basic(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf) 466d71ae5a4SJacob Faibussowitsch { 467f659e5c7SJunchao Zhang PetscSF esf; 468*6497c311SBarry Smith PetscInt *esf_roffset, *esf_rmine, *esf_rremote; 469*6497c311SBarry Smith PetscInt j, p, q, nroots, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal; 4708e3a54c0SPierre Jolivet char *rootdata, *leafdata, *leafmem; /* Only stores 0 or 1, so we can save memory with char */ 471*6497c311SBarry Smith PetscMPIInt *esf_ranks, nranks, ndranks, niranks, esf_nranks, esf_ndranks, ndiranks; 472f659e5c7SJunchao Zhang const PetscMPIInt *ranks, *iranks; 473cd620004SJunchao Zhang const PetscInt *roffset, *rmine, *rremote, *ioffset, *irootloc; 474f659e5c7SJunchao Zhang PetscBool connected; 475f659e5c7SJunchao Zhang PetscSFNode *new_iremote; 476f659e5c7SJunchao Zhang PetscSF_Basic *bas; 477f659e5c7SJunchao Zhang 478f659e5c7SJunchao Zhang PetscFunctionBegin; 4799566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), &esf)); 4809566063dSJacob Faibussowitsch PetscCall(PetscSFSetFromOptions(esf)); 4819566063dSJacob Faibussowitsch PetscCall(PetscSFSetType(esf, PETSCSFBASIC)); /* This optimized routine can only create a basic sf */ 482f659e5c7SJunchao Zhang 483cd620004SJunchao Zhang /* Find out which leaves are still connected to roots in the embedded sf by doing a Bcast */ 4849566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL)); 4859566063dSJacob Faibussowitsch PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf)); 486cd620004SJunchao Zhang maxlocal = maxleaf - minleaf + 1; 4879566063dSJacob Faibussowitsch PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem)); 4888e3a54c0SPierre Jolivet leafdata = PetscSafePointerPlusOffset(leafmem, -minleaf); 489f659e5c7SJunchao Zhang /* Tag selected roots */ 490*6497c311SBarry Smith for (PetscInt i = 0; i < nselected; ++i) rootdata[selected[i]] = 1; 491f659e5c7SJunchao Zhang 4929566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(sf, MPI_CHAR, rootdata, leafdata, MPI_REPLACE)); 4939566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(sf, MPI_CHAR, rootdata, leafdata, MPI_REPLACE)); 4949566063dSJacob Faibussowitsch PetscCall(PetscSFGetLeafInfo_Basic(sf, &nranks, &ndranks, &ranks, &roffset, &rmine, &rremote)); /* Get send info */ 495cd620004SJunchao Zhang esf_nranks = esf_ndranks = esf_nleaves = 0; 496*6497c311SBarry Smith for (PetscMPIInt i = 0; i < nranks; i++) { 497cd620004SJunchao Zhang connected = PETSC_FALSE; /* Is this process still connected to this remote root rank? */ 4989371c9d4SSatish Balay for (j = roffset[i]; j < roffset[i + 1]; j++) { 4999371c9d4SSatish Balay if (leafdata[rmine[j]]) { 5009371c9d4SSatish Balay esf_nleaves++; 5019371c9d4SSatish Balay connected = PETSC_TRUE; 5029371c9d4SSatish Balay } 5039371c9d4SSatish Balay } 5049371c9d4SSatish Balay if (connected) { 5059371c9d4SSatish Balay esf_nranks++; 5069371c9d4SSatish Balay if (i < ndranks) esf_ndranks++; 5079371c9d4SSatish Balay } 508f659e5c7SJunchao Zhang } 509f659e5c7SJunchao Zhang 510f659e5c7SJunchao Zhang /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */ 5119566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal)); 5129566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(esf_nleaves, &new_iremote)); 5139566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(esf_nranks, &esf_ranks, esf_nranks + 1, &esf_roffset, esf_nleaves, &esf_rmine, esf_nleaves, &esf_rremote)); 514f659e5c7SJunchao Zhang p = 0; /* Counter for connected root ranks */ 515f659e5c7SJunchao Zhang q = 0; /* Counter for connected leaves */ 516f659e5c7SJunchao Zhang esf_roffset[0] = 0; 517*6497c311SBarry Smith for (PetscMPIInt i = 0; i < nranks; i++) { /* Scan leaf data again to fill esf arrays */ 518f659e5c7SJunchao Zhang connected = PETSC_FALSE; 519cd620004SJunchao Zhang for (j = roffset[i]; j < roffset[i + 1]; j++) { 520cd620004SJunchao Zhang if (leafdata[rmine[j]]) { 521f659e5c7SJunchao Zhang esf_rmine[q] = new_ilocal[q] = rmine[j]; 522f659e5c7SJunchao Zhang esf_rremote[q] = rremote[j]; 523f659e5c7SJunchao Zhang new_iremote[q].index = rremote[j]; 524f659e5c7SJunchao Zhang new_iremote[q].rank = ranks[i]; 525f659e5c7SJunchao Zhang connected = PETSC_TRUE; 526f659e5c7SJunchao Zhang q++; 527f659e5c7SJunchao Zhang } 528f659e5c7SJunchao Zhang } 529f659e5c7SJunchao Zhang if (connected) { 530f659e5c7SJunchao Zhang esf_ranks[p] = ranks[i]; 531f659e5c7SJunchao Zhang esf_roffset[p + 1] = q; 532f659e5c7SJunchao Zhang p++; 533f659e5c7SJunchao Zhang } 534f659e5c7SJunchao Zhang } 535f659e5c7SJunchao Zhang 536f659e5c7SJunchao Zhang /* SetGraph internally resets the SF, so we only set its fields after the call */ 5379566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER)); 538f659e5c7SJunchao Zhang esf->nranks = esf_nranks; 539f659e5c7SJunchao Zhang esf->ndranks = esf_ndranks; 540f659e5c7SJunchao Zhang esf->ranks = esf_ranks; 541f659e5c7SJunchao Zhang esf->roffset = esf_roffset; 542f659e5c7SJunchao Zhang esf->rmine = esf_rmine; 543f659e5c7SJunchao Zhang esf->rremote = esf_rremote; 544cd620004SJunchao Zhang esf->nleafreqs = esf_nranks - esf_ndranks; 545f659e5c7SJunchao Zhang 546f659e5c7SJunchao Zhang /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */ 547f659e5c7SJunchao Zhang bas = (PetscSF_Basic *)esf->data; 5489566063dSJacob Faibussowitsch PetscCall(PetscSFGetRootInfo_Basic(sf, &niranks, &ndiranks, &iranks, &ioffset, &irootloc)); /* Get recv info */ 549f659e5c7SJunchao Zhang /* Embedded sf always has simpler communication than the original one. We might allocate longer arrays than needed here. But we 550cd620004SJunchao Zhang we do not care since these arrays are usually short. The benefit is we can fill these arrays by just parsing irootloc once. 551f659e5c7SJunchao Zhang */ 5529566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(niranks, &bas->iranks, niranks + 1, &bas->ioffset)); 5539566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ioffset[niranks], &bas->irootloc)); 554f659e5c7SJunchao Zhang bas->niranks = bas->ndiranks = bas->ioffset[0] = 0; 555f659e5c7SJunchao Zhang p = 0; /* Counter for connected leaf ranks */ 556f659e5c7SJunchao Zhang q = 0; /* Counter for connected roots */ 557*6497c311SBarry Smith for (PetscMPIInt i = 0; i < niranks; i++) { 558f659e5c7SJunchao Zhang connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */ 559f659e5c7SJunchao Zhang for (j = ioffset[i]; j < ioffset[i + 1]; j++) { 560cd620004SJunchao Zhang if (rootdata[irootloc[j]]) { 561f659e5c7SJunchao Zhang bas->irootloc[q++] = irootloc[j]; 562f659e5c7SJunchao Zhang connected = PETSC_TRUE; 563f659e5c7SJunchao Zhang } 564f659e5c7SJunchao Zhang } 565f659e5c7SJunchao Zhang if (connected) { 566f659e5c7SJunchao Zhang bas->niranks++; 567f659e5c7SJunchao Zhang if (i < ndiranks) bas->ndiranks++; /* Note that order of ranks (including distinguished ranks) is kept */ 568f659e5c7SJunchao Zhang bas->iranks[p] = iranks[i]; 569f659e5c7SJunchao Zhang bas->ioffset[p + 1] = q; 570f659e5c7SJunchao Zhang p++; 571f659e5c7SJunchao Zhang } 572f659e5c7SJunchao Zhang } 573f659e5c7SJunchao Zhang bas->itotal = q; 574cd620004SJunchao Zhang bas->nrootreqs = bas->niranks - bas->ndiranks; 575cd620004SJunchao Zhang esf->persistent = PETSC_TRUE; 576cd620004SJunchao Zhang /* Setup packing related fields */ 5779566063dSJacob Faibussowitsch PetscCall(PetscSFSetUpPackFields(esf)); 578f659e5c7SJunchao Zhang 57920c24465SJunchao Zhang /* Copy from PetscSFSetUp(), since this method wants to skip PetscSFSetUp(). */ 58020c24465SJunchao Zhang #if defined(PETSC_HAVE_CUDA) 58120c24465SJunchao Zhang if (esf->backend == PETSCSF_BACKEND_CUDA) { 58271438e86SJunchao Zhang esf->ops->Malloc = PetscSFMalloc_CUDA; 58371438e86SJunchao Zhang esf->ops->Free = PetscSFFree_CUDA; 58420c24465SJunchao Zhang } 58520c24465SJunchao Zhang #endif 58620c24465SJunchao Zhang 58759af0bd3SScott Kruger #if defined(PETSC_HAVE_HIP) 58859af0bd3SScott Kruger /* TODO: Needs debugging */ 58959af0bd3SScott Kruger if (esf->backend == PETSCSF_BACKEND_HIP) { 59059af0bd3SScott Kruger esf->ops->Malloc = PetscSFMalloc_HIP; 59159af0bd3SScott Kruger esf->ops->Free = PetscSFFree_HIP; 59259af0bd3SScott Kruger } 59359af0bd3SScott Kruger #endif 59459af0bd3SScott Kruger 59520c24465SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS) 59620c24465SJunchao Zhang if (esf->backend == PETSCSF_BACKEND_KOKKOS) { 59720c24465SJunchao Zhang esf->ops->Malloc = PetscSFMalloc_Kokkos; 59820c24465SJunchao Zhang esf->ops->Free = PetscSFFree_Kokkos; 59920c24465SJunchao Zhang } 60020c24465SJunchao Zhang #endif 601f659e5c7SJunchao Zhang esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */ 6029566063dSJacob Faibussowitsch PetscCall(PetscFree2(rootdata, leafmem)); 603f659e5c7SJunchao Zhang *newsf = esf; 6043ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 605f659e5c7SJunchao Zhang } 606f659e5c7SJunchao Zhang 607d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf) 608d71ae5a4SJacob Faibussowitsch { 60940e23c03SJunchao Zhang PetscSF_Basic *dat; 61095fce210SBarry Smith 61195fce210SBarry Smith PetscFunctionBegin; 61295fce210SBarry Smith sf->ops->SetUp = PetscSFSetUp_Basic; 61395fce210SBarry Smith sf->ops->Reset = PetscSFReset_Basic; 61495fce210SBarry Smith sf->ops->Destroy = PetscSFDestroy_Basic; 61595fce210SBarry Smith sf->ops->View = PetscSFView_Basic; 616ad227feaSJunchao Zhang sf->ops->BcastBegin = PetscSFBcastBegin_Basic; 617ad227feaSJunchao Zhang sf->ops->BcastEnd = PetscSFBcastEnd_Basic; 61895fce210SBarry Smith sf->ops->ReduceBegin = PetscSFReduceBegin_Basic; 61995fce210SBarry Smith sf->ops->ReduceEnd = PetscSFReduceEnd_Basic; 62095fce210SBarry Smith sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Basic; 62195fce210SBarry Smith sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Basic; 6228750ddebSJunchao Zhang sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Basic; 62372502a1fSJunchao Zhang sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Basic; 624f5d27ee7SJunchao Zhang sf->ops->SetCommunicationOps = PetscSFSetCommunicationOps_Basic; 62595fce210SBarry Smith 6266677b1c1SJunchao Zhang sf->persistent = PETSC_TRUE; // currently SFBASIC always uses persistent send/recv 6276677b1c1SJunchao Zhang sf->collective = PETSC_FALSE; 6286677b1c1SJunchao Zhang 6294dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&dat)); 63040e23c03SJunchao Zhang sf->data = (void *)dat; 6313ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 63295fce210SBarry Smith } 633