xref: /petsc/src/vec/is/sf/impls/basic/sfbasic.c (revision 646b835dd74e42d2b8324facdc6838bbab01abab) !
140e23c03SJunchao Zhang #include <../src/vec/is/sf/impls/basic/sfbasic.h>
2cd620004SJunchao Zhang #include <../src/vec/is/sf/impls/basic/sfpack.h>
353dd6d7dSJunchao Zhang #include <petsc/private/viewerimpl.h>
4b23bfdefSJunchao Zhang 
5f5d27ee7SJunchao Zhang // Init persistent MPI send/recv requests
6f5d27ee7SJunchao Zhang static PetscErrorCode PetscSFLinkInitMPIRequests_Persistent_Basic(PetscSF sf, PetscSFLink link, PetscSFDirection direction)
7f5d27ee7SJunchao Zhang {
8f5d27ee7SJunchao Zhang   PetscSF_Basic     *bas = (PetscSF_Basic *)sf->data;
9f5d27ee7SJunchao Zhang   PetscInt           i, j, cnt, nrootranks, ndrootranks, nleafranks, ndleafranks;
10f5d27ee7SJunchao Zhang   const PetscInt    *rootoffset, *leafoffset;
11f5d27ee7SJunchao Zhang   MPI_Aint           disp;
12f5d27ee7SJunchao Zhang   MPI_Comm           comm          = PetscObjectComm((PetscObject)sf);
13f5d27ee7SJunchao Zhang   MPI_Datatype       unit          = link->unit;
14f5d27ee7SJunchao Zhang   const PetscMemType rootmtype_mpi = link->rootmtype_mpi, leafmtype_mpi = link->leafmtype_mpi; /* Used to select buffers passed to MPI */
15f5d27ee7SJunchao Zhang   const PetscInt     rootdirect_mpi = link->rootdirect_mpi, leafdirect_mpi = link->leafdirect_mpi;
16f5d27ee7SJunchao Zhang 
17f5d27ee7SJunchao Zhang   PetscFunctionBegin;
18f5d27ee7SJunchao Zhang   if (bas->rootbuflen[PETSCSF_REMOTE] && !link->rootreqsinited[direction][rootmtype_mpi][rootdirect_mpi]) {
19f5d27ee7SJunchao Zhang     PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, NULL, &rootoffset, NULL));
20f5d27ee7SJunchao Zhang     if (direction == PETSCSF_LEAF2ROOT) {
21f5d27ee7SJunchao Zhang       for (i = ndrootranks, j = 0; i < nrootranks; i++, j++) {
22f5d27ee7SJunchao Zhang         disp = (rootoffset[i] - rootoffset[ndrootranks]) * link->unitbytes;
23f5d27ee7SJunchao Zhang         cnt  = rootoffset[i + 1] - rootoffset[i];
24f5d27ee7SJunchao Zhang         PetscCallMPI(MPIU_Recv_init(link->rootbuf[PETSCSF_REMOTE][rootmtype_mpi] + disp, cnt, unit, bas->iranks[i], link->tag, comm, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi] + j));
25f5d27ee7SJunchao Zhang       }
26f5d27ee7SJunchao Zhang     } else { /* PETSCSF_ROOT2LEAF */
27f5d27ee7SJunchao Zhang       for (i = ndrootranks, j = 0; i < nrootranks; i++, j++) {
28f5d27ee7SJunchao Zhang         disp = (rootoffset[i] - rootoffset[ndrootranks]) * link->unitbytes;
29f5d27ee7SJunchao Zhang         cnt  = rootoffset[i + 1] - rootoffset[i];
30f5d27ee7SJunchao Zhang         PetscCallMPI(MPIU_Send_init(link->rootbuf[PETSCSF_REMOTE][rootmtype_mpi] + disp, cnt, unit, bas->iranks[i], link->tag, comm, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi] + j));
31f5d27ee7SJunchao Zhang       }
32f5d27ee7SJunchao Zhang     }
33f5d27ee7SJunchao Zhang     link->rootreqsinited[direction][rootmtype_mpi][rootdirect_mpi] = PETSC_TRUE;
34f5d27ee7SJunchao Zhang   }
35f5d27ee7SJunchao Zhang 
36f5d27ee7SJunchao Zhang   if (sf->leafbuflen[PETSCSF_REMOTE] && !link->leafreqsinited[direction][leafmtype_mpi][leafdirect_mpi]) {
37f5d27ee7SJunchao Zhang     PetscCall(PetscSFGetLeafInfo_Basic(sf, &nleafranks, &ndleafranks, NULL, &leafoffset, NULL, NULL));
38f5d27ee7SJunchao Zhang     if (direction == PETSCSF_LEAF2ROOT) {
39f5d27ee7SJunchao Zhang       for (i = ndleafranks, j = 0; i < nleafranks; i++, j++) {
40f5d27ee7SJunchao Zhang         disp = (leafoffset[i] - leafoffset[ndleafranks]) * link->unitbytes;
41f5d27ee7SJunchao Zhang         cnt  = leafoffset[i + 1] - leafoffset[i];
42f5d27ee7SJunchao Zhang         PetscCallMPI(MPIU_Send_init(link->leafbuf[PETSCSF_REMOTE][leafmtype_mpi] + disp, cnt, unit, sf->ranks[i], link->tag, comm, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi] + j));
43f5d27ee7SJunchao Zhang       }
44f5d27ee7SJunchao Zhang     } else { /* PETSCSF_ROOT2LEAF */
45f5d27ee7SJunchao Zhang       for (i = ndleafranks, j = 0; i < nleafranks; i++, j++) {
46f5d27ee7SJunchao Zhang         disp = (leafoffset[i] - leafoffset[ndleafranks]) * link->unitbytes;
47f5d27ee7SJunchao Zhang         cnt  = leafoffset[i + 1] - leafoffset[i];
48f5d27ee7SJunchao Zhang         PetscCallMPI(MPIU_Recv_init(link->leafbuf[PETSCSF_REMOTE][leafmtype_mpi] + disp, cnt, unit, sf->ranks[i], link->tag, comm, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi] + j));
49f5d27ee7SJunchao Zhang       }
50f5d27ee7SJunchao Zhang     }
51f5d27ee7SJunchao Zhang     link->leafreqsinited[direction][leafmtype_mpi][leafdirect_mpi] = PETSC_TRUE;
52f5d27ee7SJunchao Zhang   }
53f5d27ee7SJunchao Zhang   PetscFunctionReturn(PETSC_SUCCESS);
54f5d27ee7SJunchao Zhang }
55f5d27ee7SJunchao Zhang 
56f5d27ee7SJunchao Zhang // Start MPI requests. If use non-GPU aware MPI, we might need to copy data from device buf to host buf
57f5d27ee7SJunchao Zhang static PetscErrorCode PetscSFLinkStartCommunication_Persistent_Basic(PetscSF sf, PetscSFLink link, PetscSFDirection direction)
58f5d27ee7SJunchao Zhang {
59*646b835dSJunchao Zhang   PetscMPIInt    nsreqs = 0, nrreqs = 0;
60*646b835dSJunchao Zhang   MPI_Request   *sreqs = NULL, *rreqs = NULL;
61f5d27ee7SJunchao Zhang   PetscSF_Basic *bas = (PetscSF_Basic *)sf->data;
62*646b835dSJunchao Zhang   PetscInt       sbuflen, rbuflen;
63f5d27ee7SJunchao Zhang 
64f5d27ee7SJunchao Zhang   PetscFunctionBegin;
65*646b835dSJunchao Zhang   rbuflen = (direction == PETSCSF_ROOT2LEAF) ? sf->leafbuflen[PETSCSF_REMOTE] : bas->rootbuflen[PETSCSF_REMOTE];
66*646b835dSJunchao Zhang   if (rbuflen) {
67f5d27ee7SJunchao Zhang     if (direction == PETSCSF_ROOT2LEAF) {
68*646b835dSJunchao Zhang       nrreqs = sf->nleafreqs;
69*646b835dSJunchao Zhang       PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, NULL, NULL, NULL, &rreqs));
70f5d27ee7SJunchao Zhang     } else { /* leaf to root */
71*646b835dSJunchao Zhang       nrreqs = bas->nrootreqs;
72*646b835dSJunchao Zhang       PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, NULL, NULL, &rreqs, NULL));
73f5d27ee7SJunchao Zhang     }
74f5d27ee7SJunchao Zhang   }
75f5d27ee7SJunchao Zhang 
76*646b835dSJunchao Zhang   sbuflen = (direction == PETSCSF_ROOT2LEAF) ? bas->rootbuflen[PETSCSF_REMOTE] : sf->leafbuflen[PETSCSF_REMOTE];
77*646b835dSJunchao Zhang   if (sbuflen) {
78f5d27ee7SJunchao Zhang     if (direction == PETSCSF_ROOT2LEAF) {
79*646b835dSJunchao Zhang       nsreqs = bas->nrootreqs;
80f5d27ee7SJunchao Zhang       PetscCall(PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /*device2host before sending */));
81*646b835dSJunchao Zhang       PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, NULL, NULL, &sreqs, NULL));
82f5d27ee7SJunchao Zhang     } else { /* leaf to root */
83*646b835dSJunchao Zhang       nsreqs = sf->nleafreqs;
84f5d27ee7SJunchao Zhang       PetscCall(PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE));
85*646b835dSJunchao Zhang       PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, NULL, NULL, NULL, &sreqs));
86f5d27ee7SJunchao Zhang     }
87f5d27ee7SJunchao Zhang   }
88*646b835dSJunchao Zhang   PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf, link)); // need to sync the stream to make BOTH sendbuf and recvbuf ready
89*646b835dSJunchao Zhang   if (rbuflen) PetscCallMPI(MPI_Startall_irecv(rbuflen, link->unit, nrreqs, rreqs));
90*646b835dSJunchao Zhang   if (sbuflen) PetscCallMPI(MPI_Startall_isend(sbuflen, link->unit, nsreqs, sreqs));
91f5d27ee7SJunchao Zhang   PetscFunctionReturn(PETSC_SUCCESS);
92f5d27ee7SJunchao Zhang }
93f5d27ee7SJunchao Zhang 
94f5d27ee7SJunchao Zhang #if defined(PETSC_HAVE_MPIX_STREAM)
95f5d27ee7SJunchao Zhang // issue MPIX_Isend/Irecv_enqueue()
96f5d27ee7SJunchao Zhang static PetscErrorCode PetscSFLinkStartCommunication_MPIX_Stream(PetscSF sf, PetscSFLink link, PetscSFDirection direction)
97f5d27ee7SJunchao Zhang {
98f5d27ee7SJunchao Zhang   PetscSF_Basic     *bas = (PetscSF_Basic *)sf->data;
99f5d27ee7SJunchao Zhang   PetscInt           i, j, cnt, nrootranks, ndrootranks, nleafranks, ndleafranks;
100f5d27ee7SJunchao Zhang   const PetscInt    *rootoffset, *leafoffset;
101f5d27ee7SJunchao Zhang   MPI_Aint           disp;
102f5d27ee7SJunchao Zhang   MPI_Comm           stream_comm   = sf->stream_comm;
103f5d27ee7SJunchao Zhang   MPI_Datatype       unit          = link->unit;
104f5d27ee7SJunchao Zhang   const PetscMemType rootmtype_mpi = link->rootmtype_mpi, leafmtype_mpi = link->leafmtype_mpi; /* Used to select buffers passed to MPI */
105f5d27ee7SJunchao Zhang   const PetscInt     rootdirect_mpi = link->rootdirect_mpi, leafdirect_mpi = link->leafdirect_mpi;
106f5d27ee7SJunchao Zhang 
107f5d27ee7SJunchao Zhang   PetscFunctionBegin;
108f5d27ee7SJunchao Zhang   if (bas->rootbuflen[PETSCSF_REMOTE]) {
109f5d27ee7SJunchao Zhang     PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, NULL, &rootoffset, NULL));
110f5d27ee7SJunchao Zhang     if (direction == PETSCSF_LEAF2ROOT) {
111f5d27ee7SJunchao Zhang       for (i = ndrootranks, j = 0; i < nrootranks; i++, j++) {
112f5d27ee7SJunchao Zhang         disp = (rootoffset[i] - rootoffset[ndrootranks]) * link->unitbytes;
113f5d27ee7SJunchao Zhang         cnt  = rootoffset[i + 1] - rootoffset[i];
114f5d27ee7SJunchao Zhang         PetscCallMPI(MPIX_Irecv_enqueue(link->rootbuf[PETSCSF_REMOTE][rootmtype_mpi] + disp, cnt, unit, bas->iranks[i], link->tag, stream_comm, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi] + j));
115f5d27ee7SJunchao Zhang       }
116f5d27ee7SJunchao Zhang     } else { // PETSCSF_ROOT2LEAF
117f5d27ee7SJunchao Zhang       for (i = ndrootranks, j = 0; i < nrootranks; i++, j++) {
118f5d27ee7SJunchao Zhang         disp = (rootoffset[i] - rootoffset[ndrootranks]) * link->unitbytes;
119f5d27ee7SJunchao Zhang         cnt  = rootoffset[i + 1] - rootoffset[i];
120f5d27ee7SJunchao Zhang         // no need to sync the gpu stream!
121f5d27ee7SJunchao Zhang         PetscCallMPI(MPIX_Isend_enqueue(link->rootbuf[PETSCSF_REMOTE][rootmtype_mpi] + disp, cnt, unit, bas->iranks[i], link->tag, stream_comm, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi] + j));
122f5d27ee7SJunchao Zhang       }
123f5d27ee7SJunchao Zhang     }
124f5d27ee7SJunchao Zhang   }
125f5d27ee7SJunchao Zhang 
126f5d27ee7SJunchao Zhang   if (sf->leafbuflen[PETSCSF_REMOTE]) {
127f5d27ee7SJunchao Zhang     PetscCall(PetscSFGetLeafInfo_Basic(sf, &nleafranks, &ndleafranks, NULL, &leafoffset, NULL, NULL));
128f5d27ee7SJunchao Zhang     if (direction == PETSCSF_LEAF2ROOT) {
129f5d27ee7SJunchao Zhang       for (i = ndleafranks, j = 0; i < nleafranks; i++, j++) {
130f5d27ee7SJunchao Zhang         disp = (leafoffset[i] - leafoffset[ndleafranks]) * link->unitbytes;
131f5d27ee7SJunchao Zhang         cnt  = leafoffset[i + 1] - leafoffset[i];
132f5d27ee7SJunchao Zhang         // no need to sync the gpu stream!
133f5d27ee7SJunchao Zhang         PetscCallMPI(MPIX_Isend_enqueue(link->leafbuf[PETSCSF_REMOTE][leafmtype_mpi] + disp, cnt, unit, sf->ranks[i], link->tag, stream_comm, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi] + j));
134f5d27ee7SJunchao Zhang       }
135f5d27ee7SJunchao Zhang     } else { // PETSCSF_ROOT2LEAF
136f5d27ee7SJunchao Zhang       for (i = ndleafranks, j = 0; i < nleafranks; i++, j++) {
137f5d27ee7SJunchao Zhang         disp = (leafoffset[i] - leafoffset[ndleafranks]) * link->unitbytes;
138f5d27ee7SJunchao Zhang         cnt  = leafoffset[i + 1] - leafoffset[i];
139f5d27ee7SJunchao Zhang         PetscCallMPI(MPIX_Irecv_enqueue(link->leafbuf[PETSCSF_REMOTE][leafmtype_mpi] + disp, cnt, unit, sf->ranks[i], link->tag, stream_comm, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi] + j));
140f5d27ee7SJunchao Zhang       }
141f5d27ee7SJunchao Zhang     }
142f5d27ee7SJunchao Zhang   }
143f5d27ee7SJunchao Zhang   PetscFunctionReturn(PETSC_SUCCESS);
144f5d27ee7SJunchao Zhang }
145f5d27ee7SJunchao Zhang 
146f5d27ee7SJunchao Zhang static PetscErrorCode PetscSFLinkFinishCommunication_MPIX_Stream(PetscSF sf, PetscSFLink link, PetscSFDirection direction)
147f5d27ee7SJunchao Zhang {
148f5d27ee7SJunchao Zhang   PetscSF_Basic     *bas           = (PetscSF_Basic *)sf->data;
149f5d27ee7SJunchao Zhang   const PetscMemType rootmtype_mpi = link->rootmtype_mpi, leafmtype_mpi = link->leafmtype_mpi;
150f5d27ee7SJunchao Zhang   const PetscInt     rootdirect_mpi = link->rootdirect_mpi, leafdirect_mpi = link->leafdirect_mpi;
151f5d27ee7SJunchao Zhang 
152f5d27ee7SJunchao Zhang   PetscFunctionBegin;
153f5d27ee7SJunchao Zhang   PetscCallMPI(MPIX_Waitall_enqueue(bas->nrootreqs, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi], MPI_STATUSES_IGNORE));
154f5d27ee7SJunchao Zhang   PetscCallMPI(MPIX_Waitall_enqueue(sf->nleafreqs, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi], MPI_STATUSES_IGNORE));
155f5d27ee7SJunchao Zhang   PetscFunctionReturn(PETSC_SUCCESS);
156f5d27ee7SJunchao Zhang }
157f5d27ee7SJunchao Zhang #endif
158f5d27ee7SJunchao Zhang 
159f5d27ee7SJunchao Zhang static PetscErrorCode PetscSFSetCommunicationOps_Basic(PetscSF sf, PetscSFLink link)
160f5d27ee7SJunchao Zhang {
161f5d27ee7SJunchao Zhang   PetscFunctionBegin;
162f5d27ee7SJunchao Zhang   link->InitMPIRequests    = PetscSFLinkInitMPIRequests_Persistent_Basic;
163f5d27ee7SJunchao Zhang   link->StartCommunication = PetscSFLinkStartCommunication_Persistent_Basic;
164f5d27ee7SJunchao Zhang #if defined(PETSC_HAVE_MPIX_STREAM)
1656677b1c1SJunchao Zhang   const PetscMemType rootmtype_mpi = link->rootmtype_mpi, leafmtype_mpi = link->leafmtype_mpi;
166f5d27ee7SJunchao Zhang   if (sf->use_stream_aware_mpi && (PetscMemTypeDevice(rootmtype_mpi) || PetscMemTypeDevice(leafmtype_mpi))) {
167f5d27ee7SJunchao Zhang     link->StartCommunication  = PetscSFLinkStartCommunication_MPIX_Stream;
168f5d27ee7SJunchao Zhang     link->FinishCommunication = PetscSFLinkFinishCommunication_MPIX_Stream;
169f5d27ee7SJunchao Zhang   }
170f5d27ee7SJunchao Zhang #endif
171f5d27ee7SJunchao Zhang   PetscFunctionReturn(PETSC_SUCCESS);
172f5d27ee7SJunchao Zhang }
173f5d27ee7SJunchao Zhang 
17440e23c03SJunchao Zhang /*===================================================================================*/
17540e23c03SJunchao Zhang /*              SF public interface implementations                                  */
17640e23c03SJunchao Zhang /*===================================================================================*/
177d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf)
178d71ae5a4SJacob Faibussowitsch {
179b23bfdefSJunchao Zhang   PetscSF_Basic *bas = (PetscSF_Basic *)sf->data;
18071438e86SJunchao Zhang   PetscInt      *rlengths, *ilengths, i, nRemoteRootRanks, nRemoteLeafRanks;
18140e23c03SJunchao Zhang   PetscMPIInt    rank, niranks, *iranks, tag;
18295fce210SBarry Smith   MPI_Comm       comm;
183b5a8e515SJed Brown   MPI_Group      group;
18440e23c03SJunchao Zhang   MPI_Request   *rootreqs, *leafreqs;
18595fce210SBarry Smith 
18695fce210SBarry Smith   PetscFunctionBegin;
1879566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_group(PETSC_COMM_SELF, &group));
1889566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUpRanks(sf, group));
1899566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Group_free(&group));
1909566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
1919566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetNewTag((PetscObject)sf, &tag));
1929566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
19395fce210SBarry Smith   /*
19495fce210SBarry Smith    * Inform roots about how many leaves and from which ranks
19595fce210SBarry Smith    */
1969566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(sf->nranks, &rlengths));
197cd620004SJunchao Zhang   /* Determine number, sending ranks and length of incoming */
1989371c9d4SSatish Balay   for (i = 0; i < sf->nranks; i++) { rlengths[i] = sf->roffset[i + 1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */ }
19971438e86SJunchao Zhang   nRemoteRootRanks = sf->nranks - sf->ndranks;
20016cd844bSPierre Jolivet   PetscCall(PetscCommBuildTwoSided(comm, 1, MPIU_INT, nRemoteRootRanks, PetscSafePointerPlusOffset(sf->ranks, sf->ndranks), PetscSafePointerPlusOffset(rlengths, sf->ndranks), &niranks, &iranks, (void **)&ilengths));
201c943f53fSJed Brown 
2020b899082SJunchao Zhang   /* Sort iranks. See use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() on why.
2030b899082SJunchao Zhang      We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is
2040b899082SJunchao Zhang      small and the sorting is cheap.
2050b899082SJunchao Zhang    */
2069566063dSJacob Faibussowitsch   PetscCall(PetscSortMPIIntWithIntArray(niranks, iranks, ilengths));
2070b899082SJunchao Zhang 
208c943f53fSJed Brown   /* Partition into distinguished and non-distinguished incoming ranks */
209c943f53fSJed Brown   bas->ndiranks = sf->ndranks;
210c943f53fSJed Brown   bas->niranks  = bas->ndiranks + niranks;
2119566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(bas->niranks, &bas->iranks, bas->niranks + 1, &bas->ioffset));
212c943f53fSJed Brown   bas->ioffset[0] = 0;
213c943f53fSJed Brown   for (i = 0; i < bas->ndiranks; i++) {
214c943f53fSJed Brown     bas->iranks[i]      = sf->ranks[i];
215c943f53fSJed Brown     bas->ioffset[i + 1] = bas->ioffset[i] + rlengths[i];
216c943f53fSJed Brown   }
217c9cc58a2SBarry Smith   PetscCheck(bas->ndiranks <= 1 && (bas->ndiranks != 1 || bas->iranks[0] == rank), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Broken setup for shared ranks");
21840e23c03SJunchao Zhang   for (; i < bas->niranks; i++) {
219c943f53fSJed Brown     bas->iranks[i]      = iranks[i - bas->ndiranks];
220c943f53fSJed Brown     bas->ioffset[i + 1] = bas->ioffset[i] + ilengths[i - bas->ndiranks];
221c943f53fSJed Brown   }
222c943f53fSJed Brown   bas->itotal = bas->ioffset[i];
2239566063dSJacob Faibussowitsch   PetscCall(PetscFree(rlengths));
2249566063dSJacob Faibussowitsch   PetscCall(PetscFree(iranks));
2259566063dSJacob Faibussowitsch   PetscCall(PetscFree(ilengths));
22695fce210SBarry Smith 
22795fce210SBarry Smith   /* Send leaf identities to roots */
22871438e86SJunchao Zhang   nRemoteLeafRanks = bas->niranks - bas->ndiranks;
2299566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bas->itotal, &bas->irootloc));
2309566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(nRemoteLeafRanks, &rootreqs, nRemoteRootRanks, &leafreqs));
23148a46eb9SPierre Jolivet   for (i = bas->ndiranks; i < bas->niranks; i++) PetscCallMPI(MPIU_Irecv(bas->irootloc + bas->ioffset[i], bas->ioffset[i + 1] - bas->ioffset[i], MPIU_INT, bas->iranks[i], tag, comm, &rootreqs[i - bas->ndiranks]));
23240e23c03SJunchao Zhang   for (i = 0; i < sf->nranks; i++) {
233c87b50c4SJunchao Zhang     PetscInt npoints = sf->roffset[i + 1] - sf->roffset[i];
23440e23c03SJunchao Zhang     if (i < sf->ndranks) {
23508401ef6SPierre Jolivet       PetscCheck(sf->ranks[i] == rank, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot interpret distinguished leaf rank");
23608401ef6SPierre Jolivet       PetscCheck(bas->iranks[0] == rank, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot interpret distinguished root rank");
23708401ef6SPierre Jolivet       PetscCheck(npoints == bas->ioffset[1] - bas->ioffset[0], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Distinguished rank exchange has mismatched lengths");
2389566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(bas->irootloc + bas->ioffset[0], sf->rremote + sf->roffset[i], npoints));
239c943f53fSJed Brown       continue;
240c943f53fSJed Brown     }
2419566063dSJacob Faibussowitsch     PetscCallMPI(MPIU_Isend(sf->rremote + sf->roffset[i], npoints, MPIU_INT, sf->ranks[i], tag, comm, &leafreqs[i - sf->ndranks]));
242bf39f1bfSJed Brown   }
2439566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Waitall(nRemoteLeafRanks, rootreqs, MPI_STATUSES_IGNORE));
2449566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Waitall(nRemoteRootRanks, leafreqs, MPI_STATUSES_IGNORE));
24595fce210SBarry Smith 
24671438e86SJunchao Zhang   sf->nleafreqs  = nRemoteRootRanks;
24771438e86SJunchao Zhang   bas->nrootreqs = nRemoteLeafRanks;
248eb02082bSJunchao Zhang 
24971438e86SJunchao Zhang   /* Setup fields related to packing, such as rootbuflen[] */
2509566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUpPackFields(sf));
2519566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rootreqs, leafreqs));
2523ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
25395fce210SBarry Smith }
25495fce210SBarry Smith 
255d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf)
256d71ae5a4SJacob Faibussowitsch {
257cd620004SJunchao Zhang   PetscSF_Basic *bas  = (PetscSF_Basic *)sf->data;
25871438e86SJunchao Zhang   PetscSFLink    link = bas->avail, next;
25995fce210SBarry Smith 
26095fce210SBarry Smith   PetscFunctionBegin;
26128b400f6SJacob Faibussowitsch   PetscCheck(!bas->inuse, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Outstanding operation has not been completed");
2629566063dSJacob Faibussowitsch   PetscCall(PetscFree2(bas->iranks, bas->ioffset));
2639566063dSJacob Faibussowitsch   PetscCall(PetscFree(bas->irootloc));
26471438e86SJunchao Zhang 
2657fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
2669566063dSJacob Faibussowitsch   for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, bas->irootloc_d[i]));
267eb02082bSJunchao Zhang #endif
26871438e86SJunchao Zhang 
26971438e86SJunchao Zhang #if defined(PETSC_HAVE_NVSHMEM)
2709566063dSJacob Faibussowitsch   PetscCall(PetscSFReset_Basic_NVSHMEM(sf));
27171438e86SJunchao Zhang #endif
27271438e86SJunchao Zhang 
2739371c9d4SSatish Balay   for (; link; link = next) {
2749371c9d4SSatish Balay     next = link->next;
2759371c9d4SSatish Balay     PetscCall(PetscSFLinkDestroy(sf, link));
2769371c9d4SSatish Balay   }
27771438e86SJunchao Zhang   bas->avail = NULL;
2789566063dSJacob Faibussowitsch   PetscCall(PetscSFResetPackFields(sf));
2793ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
28095fce210SBarry Smith }
28195fce210SBarry Smith 
282d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf)
283d71ae5a4SJacob Faibussowitsch {
28495fce210SBarry Smith   PetscFunctionBegin;
2859566063dSJacob Faibussowitsch   PetscCall(PetscSFReset_Basic(sf));
2869566063dSJacob Faibussowitsch   PetscCall(PetscFree(sf->data));
2873ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
28895fce210SBarry Smith }
28995fce210SBarry Smith 
29062152dedSBarry Smith #if defined(PETSC_USE_SINGLE_LIBRARY)
29162152dedSBarry Smith   #include <petscmat.h>
29262152dedSBarry Smith 
293d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFView_Basic_PatternAndSizes(PetscSF sf, PetscViewer viewer)
294d71ae5a4SJacob Faibussowitsch {
29562152dedSBarry Smith   PetscSF_Basic     *bas = (PetscSF_Basic *)sf->data;
29653dd6d7dSJunchao Zhang   PetscInt           i, nrootranks, ndrootranks;
29762152dedSBarry Smith   const PetscInt    *rootoffset;
29862152dedSBarry Smith   PetscMPIInt        rank, size;
29953dd6d7dSJunchao Zhang   const PetscMPIInt *rootranks;
30062152dedSBarry Smith   MPI_Comm           comm = PetscObjectComm((PetscObject)sf);
30153dd6d7dSJunchao Zhang   PetscScalar        unitbytes;
30262152dedSBarry Smith   Mat                A;
30362152dedSBarry Smith 
30462152dedSBarry Smith   PetscFunctionBegin;
3059566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
3069566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
30753dd6d7dSJunchao Zhang   /* PetscSFView is most useful for the SF used in VecScatterBegin/End in MatMult etc, where we do
30853dd6d7dSJunchao Zhang     PetscSFBcast, i.e., roots send data to leaves.  We dump the communication pattern into a matrix
30953dd6d7dSJunchao Zhang     in senders' view point: how many bytes I will send to my neighbors.
31053dd6d7dSJunchao Zhang 
31153dd6d7dSJunchao Zhang     Looking at a column of the matrix, one can also know how many bytes the rank will receive from others.
31253dd6d7dSJunchao Zhang 
31353dd6d7dSJunchao Zhang     If PetscSFLink bas->inuse is available, we can use that to get tree vertex size. But that would give
31453dd6d7dSJunchao Zhang     different interpretations for the same SF for different data types. Since we most care about VecScatter,
31553dd6d7dSJunchao Zhang     we uniformly treat each vertex as a PetscScalar.
31653dd6d7dSJunchao Zhang   */
31753dd6d7dSJunchao Zhang   unitbytes = (PetscScalar)sizeof(PetscScalar);
31853dd6d7dSJunchao Zhang 
3199566063dSJacob Faibussowitsch   PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, &rootranks, &rootoffset, NULL));
3209566063dSJacob Faibussowitsch   PetscCall(MatCreateAIJ(comm, 1, 1, size, size, 1, NULL, nrootranks - ndrootranks, NULL, &A));
3219566063dSJacob Faibussowitsch   PetscCall(MatSetOptionsPrefix(A, "__petsc_internal__")); /* To prevent the internal A from taking any command line options */
32248a46eb9SPierre Jolivet   for (i = 0; i < nrootranks; i++) PetscCall(MatSetValue(A, (PetscInt)rank, bas->iranks[i], (rootoffset[i + 1] - rootoffset[i]) * unitbytes, INSERT_VALUES));
3239566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
3249566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
3259566063dSJacob Faibussowitsch   PetscCall(MatView(A, viewer));
3269566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&A));
3273ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
32862152dedSBarry Smith }
32962152dedSBarry Smith #endif
33062152dedSBarry Smith 
331d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf, PetscViewer viewer)
332d71ae5a4SJacob Faibussowitsch {
33353dd6d7dSJunchao Zhang   PetscBool isascii;
33495fce210SBarry Smith 
33595fce210SBarry Smith   PetscFunctionBegin;
3369566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
3379566063dSJacob Faibussowitsch   if (isascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) PetscCall(PetscViewerASCIIPrintf(viewer, "  MultiSF sort=%s\n", sf->rankorder ? "rank-order" : "unordered"));
33862152dedSBarry Smith #if defined(PETSC_USE_SINGLE_LIBRARY)
33953dd6d7dSJunchao Zhang   else {
34053dd6d7dSJunchao Zhang     PetscBool isdraw, isbinary;
3419566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
3429566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
34348a46eb9SPierre Jolivet     if ((isascii && viewer->format == PETSC_VIEWER_ASCII_MATLAB) || isdraw || isbinary) PetscCall(PetscSFView_Basic_PatternAndSizes(sf, viewer));
34462152dedSBarry Smith   }
34562152dedSBarry Smith #endif
3463ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
34795fce210SBarry Smith }
34895fce210SBarry Smith 
349f5d27ee7SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFBcastBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
350d71ae5a4SJacob Faibussowitsch {
351cd620004SJunchao Zhang   PetscSFLink link = NULL;
35295fce210SBarry Smith 
35395fce210SBarry Smith   PetscFunctionBegin;
35471438e86SJunchao Zhang   /* Create a communication link, which provides buffers, MPI requests etc (if MPI is used) */
3559566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, PETSCSF_BCAST, &link));
35671438e86SJunchao Zhang   /* Pack rootdata to rootbuf for remote communication */
3579566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkPackRootData(sf, link, PETSCSF_REMOTE, rootdata));
358da81f932SPierre Jolivet   /* Start communication, e.g., post MPI_Isend */
3599566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_ROOT2LEAF));
36071438e86SJunchao Zhang   /* Do local scatter (i.e., self to self communication), which overlaps with the remote communication above */
3619566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkScatterLocal(sf, link, PETSCSF_ROOT2LEAF, (void *)rootdata, leafdata, op));
3623ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
36395fce210SBarry Smith }
36495fce210SBarry Smith 
365d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFBcastEnd_Basic(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
366d71ae5a4SJacob Faibussowitsch {
367cd620004SJunchao Zhang   PetscSFLink link = NULL;
36895fce210SBarry Smith 
36995fce210SBarry Smith   PetscFunctionBegin;
370cd620004SJunchao Zhang   /* Retrieve the link used in XxxBegin() with root/leafdata as key */
3719566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link));
37271438e86SJunchao Zhang   /* Finish remote communication, e.g., post MPI_Waitall */
3739566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_ROOT2LEAF));
37471438e86SJunchao Zhang   /* Unpack data in leafbuf to leafdata for remote communication */
3759566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkUnpackLeafData(sf, link, PETSCSF_REMOTE, leafdata, op));
37671438e86SJunchao Zhang   /* Recycle the link */
3779566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkReclaim(sf, &link));
3783ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
379cd620004SJunchao Zhang }
380cd620004SJunchao Zhang 
381cd620004SJunchao Zhang /* Shared by ReduceBegin and FetchAndOpBegin */
382d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode PetscSFLeafToRootBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op, PetscSFOperation sfop, PetscSFLink *out)
383d71ae5a4SJacob Faibussowitsch {
38471438e86SJunchao Zhang   PetscSFLink link = NULL;
385cd620004SJunchao Zhang 
386cd620004SJunchao Zhang   PetscFunctionBegin;
3879566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, sfop, &link));
3889566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkPackLeafData(sf, link, PETSCSF_REMOTE, leafdata));
3899566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_LEAF2ROOT));
390cd620004SJunchao Zhang   *out = link;
3913ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
39295fce210SBarry Smith }
39395fce210SBarry Smith 
39495fce210SBarry Smith /* leaf -> root with reduction */
395f5d27ee7SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
396d71ae5a4SJacob Faibussowitsch {
397cd620004SJunchao Zhang   PetscSFLink link = NULL;
39895fce210SBarry Smith 
39995fce210SBarry Smith   PetscFunctionBegin;
4009566063dSJacob Faibussowitsch   PetscCall(PetscSFLeafToRootBegin_Basic(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op, PETSCSF_REDUCE, &link));
4019566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkScatterLocal(sf, link, PETSCSF_LEAF2ROOT, rootdata, (void *)leafdata, op));
4023ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
40395fce210SBarry Smith }
40495fce210SBarry Smith 
405d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
406d71ae5a4SJacob Faibussowitsch {
407cd620004SJunchao Zhang   PetscSFLink link = NULL;
40895fce210SBarry Smith 
40995fce210SBarry Smith   PetscFunctionBegin;
4109566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link));
4119566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_LEAF2ROOT));
4129566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkUnpackRootData(sf, link, PETSCSF_REMOTE, rootdata, op));
4139566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkReclaim(sf, &link));
4143ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
41595fce210SBarry Smith }
41695fce210SBarry Smith 
417d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, void *leafupdate, MPI_Op op)
418d71ae5a4SJacob Faibussowitsch {
419cd620004SJunchao Zhang   PetscSFLink link = NULL;
42095fce210SBarry Smith 
42195fce210SBarry Smith   PetscFunctionBegin;
4229566063dSJacob Faibussowitsch   PetscCall(PetscSFLeafToRootBegin_Basic(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op, PETSCSF_FETCH, &link));
4239566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkFetchAndOpLocal(sf, link, rootdata, leafdata, leafupdate, op));
4243ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
42595fce210SBarry Smith }
42695fce210SBarry Smith 
427f5d27ee7SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
428d71ae5a4SJacob Faibussowitsch {
429cd620004SJunchao Zhang   PetscSFLink link = NULL;
43095fce210SBarry Smith 
43195fce210SBarry Smith   PetscFunctionBegin;
4329566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link));
43395fce210SBarry Smith   /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
4349566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_LEAF2ROOT));
435cd620004SJunchao Zhang   /* Do fetch-and-op, the (remote) update results are in rootbuf */
4369566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkFetchAndOpRemote(sf, link, rootdata, op));
437cd620004SJunchao Zhang   /* Bcast rootbuf to leafupdate */
4389566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_ROOT2LEAF));
4399566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_ROOT2LEAF));
440b23bfdefSJunchao Zhang   /* Unpack and insert fetched data into leaves */
4419566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkUnpackLeafData(sf, link, PETSCSF_REMOTE, leafupdate, MPI_REPLACE));
4429566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkReclaim(sf, &link));
4433ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
44495fce210SBarry Smith }
44595fce210SBarry Smith 
446d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf, PetscInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc)
447d71ae5a4SJacob Faibussowitsch {
4488750ddebSJunchao Zhang   PetscSF_Basic *bas = (PetscSF_Basic *)sf->data;
4498750ddebSJunchao Zhang 
4508750ddebSJunchao Zhang   PetscFunctionBegin;
4518750ddebSJunchao Zhang   if (niranks) *niranks = bas->niranks;
4528750ddebSJunchao Zhang   if (iranks) *iranks = bas->iranks;
4538750ddebSJunchao Zhang   if (ioffset) *ioffset = bas->ioffset;
4548750ddebSJunchao Zhang   if (irootloc) *irootloc = bas->irootloc;
4553ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4568750ddebSJunchao Zhang }
4578750ddebSJunchao Zhang 
458da81f932SPierre Jolivet /* An optimized PetscSFCreateEmbeddedRootSF. We aggressively make use of the established communication on sf.
459f659e5c7SJunchao Zhang    We need one bcast on sf, and no communication anymore to build the embedded sf. Note that selected[]
460f659e5c7SJunchao Zhang    was sorted before calling the routine.
461f659e5c7SJunchao Zhang  */
462d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedRootSF_Basic(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf)
463d71ae5a4SJacob Faibussowitsch {
464f659e5c7SJunchao Zhang   PetscSF            esf;
465cd620004SJunchao Zhang   PetscInt           esf_nranks, esf_ndranks, *esf_roffset, *esf_rmine, *esf_rremote;
466cd620004SJunchao Zhang   PetscInt           i, j, p, q, nroots, esf_nleaves, *new_ilocal, nranks, ndranks, niranks, ndiranks, minleaf, maxleaf, maxlocal;
4678e3a54c0SPierre Jolivet   char              *rootdata, *leafdata, *leafmem; /* Only stores 0 or 1, so we can save memory with char */
468f659e5c7SJunchao Zhang   PetscMPIInt       *esf_ranks;
469f659e5c7SJunchao Zhang   const PetscMPIInt *ranks, *iranks;
470cd620004SJunchao Zhang   const PetscInt    *roffset, *rmine, *rremote, *ioffset, *irootloc;
471f659e5c7SJunchao Zhang   PetscBool          connected;
472f659e5c7SJunchao Zhang   PetscSFNode       *new_iremote;
473f659e5c7SJunchao Zhang   PetscSF_Basic     *bas;
474f659e5c7SJunchao Zhang 
475f659e5c7SJunchao Zhang   PetscFunctionBegin;
4769566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), &esf));
4779566063dSJacob Faibussowitsch   PetscCall(PetscSFSetFromOptions(esf));
4789566063dSJacob Faibussowitsch   PetscCall(PetscSFSetType(esf, PETSCSFBASIC)); /* This optimized routine can only create a basic sf */
479f659e5c7SJunchao Zhang 
480cd620004SJunchao Zhang   /* Find out which leaves are still connected to roots in the embedded sf by doing a Bcast */
4819566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
4829566063dSJacob Faibussowitsch   PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
483cd620004SJunchao Zhang   maxlocal = maxleaf - minleaf + 1;
4849566063dSJacob Faibussowitsch   PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem));
4858e3a54c0SPierre Jolivet   leafdata = PetscSafePointerPlusOffset(leafmem, -minleaf);
486f659e5c7SJunchao Zhang   /* Tag selected roots */
487f659e5c7SJunchao Zhang   for (i = 0; i < nselected; ++i) rootdata[selected[i]] = 1;
488f659e5c7SJunchao Zhang 
4899566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastBegin(sf, MPI_CHAR, rootdata, leafdata, MPI_REPLACE));
4909566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(sf, MPI_CHAR, rootdata, leafdata, MPI_REPLACE));
4919566063dSJacob Faibussowitsch   PetscCall(PetscSFGetLeafInfo_Basic(sf, &nranks, &ndranks, &ranks, &roffset, &rmine, &rremote)); /* Get send info */
492cd620004SJunchao Zhang   esf_nranks = esf_ndranks = esf_nleaves = 0;
493b23bfdefSJunchao Zhang   for (i = 0; i < nranks; i++) {
494cd620004SJunchao Zhang     connected = PETSC_FALSE; /* Is this process still connected to this remote root rank? */
4959371c9d4SSatish Balay     for (j = roffset[i]; j < roffset[i + 1]; j++) {
4969371c9d4SSatish Balay       if (leafdata[rmine[j]]) {
4979371c9d4SSatish Balay         esf_nleaves++;
4989371c9d4SSatish Balay         connected = PETSC_TRUE;
4999371c9d4SSatish Balay       }
5009371c9d4SSatish Balay     }
5019371c9d4SSatish Balay     if (connected) {
5029371c9d4SSatish Balay       esf_nranks++;
5039371c9d4SSatish Balay       if (i < ndranks) esf_ndranks++;
5049371c9d4SSatish Balay     }
505f659e5c7SJunchao Zhang   }
506f659e5c7SJunchao Zhang 
507f659e5c7SJunchao Zhang   /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */
5089566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal));
5099566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(esf_nleaves, &new_iremote));
5109566063dSJacob Faibussowitsch   PetscCall(PetscMalloc4(esf_nranks, &esf_ranks, esf_nranks + 1, &esf_roffset, esf_nleaves, &esf_rmine, esf_nleaves, &esf_rremote));
511f659e5c7SJunchao Zhang   p              = 0; /* Counter for connected root ranks */
512f659e5c7SJunchao Zhang   q              = 0; /* Counter for connected leaves */
513f659e5c7SJunchao Zhang   esf_roffset[0] = 0;
514f659e5c7SJunchao Zhang   for (i = 0; i < nranks; i++) { /* Scan leaf data again to fill esf arrays */
515f659e5c7SJunchao Zhang     connected = PETSC_FALSE;
516cd620004SJunchao Zhang     for (j = roffset[i]; j < roffset[i + 1]; j++) {
517cd620004SJunchao Zhang       if (leafdata[rmine[j]]) {
518f659e5c7SJunchao Zhang         esf_rmine[q] = new_ilocal[q] = rmine[j];
519f659e5c7SJunchao Zhang         esf_rremote[q]               = rremote[j];
520f659e5c7SJunchao Zhang         new_iremote[q].index         = rremote[j];
521f659e5c7SJunchao Zhang         new_iremote[q].rank          = ranks[i];
522f659e5c7SJunchao Zhang         connected                    = PETSC_TRUE;
523f659e5c7SJunchao Zhang         q++;
524f659e5c7SJunchao Zhang       }
525f659e5c7SJunchao Zhang     }
526f659e5c7SJunchao Zhang     if (connected) {
527f659e5c7SJunchao Zhang       esf_ranks[p]       = ranks[i];
528f659e5c7SJunchao Zhang       esf_roffset[p + 1] = q;
529f659e5c7SJunchao Zhang       p++;
530f659e5c7SJunchao Zhang     }
531f659e5c7SJunchao Zhang   }
532f659e5c7SJunchao Zhang 
533f659e5c7SJunchao Zhang   /* SetGraph internally resets the SF, so we only set its fields after the call */
5349566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
535f659e5c7SJunchao Zhang   esf->nranks    = esf_nranks;
536f659e5c7SJunchao Zhang   esf->ndranks   = esf_ndranks;
537f659e5c7SJunchao Zhang   esf->ranks     = esf_ranks;
538f659e5c7SJunchao Zhang   esf->roffset   = esf_roffset;
539f659e5c7SJunchao Zhang   esf->rmine     = esf_rmine;
540f659e5c7SJunchao Zhang   esf->rremote   = esf_rremote;
541cd620004SJunchao Zhang   esf->nleafreqs = esf_nranks - esf_ndranks;
542f659e5c7SJunchao Zhang 
543f659e5c7SJunchao Zhang   /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */
544f659e5c7SJunchao Zhang   bas = (PetscSF_Basic *)esf->data;
5459566063dSJacob Faibussowitsch   PetscCall(PetscSFGetRootInfo_Basic(sf, &niranks, &ndiranks, &iranks, &ioffset, &irootloc)); /* Get recv info */
546f659e5c7SJunchao Zhang   /* Embedded sf always has simpler communication than the original one. We might allocate longer arrays than needed here. But we
547cd620004SJunchao Zhang      we do not care since these arrays are usually short. The benefit is we can fill these arrays by just parsing irootloc once.
548f659e5c7SJunchao Zhang    */
5499566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(niranks, &bas->iranks, niranks + 1, &bas->ioffset));
5509566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ioffset[niranks], &bas->irootloc));
551f659e5c7SJunchao Zhang   bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
552f659e5c7SJunchao Zhang   p                                              = 0; /* Counter for connected leaf ranks */
553f659e5c7SJunchao Zhang   q                                              = 0; /* Counter for connected roots */
554f659e5c7SJunchao Zhang   for (i = 0; i < niranks; i++) {
555f659e5c7SJunchao Zhang     connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
556f659e5c7SJunchao Zhang     for (j = ioffset[i]; j < ioffset[i + 1]; j++) {
557cd620004SJunchao Zhang       if (rootdata[irootloc[j]]) {
558f659e5c7SJunchao Zhang         bas->irootloc[q++] = irootloc[j];
559f659e5c7SJunchao Zhang         connected          = PETSC_TRUE;
560f659e5c7SJunchao Zhang       }
561f659e5c7SJunchao Zhang     }
562f659e5c7SJunchao Zhang     if (connected) {
563f659e5c7SJunchao Zhang       bas->niranks++;
564f659e5c7SJunchao Zhang       if (i < ndiranks) bas->ndiranks++; /* Note that order of ranks (including distinguished ranks) is kept */
565f659e5c7SJunchao Zhang       bas->iranks[p]      = iranks[i];
566f659e5c7SJunchao Zhang       bas->ioffset[p + 1] = q;
567f659e5c7SJunchao Zhang       p++;
568f659e5c7SJunchao Zhang     }
569f659e5c7SJunchao Zhang   }
570f659e5c7SJunchao Zhang   bas->itotal     = q;
571cd620004SJunchao Zhang   bas->nrootreqs  = bas->niranks - bas->ndiranks;
572cd620004SJunchao Zhang   esf->persistent = PETSC_TRUE;
573cd620004SJunchao Zhang   /* Setup packing related fields */
5749566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUpPackFields(esf));
575f659e5c7SJunchao Zhang 
57620c24465SJunchao Zhang   /* Copy from PetscSFSetUp(), since this method wants to skip PetscSFSetUp(). */
57720c24465SJunchao Zhang #if defined(PETSC_HAVE_CUDA)
57820c24465SJunchao Zhang   if (esf->backend == PETSCSF_BACKEND_CUDA) {
57971438e86SJunchao Zhang     esf->ops->Malloc = PetscSFMalloc_CUDA;
58071438e86SJunchao Zhang     esf->ops->Free   = PetscSFFree_CUDA;
58120c24465SJunchao Zhang   }
58220c24465SJunchao Zhang #endif
58320c24465SJunchao Zhang 
58459af0bd3SScott Kruger #if defined(PETSC_HAVE_HIP)
58559af0bd3SScott Kruger   /* TODO: Needs debugging */
58659af0bd3SScott Kruger   if (esf->backend == PETSCSF_BACKEND_HIP) {
58759af0bd3SScott Kruger     esf->ops->Malloc = PetscSFMalloc_HIP;
58859af0bd3SScott Kruger     esf->ops->Free   = PetscSFFree_HIP;
58959af0bd3SScott Kruger   }
59059af0bd3SScott Kruger #endif
59159af0bd3SScott Kruger 
59220c24465SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS)
59320c24465SJunchao Zhang   if (esf->backend == PETSCSF_BACKEND_KOKKOS) {
59420c24465SJunchao Zhang     esf->ops->Malloc = PetscSFMalloc_Kokkos;
59520c24465SJunchao Zhang     esf->ops->Free   = PetscSFFree_Kokkos;
59620c24465SJunchao Zhang   }
59720c24465SJunchao Zhang #endif
598f659e5c7SJunchao Zhang   esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */
5999566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rootdata, leafmem));
600f659e5c7SJunchao Zhang   *newsf = esf;
6013ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
602f659e5c7SJunchao Zhang }
603f659e5c7SJunchao Zhang 
604d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf)
605d71ae5a4SJacob Faibussowitsch {
60640e23c03SJunchao Zhang   PetscSF_Basic *dat;
60795fce210SBarry Smith 
60895fce210SBarry Smith   PetscFunctionBegin;
60995fce210SBarry Smith   sf->ops->SetUp                = PetscSFSetUp_Basic;
61095fce210SBarry Smith   sf->ops->Reset                = PetscSFReset_Basic;
61195fce210SBarry Smith   sf->ops->Destroy              = PetscSFDestroy_Basic;
61295fce210SBarry Smith   sf->ops->View                 = PetscSFView_Basic;
613ad227feaSJunchao Zhang   sf->ops->BcastBegin           = PetscSFBcastBegin_Basic;
614ad227feaSJunchao Zhang   sf->ops->BcastEnd             = PetscSFBcastEnd_Basic;
61595fce210SBarry Smith   sf->ops->ReduceBegin          = PetscSFReduceBegin_Basic;
61695fce210SBarry Smith   sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
61795fce210SBarry Smith   sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Basic;
61895fce210SBarry Smith   sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Basic;
6198750ddebSJunchao Zhang   sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
62072502a1fSJunchao Zhang   sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Basic;
621f5d27ee7SJunchao Zhang   sf->ops->SetCommunicationOps  = PetscSFSetCommunicationOps_Basic;
62295fce210SBarry Smith 
6236677b1c1SJunchao Zhang   sf->persistent = PETSC_TRUE; // currently SFBASIC always uses persistent send/recv
6246677b1c1SJunchao Zhang   sf->collective = PETSC_FALSE;
6256677b1c1SJunchao Zhang 
6264dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&dat));
62740e23c03SJunchao Zhang   sf->data = (void *)dat;
6283ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
62995fce210SBarry Smith }
630