xref: /petsc/src/vec/is/sf/impls/basic/sfbasic.c (revision da81f9329be15cc55f054c8a00978087195c9247)
140e23c03SJunchao Zhang #include <../src/vec/is/sf/impls/basic/sfbasic.h>
2cd620004SJunchao Zhang #include <../src/vec/is/sf/impls/basic/sfpack.h>
353dd6d7dSJunchao Zhang #include <petsc/private/viewerimpl.h>
4b23bfdefSJunchao Zhang 
540e23c03SJunchao Zhang /*===================================================================================*/
640e23c03SJunchao Zhang /*              SF public interface implementations                                  */
740e23c03SJunchao Zhang /*===================================================================================*/
8d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf)
9d71ae5a4SJacob Faibussowitsch {
10b23bfdefSJunchao Zhang   PetscSF_Basic *bas = (PetscSF_Basic *)sf->data;
1171438e86SJunchao Zhang   PetscInt      *rlengths, *ilengths, i, nRemoteRootRanks, nRemoteLeafRanks;
1240e23c03SJunchao Zhang   PetscMPIInt    rank, niranks, *iranks, tag;
1395fce210SBarry Smith   MPI_Comm       comm;
14b5a8e515SJed Brown   MPI_Group      group;
1540e23c03SJunchao Zhang   MPI_Request   *rootreqs, *leafreqs;
1695fce210SBarry Smith 
1795fce210SBarry Smith   PetscFunctionBegin;
189566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_group(PETSC_COMM_SELF, &group));
199566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUpRanks(sf, group));
209566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Group_free(&group));
219566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
229566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetNewTag((PetscObject)sf, &tag));
239566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
2495fce210SBarry Smith   /*
2595fce210SBarry Smith    * Inform roots about how many leaves and from which ranks
2695fce210SBarry Smith    */
279566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(sf->nranks, &rlengths));
28cd620004SJunchao Zhang   /* Determine number, sending ranks and length of incoming */
299371c9d4SSatish Balay   for (i = 0; i < sf->nranks; i++) { rlengths[i] = sf->roffset[i + 1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */ }
3071438e86SJunchao Zhang   nRemoteRootRanks = sf->nranks - sf->ndranks;
319566063dSJacob Faibussowitsch   PetscCall(PetscCommBuildTwoSided(comm, 1, MPIU_INT, nRemoteRootRanks, sf->ranks + sf->ndranks, rlengths + sf->ndranks, &niranks, &iranks, (void **)&ilengths));
32c943f53fSJed Brown 
330b899082SJunchao Zhang   /* Sort iranks. See use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() on why.
340b899082SJunchao Zhang      We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is
350b899082SJunchao Zhang      small and the sorting is cheap.
360b899082SJunchao Zhang    */
379566063dSJacob Faibussowitsch   PetscCall(PetscSortMPIIntWithIntArray(niranks, iranks, ilengths));
380b899082SJunchao Zhang 
39c943f53fSJed Brown   /* Partition into distinguished and non-distinguished incoming ranks */
40c943f53fSJed Brown   bas->ndiranks = sf->ndranks;
41c943f53fSJed Brown   bas->niranks  = bas->ndiranks + niranks;
429566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(bas->niranks, &bas->iranks, bas->niranks + 1, &bas->ioffset));
43c943f53fSJed Brown   bas->ioffset[0] = 0;
44c943f53fSJed Brown   for (i = 0; i < bas->ndiranks; i++) {
45c943f53fSJed Brown     bas->iranks[i]      = sf->ranks[i];
46c943f53fSJed Brown     bas->ioffset[i + 1] = bas->ioffset[i] + rlengths[i];
47c943f53fSJed Brown   }
48c9cc58a2SBarry Smith   PetscCheck(bas->ndiranks <= 1 && (bas->ndiranks != 1 || bas->iranks[0] == rank), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Broken setup for shared ranks");
4940e23c03SJunchao Zhang   for (; i < bas->niranks; i++) {
50c943f53fSJed Brown     bas->iranks[i]      = iranks[i - bas->ndiranks];
51c943f53fSJed Brown     bas->ioffset[i + 1] = bas->ioffset[i] + ilengths[i - bas->ndiranks];
52c943f53fSJed Brown   }
53c943f53fSJed Brown   bas->itotal = bas->ioffset[i];
549566063dSJacob Faibussowitsch   PetscCall(PetscFree(rlengths));
559566063dSJacob Faibussowitsch   PetscCall(PetscFree(iranks));
569566063dSJacob Faibussowitsch   PetscCall(PetscFree(ilengths));
5795fce210SBarry Smith 
5895fce210SBarry Smith   /* Send leaf identities to roots */
5971438e86SJunchao Zhang   nRemoteLeafRanks = bas->niranks - bas->ndiranks;
609566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bas->itotal, &bas->irootloc));
619566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(nRemoteLeafRanks, &rootreqs, nRemoteRootRanks, &leafreqs));
6248a46eb9SPierre Jolivet   for (i = bas->ndiranks; i < bas->niranks; i++) PetscCallMPI(MPIU_Irecv(bas->irootloc + bas->ioffset[i], bas->ioffset[i + 1] - bas->ioffset[i], MPIU_INT, bas->iranks[i], tag, comm, &rootreqs[i - bas->ndiranks]));
6340e23c03SJunchao Zhang   for (i = 0; i < sf->nranks; i++) {
64c87b50c4SJunchao Zhang     PetscInt npoints = sf->roffset[i + 1] - sf->roffset[i];
6540e23c03SJunchao Zhang     if (i < sf->ndranks) {
6608401ef6SPierre Jolivet       PetscCheck(sf->ranks[i] == rank, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot interpret distinguished leaf rank");
6708401ef6SPierre Jolivet       PetscCheck(bas->iranks[0] == rank, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot interpret distinguished root rank");
6808401ef6SPierre Jolivet       PetscCheck(npoints == bas->ioffset[1] - bas->ioffset[0], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Distinguished rank exchange has mismatched lengths");
699566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(bas->irootloc + bas->ioffset[0], sf->rremote + sf->roffset[i], npoints));
70c943f53fSJed Brown       continue;
71c943f53fSJed Brown     }
729566063dSJacob Faibussowitsch     PetscCallMPI(MPIU_Isend(sf->rremote + sf->roffset[i], npoints, MPIU_INT, sf->ranks[i], tag, comm, &leafreqs[i - sf->ndranks]));
73bf39f1bfSJed Brown   }
749566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Waitall(nRemoteLeafRanks, rootreqs, MPI_STATUSES_IGNORE));
759566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Waitall(nRemoteRootRanks, leafreqs, MPI_STATUSES_IGNORE));
7695fce210SBarry Smith 
7771438e86SJunchao Zhang   sf->nleafreqs  = nRemoteRootRanks;
7871438e86SJunchao Zhang   bas->nrootreqs = nRemoteLeafRanks;
79cd620004SJunchao Zhang   sf->persistent = PETSC_TRUE;
80eb02082bSJunchao Zhang 
8171438e86SJunchao Zhang   /* Setup fields related to packing, such as rootbuflen[] */
829566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUpPackFields(sf));
839566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rootreqs, leafreqs));
8495fce210SBarry Smith   PetscFunctionReturn(0);
8595fce210SBarry Smith }
8695fce210SBarry Smith 
87d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf)
88d71ae5a4SJacob Faibussowitsch {
89cd620004SJunchao Zhang   PetscSF_Basic *bas  = (PetscSF_Basic *)sf->data;
9071438e86SJunchao Zhang   PetscSFLink    link = bas->avail, next;
9195fce210SBarry Smith 
9295fce210SBarry Smith   PetscFunctionBegin;
9328b400f6SJacob Faibussowitsch   PetscCheck(!bas->inuse, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Outstanding operation has not been completed");
949566063dSJacob Faibussowitsch   PetscCall(PetscFree2(bas->iranks, bas->ioffset));
959566063dSJacob Faibussowitsch   PetscCall(PetscFree(bas->irootloc));
9671438e86SJunchao Zhang 
977fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
989566063dSJacob Faibussowitsch   for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, bas->irootloc_d[i]));
99eb02082bSJunchao Zhang #endif
10071438e86SJunchao Zhang 
10171438e86SJunchao Zhang #if defined(PETSC_HAVE_NVSHMEM)
1029566063dSJacob Faibussowitsch   PetscCall(PetscSFReset_Basic_NVSHMEM(sf));
10371438e86SJunchao Zhang #endif
10471438e86SJunchao Zhang 
1059371c9d4SSatish Balay   for (; link; link = next) {
1069371c9d4SSatish Balay     next = link->next;
1079371c9d4SSatish Balay     PetscCall(PetscSFLinkDestroy(sf, link));
1089371c9d4SSatish Balay   }
10971438e86SJunchao Zhang   bas->avail = NULL;
1109566063dSJacob Faibussowitsch   PetscCall(PetscSFResetPackFields(sf));
11195fce210SBarry Smith   PetscFunctionReturn(0);
11295fce210SBarry Smith }
11395fce210SBarry Smith 
114d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf)
115d71ae5a4SJacob Faibussowitsch {
11695fce210SBarry Smith   PetscFunctionBegin;
1179566063dSJacob Faibussowitsch   PetscCall(PetscSFReset_Basic(sf));
1189566063dSJacob Faibussowitsch   PetscCall(PetscFree(sf->data));
11995fce210SBarry Smith   PetscFunctionReturn(0);
12095fce210SBarry Smith }
12195fce210SBarry Smith 
12262152dedSBarry Smith #if defined(PETSC_USE_SINGLE_LIBRARY)
12362152dedSBarry Smith   #include <petscmat.h>
12462152dedSBarry Smith 
125d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFView_Basic_PatternAndSizes(PetscSF sf, PetscViewer viewer)
126d71ae5a4SJacob Faibussowitsch {
12762152dedSBarry Smith   PetscSF_Basic     *bas = (PetscSF_Basic *)sf->data;
12853dd6d7dSJunchao Zhang   PetscInt           i, nrootranks, ndrootranks;
12962152dedSBarry Smith   const PetscInt    *rootoffset;
13062152dedSBarry Smith   PetscMPIInt        rank, size;
13153dd6d7dSJunchao Zhang   const PetscMPIInt *rootranks;
13262152dedSBarry Smith   MPI_Comm           comm = PetscObjectComm((PetscObject)sf);
13353dd6d7dSJunchao Zhang   PetscScalar        unitbytes;
13462152dedSBarry Smith   Mat                A;
13562152dedSBarry Smith 
13662152dedSBarry Smith   PetscFunctionBegin;
1379566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
1389566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
13953dd6d7dSJunchao Zhang   /* PetscSFView is most useful for the SF used in VecScatterBegin/End in MatMult etc, where we do
14053dd6d7dSJunchao Zhang     PetscSFBcast, i.e., roots send data to leaves.  We dump the communication pattern into a matrix
14153dd6d7dSJunchao Zhang     in senders' view point: how many bytes I will send to my neighbors.
14253dd6d7dSJunchao Zhang 
14353dd6d7dSJunchao Zhang     Looking at a column of the matrix, one can also know how many bytes the rank will receive from others.
14453dd6d7dSJunchao Zhang 
14553dd6d7dSJunchao Zhang     If PetscSFLink bas->inuse is available, we can use that to get tree vertex size. But that would give
14653dd6d7dSJunchao Zhang     different interpretations for the same SF for different data types. Since we most care about VecScatter,
14753dd6d7dSJunchao Zhang     we uniformly treat each vertex as a PetscScalar.
14853dd6d7dSJunchao Zhang   */
14953dd6d7dSJunchao Zhang   unitbytes = (PetscScalar)sizeof(PetscScalar);
15053dd6d7dSJunchao Zhang 
1519566063dSJacob Faibussowitsch   PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, &rootranks, &rootoffset, NULL));
1529566063dSJacob Faibussowitsch   PetscCall(MatCreateAIJ(comm, 1, 1, size, size, 1, NULL, nrootranks - ndrootranks, NULL, &A));
1539566063dSJacob Faibussowitsch   PetscCall(MatSetOptionsPrefix(A, "__petsc_internal__")); /* To prevent the internal A from taking any command line options */
15448a46eb9SPierre Jolivet   for (i = 0; i < nrootranks; i++) PetscCall(MatSetValue(A, (PetscInt)rank, bas->iranks[i], (rootoffset[i + 1] - rootoffset[i]) * unitbytes, INSERT_VALUES));
1559566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1569566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1579566063dSJacob Faibussowitsch   PetscCall(MatView(A, viewer));
1589566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&A));
15962152dedSBarry Smith   PetscFunctionReturn(0);
16062152dedSBarry Smith }
16162152dedSBarry Smith #endif
16262152dedSBarry Smith 
163d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf, PetscViewer viewer)
164d71ae5a4SJacob Faibussowitsch {
16553dd6d7dSJunchao Zhang   PetscBool isascii;
16695fce210SBarry Smith 
16795fce210SBarry Smith   PetscFunctionBegin;
1689566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
1699566063dSJacob Faibussowitsch   if (isascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) PetscCall(PetscViewerASCIIPrintf(viewer, "  MultiSF sort=%s\n", sf->rankorder ? "rank-order" : "unordered"));
17062152dedSBarry Smith #if defined(PETSC_USE_SINGLE_LIBRARY)
17153dd6d7dSJunchao Zhang   else {
17253dd6d7dSJunchao Zhang     PetscBool isdraw, isbinary;
1739566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1749566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
17548a46eb9SPierre Jolivet     if ((isascii && viewer->format == PETSC_VIEWER_ASCII_MATLAB) || isdraw || isbinary) PetscCall(PetscSFView_Basic_PatternAndSizes(sf, viewer));
17662152dedSBarry Smith   }
17762152dedSBarry Smith #endif
17895fce210SBarry Smith   PetscFunctionReturn(0);
17995fce210SBarry Smith }
18095fce210SBarry Smith 
181d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFBcastBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
182d71ae5a4SJacob Faibussowitsch {
183cd620004SJunchao Zhang   PetscSFLink link = NULL;
18495fce210SBarry Smith 
18595fce210SBarry Smith   PetscFunctionBegin;
18671438e86SJunchao Zhang   /* Create a communication link, which provides buffers, MPI requests etc (if MPI is used) */
1879566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, PETSCSF_BCAST, &link));
18871438e86SJunchao Zhang   /* Pack rootdata to rootbuf for remote communication */
1899566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkPackRootData(sf, link, PETSCSF_REMOTE, rootdata));
190*da81f932SPierre Jolivet   /* Start communication, e.g., post MPI_Isend */
1919566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_ROOT2LEAF));
19271438e86SJunchao Zhang   /* Do local scatter (i.e., self to self communication), which overlaps with the remote communication above */
1939566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkScatterLocal(sf, link, PETSCSF_ROOT2LEAF, (void *)rootdata, leafdata, op));
19495fce210SBarry Smith   PetscFunctionReturn(0);
19595fce210SBarry Smith }
19695fce210SBarry Smith 
197d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFBcastEnd_Basic(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
198d71ae5a4SJacob Faibussowitsch {
199cd620004SJunchao Zhang   PetscSFLink link = NULL;
20095fce210SBarry Smith 
20195fce210SBarry Smith   PetscFunctionBegin;
202cd620004SJunchao Zhang   /* Retrieve the link used in XxxBegin() with root/leafdata as key */
2039566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link));
20471438e86SJunchao Zhang   /* Finish remote communication, e.g., post MPI_Waitall */
2059566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_ROOT2LEAF));
20671438e86SJunchao Zhang   /* Unpack data in leafbuf to leafdata for remote communication */
2079566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkUnpackLeafData(sf, link, PETSCSF_REMOTE, leafdata, op));
20871438e86SJunchao Zhang   /* Recycle the link */
2099566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkReclaim(sf, &link));
210cd620004SJunchao Zhang   PetscFunctionReturn(0);
211cd620004SJunchao Zhang }
212cd620004SJunchao Zhang 
213cd620004SJunchao Zhang /* Shared by ReduceBegin and FetchAndOpBegin */
214d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode PetscSFLeafToRootBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op, PetscSFOperation sfop, PetscSFLink *out)
215d71ae5a4SJacob Faibussowitsch {
21671438e86SJunchao Zhang   PetscSFLink link = NULL;
217cd620004SJunchao Zhang 
218cd620004SJunchao Zhang   PetscFunctionBegin;
2199566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, sfop, &link));
2209566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkPackLeafData(sf, link, PETSCSF_REMOTE, leafdata));
2219566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_LEAF2ROOT));
222cd620004SJunchao Zhang   *out = link;
22395fce210SBarry Smith   PetscFunctionReturn(0);
22495fce210SBarry Smith }
22595fce210SBarry Smith 
22695fce210SBarry Smith /* leaf -> root with reduction */
227d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
228d71ae5a4SJacob Faibussowitsch {
229cd620004SJunchao Zhang   PetscSFLink link = NULL;
23095fce210SBarry Smith 
23195fce210SBarry Smith   PetscFunctionBegin;
2329566063dSJacob Faibussowitsch   PetscCall(PetscSFLeafToRootBegin_Basic(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op, PETSCSF_REDUCE, &link));
2339566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkScatterLocal(sf, link, PETSCSF_LEAF2ROOT, rootdata, (void *)leafdata, op));
23495fce210SBarry Smith   PetscFunctionReturn(0);
23595fce210SBarry Smith }
23695fce210SBarry Smith 
237d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
238d71ae5a4SJacob Faibussowitsch {
239cd620004SJunchao Zhang   PetscSFLink link = NULL;
24095fce210SBarry Smith 
24195fce210SBarry Smith   PetscFunctionBegin;
2429566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link));
2439566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_LEAF2ROOT));
2449566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkUnpackRootData(sf, link, PETSCSF_REMOTE, rootdata, op));
2459566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkReclaim(sf, &link));
24695fce210SBarry Smith   PetscFunctionReturn(0);
24795fce210SBarry Smith }
24895fce210SBarry Smith 
249d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, void *leafupdate, MPI_Op op)
250d71ae5a4SJacob Faibussowitsch {
251cd620004SJunchao Zhang   PetscSFLink link = NULL;
25295fce210SBarry Smith 
25395fce210SBarry Smith   PetscFunctionBegin;
2549566063dSJacob Faibussowitsch   PetscCall(PetscSFLeafToRootBegin_Basic(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op, PETSCSF_FETCH, &link));
2559566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkFetchAndOpLocal(sf, link, rootdata, leafdata, leafupdate, op));
25695fce210SBarry Smith   PetscFunctionReturn(0);
25795fce210SBarry Smith }
25895fce210SBarry Smith 
259d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
260d71ae5a4SJacob Faibussowitsch {
261cd620004SJunchao Zhang   PetscSFLink link = NULL;
26295fce210SBarry Smith 
26395fce210SBarry Smith   PetscFunctionBegin;
2649566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link));
26595fce210SBarry Smith   /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
2669566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_LEAF2ROOT));
267cd620004SJunchao Zhang   /* Do fetch-and-op, the (remote) update results are in rootbuf */
2689566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkFetchAndOpRemote(sf, link, rootdata, op));
269cd620004SJunchao Zhang   /* Bcast rootbuf to leafupdate */
2709566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_ROOT2LEAF));
2719566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_ROOT2LEAF));
272b23bfdefSJunchao Zhang   /* Unpack and insert fetched data into leaves */
2739566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkUnpackLeafData(sf, link, PETSCSF_REMOTE, leafupdate, MPI_REPLACE));
2749566063dSJacob Faibussowitsch   PetscCall(PetscSFLinkReclaim(sf, &link));
27595fce210SBarry Smith   PetscFunctionReturn(0);
27695fce210SBarry Smith }
27795fce210SBarry Smith 
278d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf, PetscInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc)
279d71ae5a4SJacob Faibussowitsch {
2808750ddebSJunchao Zhang   PetscSF_Basic *bas = (PetscSF_Basic *)sf->data;
2818750ddebSJunchao Zhang 
2828750ddebSJunchao Zhang   PetscFunctionBegin;
2838750ddebSJunchao Zhang   if (niranks) *niranks = bas->niranks;
2848750ddebSJunchao Zhang   if (iranks) *iranks = bas->iranks;
2858750ddebSJunchao Zhang   if (ioffset) *ioffset = bas->ioffset;
2868750ddebSJunchao Zhang   if (irootloc) *irootloc = bas->irootloc;
2878750ddebSJunchao Zhang   PetscFunctionReturn(0);
2888750ddebSJunchao Zhang }
2898750ddebSJunchao Zhang 
290*da81f932SPierre Jolivet /* An optimized PetscSFCreateEmbeddedRootSF. We aggressively make use of the established communication on sf.
291f659e5c7SJunchao Zhang    We need one bcast on sf, and no communication anymore to build the embedded sf. Note that selected[]
292f659e5c7SJunchao Zhang    was sorted before calling the routine.
293f659e5c7SJunchao Zhang  */
294d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedRootSF_Basic(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf)
295d71ae5a4SJacob Faibussowitsch {
296f659e5c7SJunchao Zhang   PetscSF            esf;
297cd620004SJunchao Zhang   PetscInt           esf_nranks, esf_ndranks, *esf_roffset, *esf_rmine, *esf_rremote;
298cd620004SJunchao Zhang   PetscInt           i, j, p, q, nroots, esf_nleaves, *new_ilocal, nranks, ndranks, niranks, ndiranks, minleaf, maxleaf, maxlocal;
299cd620004SJunchao Zhang   char              *rootdata, *leafdata, *leafmem; /* Only stores 0 or 1, so we can save memory with char */
300f659e5c7SJunchao Zhang   PetscMPIInt       *esf_ranks;
301f659e5c7SJunchao Zhang   const PetscMPIInt *ranks, *iranks;
302cd620004SJunchao Zhang   const PetscInt    *roffset, *rmine, *rremote, *ioffset, *irootloc;
303f659e5c7SJunchao Zhang   PetscBool          connected;
304f659e5c7SJunchao Zhang   PetscSFNode       *new_iremote;
305f659e5c7SJunchao Zhang   PetscSF_Basic     *bas;
306f659e5c7SJunchao Zhang 
307f659e5c7SJunchao Zhang   PetscFunctionBegin;
3089566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), &esf));
3099566063dSJacob Faibussowitsch   PetscCall(PetscSFSetFromOptions(esf));
3109566063dSJacob Faibussowitsch   PetscCall(PetscSFSetType(esf, PETSCSFBASIC)); /* This optimized routine can only create a basic sf */
311f659e5c7SJunchao Zhang 
312cd620004SJunchao Zhang   /* Find out which leaves are still connected to roots in the embedded sf by doing a Bcast */
3139566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
3149566063dSJacob Faibussowitsch   PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
315cd620004SJunchao Zhang   maxlocal = maxleaf - minleaf + 1;
3169566063dSJacob Faibussowitsch   PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem));
317cd620004SJunchao Zhang   leafdata = leafmem - minleaf;
318f659e5c7SJunchao Zhang   /* Tag selected roots */
319f659e5c7SJunchao Zhang   for (i = 0; i < nselected; ++i) rootdata[selected[i]] = 1;
320f659e5c7SJunchao Zhang 
3219566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastBegin(sf, MPI_CHAR, rootdata, leafdata, MPI_REPLACE));
3229566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(sf, MPI_CHAR, rootdata, leafdata, MPI_REPLACE));
3239566063dSJacob Faibussowitsch   PetscCall(PetscSFGetLeafInfo_Basic(sf, &nranks, &ndranks, &ranks, &roffset, &rmine, &rremote)); /* Get send info */
324cd620004SJunchao Zhang   esf_nranks = esf_ndranks = esf_nleaves = 0;
325b23bfdefSJunchao Zhang   for (i = 0; i < nranks; i++) {
326cd620004SJunchao Zhang     connected = PETSC_FALSE; /* Is this process still connected to this remote root rank? */
3279371c9d4SSatish Balay     for (j = roffset[i]; j < roffset[i + 1]; j++) {
3289371c9d4SSatish Balay       if (leafdata[rmine[j]]) {
3299371c9d4SSatish Balay         esf_nleaves++;
3309371c9d4SSatish Balay         connected = PETSC_TRUE;
3319371c9d4SSatish Balay       }
3329371c9d4SSatish Balay     }
3339371c9d4SSatish Balay     if (connected) {
3349371c9d4SSatish Balay       esf_nranks++;
3359371c9d4SSatish Balay       if (i < ndranks) esf_ndranks++;
3369371c9d4SSatish Balay     }
337f659e5c7SJunchao Zhang   }
338f659e5c7SJunchao Zhang 
339f659e5c7SJunchao Zhang   /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */
3409566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal));
3419566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(esf_nleaves, &new_iremote));
3429566063dSJacob Faibussowitsch   PetscCall(PetscMalloc4(esf_nranks, &esf_ranks, esf_nranks + 1, &esf_roffset, esf_nleaves, &esf_rmine, esf_nleaves, &esf_rremote));
343f659e5c7SJunchao Zhang   p              = 0; /* Counter for connected root ranks */
344f659e5c7SJunchao Zhang   q              = 0; /* Counter for connected leaves */
345f659e5c7SJunchao Zhang   esf_roffset[0] = 0;
346f659e5c7SJunchao Zhang   for (i = 0; i < nranks; i++) { /* Scan leaf data again to fill esf arrays */
347f659e5c7SJunchao Zhang     connected = PETSC_FALSE;
348cd620004SJunchao Zhang     for (j = roffset[i]; j < roffset[i + 1]; j++) {
349cd620004SJunchao Zhang       if (leafdata[rmine[j]]) {
350f659e5c7SJunchao Zhang         esf_rmine[q] = new_ilocal[q] = rmine[j];
351f659e5c7SJunchao Zhang         esf_rremote[q]               = rremote[j];
352f659e5c7SJunchao Zhang         new_iremote[q].index         = rremote[j];
353f659e5c7SJunchao Zhang         new_iremote[q].rank          = ranks[i];
354f659e5c7SJunchao Zhang         connected                    = PETSC_TRUE;
355f659e5c7SJunchao Zhang         q++;
356f659e5c7SJunchao Zhang       }
357f659e5c7SJunchao Zhang     }
358f659e5c7SJunchao Zhang     if (connected) {
359f659e5c7SJunchao Zhang       esf_ranks[p]       = ranks[i];
360f659e5c7SJunchao Zhang       esf_roffset[p + 1] = q;
361f659e5c7SJunchao Zhang       p++;
362f659e5c7SJunchao Zhang     }
363f659e5c7SJunchao Zhang   }
364f659e5c7SJunchao Zhang 
365f659e5c7SJunchao Zhang   /* SetGraph internally resets the SF, so we only set its fields after the call */
3669566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
367f659e5c7SJunchao Zhang   esf->nranks    = esf_nranks;
368f659e5c7SJunchao Zhang   esf->ndranks   = esf_ndranks;
369f659e5c7SJunchao Zhang   esf->ranks     = esf_ranks;
370f659e5c7SJunchao Zhang   esf->roffset   = esf_roffset;
371f659e5c7SJunchao Zhang   esf->rmine     = esf_rmine;
372f659e5c7SJunchao Zhang   esf->rremote   = esf_rremote;
373cd620004SJunchao Zhang   esf->nleafreqs = esf_nranks - esf_ndranks;
374f659e5c7SJunchao Zhang 
375f659e5c7SJunchao Zhang   /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */
376f659e5c7SJunchao Zhang   bas = (PetscSF_Basic *)esf->data;
3779566063dSJacob Faibussowitsch   PetscCall(PetscSFGetRootInfo_Basic(sf, &niranks, &ndiranks, &iranks, &ioffset, &irootloc)); /* Get recv info */
378f659e5c7SJunchao Zhang   /* Embedded sf always has simpler communication than the original one. We might allocate longer arrays than needed here. But we
379cd620004SJunchao Zhang      we do not care since these arrays are usually short. The benefit is we can fill these arrays by just parsing irootloc once.
380f659e5c7SJunchao Zhang    */
3819566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(niranks, &bas->iranks, niranks + 1, &bas->ioffset));
3829566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ioffset[niranks], &bas->irootloc));
383f659e5c7SJunchao Zhang   bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
384f659e5c7SJunchao Zhang   p                                              = 0; /* Counter for connected leaf ranks */
385f659e5c7SJunchao Zhang   q                                              = 0; /* Counter for connected roots */
386f659e5c7SJunchao Zhang   for (i = 0; i < niranks; i++) {
387f659e5c7SJunchao Zhang     connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
388f659e5c7SJunchao Zhang     for (j = ioffset[i]; j < ioffset[i + 1]; j++) {
389cd620004SJunchao Zhang       if (rootdata[irootloc[j]]) {
390f659e5c7SJunchao Zhang         bas->irootloc[q++] = irootloc[j];
391f659e5c7SJunchao Zhang         connected          = PETSC_TRUE;
392f659e5c7SJunchao Zhang       }
393f659e5c7SJunchao Zhang     }
394f659e5c7SJunchao Zhang     if (connected) {
395f659e5c7SJunchao Zhang       bas->niranks++;
396f659e5c7SJunchao Zhang       if (i < ndiranks) bas->ndiranks++; /* Note that order of ranks (including distinguished ranks) is kept */
397f659e5c7SJunchao Zhang       bas->iranks[p]      = iranks[i];
398f659e5c7SJunchao Zhang       bas->ioffset[p + 1] = q;
399f659e5c7SJunchao Zhang       p++;
400f659e5c7SJunchao Zhang     }
401f659e5c7SJunchao Zhang   }
402f659e5c7SJunchao Zhang   bas->itotal     = q;
403cd620004SJunchao Zhang   bas->nrootreqs  = bas->niranks - bas->ndiranks;
404cd620004SJunchao Zhang   esf->persistent = PETSC_TRUE;
405cd620004SJunchao Zhang   /* Setup packing related fields */
4069566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUpPackFields(esf));
407f659e5c7SJunchao Zhang 
40820c24465SJunchao Zhang   /* Copy from PetscSFSetUp(), since this method wants to skip PetscSFSetUp(). */
40920c24465SJunchao Zhang #if defined(PETSC_HAVE_CUDA)
41020c24465SJunchao Zhang   if (esf->backend == PETSCSF_BACKEND_CUDA) {
41171438e86SJunchao Zhang     esf->ops->Malloc = PetscSFMalloc_CUDA;
41271438e86SJunchao Zhang     esf->ops->Free   = PetscSFFree_CUDA;
41320c24465SJunchao Zhang   }
41420c24465SJunchao Zhang #endif
41520c24465SJunchao Zhang 
41659af0bd3SScott Kruger #if defined(PETSC_HAVE_HIP)
41759af0bd3SScott Kruger   /* TODO: Needs debugging */
41859af0bd3SScott Kruger   if (esf->backend == PETSCSF_BACKEND_HIP) {
41959af0bd3SScott Kruger     esf->ops->Malloc = PetscSFMalloc_HIP;
42059af0bd3SScott Kruger     esf->ops->Free   = PetscSFFree_HIP;
42159af0bd3SScott Kruger   }
42259af0bd3SScott Kruger #endif
42359af0bd3SScott Kruger 
42420c24465SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS)
42520c24465SJunchao Zhang   if (esf->backend == PETSCSF_BACKEND_KOKKOS) {
42620c24465SJunchao Zhang     esf->ops->Malloc = PetscSFMalloc_Kokkos;
42720c24465SJunchao Zhang     esf->ops->Free   = PetscSFFree_Kokkos;
42820c24465SJunchao Zhang   }
42920c24465SJunchao Zhang #endif
430f659e5c7SJunchao Zhang   esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */
4319566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rootdata, leafmem));
432f659e5c7SJunchao Zhang   *newsf = esf;
433f659e5c7SJunchao Zhang   PetscFunctionReturn(0);
434f659e5c7SJunchao Zhang }
435f659e5c7SJunchao Zhang 
436d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf)
437d71ae5a4SJacob Faibussowitsch {
43840e23c03SJunchao Zhang   PetscSF_Basic *dat;
43995fce210SBarry Smith 
44095fce210SBarry Smith   PetscFunctionBegin;
44195fce210SBarry Smith   sf->ops->SetUp                = PetscSFSetUp_Basic;
44295fce210SBarry Smith   sf->ops->Reset                = PetscSFReset_Basic;
44395fce210SBarry Smith   sf->ops->Destroy              = PetscSFDestroy_Basic;
44495fce210SBarry Smith   sf->ops->View                 = PetscSFView_Basic;
445ad227feaSJunchao Zhang   sf->ops->BcastBegin           = PetscSFBcastBegin_Basic;
446ad227feaSJunchao Zhang   sf->ops->BcastEnd             = PetscSFBcastEnd_Basic;
44795fce210SBarry Smith   sf->ops->ReduceBegin          = PetscSFReduceBegin_Basic;
44895fce210SBarry Smith   sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
44995fce210SBarry Smith   sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Basic;
45095fce210SBarry Smith   sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Basic;
4518750ddebSJunchao Zhang   sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
45272502a1fSJunchao Zhang   sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Basic;
45395fce210SBarry Smith 
4544dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&dat));
45540e23c03SJunchao Zhang   sf->data = (void *)dat;
45695fce210SBarry Smith   PetscFunctionReturn(0);
45795fce210SBarry Smith }
458