xref: /petsc/src/vec/is/sf/interface/sf.c (revision 2f04c52277ae86d0bd99bd90d9d5574dfa2d51e6)
1af0996ceSBarry Smith #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2c4e6a40aSLawrence Mitchell #include <petsc/private/hashseti.h>
353dd6d7dSJunchao Zhang #include <petsc/private/viewerimpl.h>
4eec179cfSJacob Faibussowitsch #include <petsc/private/hashmapi.h>
595fce210SBarry Smith 
67fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_CUDA)
7715b587bSJunchao Zhang   #include <petscdevice_cuda.h>
87fd2d3dbSJunchao Zhang #endif
97fd2d3dbSJunchao Zhang 
107fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_HIP)
117fd2d3dbSJunchao Zhang   #include <hip/hip_runtime.h>
127fd2d3dbSJunchao Zhang #endif
137fd2d3dbSJunchao Zhang 
142abc8c78SJacob Faibussowitsch #if defined(PETSC_CLANG_STATIC_ANALYZER)
154bf303faSJacob Faibussowitsch extern void PetscSFCheckGraphSet(PetscSF, int);
162abc8c78SJacob Faibussowitsch #else
1795fce210SBarry Smith   #if defined(PETSC_USE_DEBUG)
18a8f51744SPierre Jolivet     #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME)
1995fce210SBarry Smith   #else
209371c9d4SSatish Balay     #define PetscSFCheckGraphSet(sf, arg) \
219371c9d4SSatish Balay       do { \
229371c9d4SSatish Balay       } while (0)
2395fce210SBarry Smith   #endif
242abc8c78SJacob Faibussowitsch #endif
2595fce210SBarry Smith 
264c8fdceaSLisandro Dalcin const char *const PetscSFDuplicateOptions[]     = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL};
271f40158dSVaclav Hapla const char *const PetscSFConcatenateRootModes[] = {"local", "shared", "global", "PetscSFConcatenateRootMode", "PETSCSF_CONCATENATE_ROOTMODE_", NULL};
2895fce210SBarry Smith 
298af6ec1cSBarry Smith /*@
3095fce210SBarry Smith   PetscSFCreate - create a star forest communication context
3195fce210SBarry Smith 
32d083f849SBarry Smith   Collective
3395fce210SBarry Smith 
344165533cSJose E. Roman   Input Parameter:
3595fce210SBarry Smith . comm - communicator on which the star forest will operate
3695fce210SBarry Smith 
374165533cSJose E. Roman   Output Parameter:
3895fce210SBarry Smith . sf - new star forest context
3995fce210SBarry Smith 
4020662ed9SBarry Smith   Options Database Key:
416677b1c1SJunchao Zhang + -sf_type basic                 - Use MPI persistent Isend/Irecv for communication (Default)
426677b1c1SJunchao Zhang . -sf_type window                - Use MPI-3 one-sided window for communication
436677b1c1SJunchao Zhang . -sf_type neighbor              - Use MPI-3 neighborhood collectives for communication
446677b1c1SJunchao Zhang - -sf_neighbor_persistent <bool> - If true, use MPI-4 persistent neighborhood collectives for communication (used along with -sf_type neighbor)
45dd5b3ca6SJunchao Zhang 
4695fce210SBarry Smith   Level: intermediate
4795fce210SBarry Smith 
48cab54364SBarry Smith   Note:
49cab54364SBarry Smith   When one knows the communication graph is one of the predefined graph, such as `MPI_Alltoall()`, `MPI_Allgatherv()`,
50cab54364SBarry Smith   `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special
51*2f04c522SBarry Smith   `PetscSF`s are optimized and have better performance than the general `PetscSF`s.
52dd5b3ca6SJunchao Zhang 
53*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetType`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()`
5495fce210SBarry Smith @*/
55d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf)
56d71ae5a4SJacob Faibussowitsch {
5795fce210SBarry Smith   PetscSF b;
5895fce210SBarry Smith 
5995fce210SBarry Smith   PetscFunctionBegin;
604f572ea9SToby Isaac   PetscAssertPointer(sf, 2);
619566063dSJacob Faibussowitsch   PetscCall(PetscSFInitializePackage());
6295fce210SBarry Smith 
639566063dSJacob Faibussowitsch   PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView));
6495fce210SBarry Smith   b->nroots    = -1;
6595fce210SBarry Smith   b->nleaves   = -1;
661690c2aeSBarry Smith   b->minleaf   = PETSC_INT_MAX;
671690c2aeSBarry Smith   b->maxleaf   = PETSC_INT_MIN;
6895fce210SBarry Smith   b->nranks    = -1;
6995fce210SBarry Smith   b->rankorder = PETSC_TRUE;
7095fce210SBarry Smith   b->ingroup   = MPI_GROUP_NULL;
7195fce210SBarry Smith   b->outgroup  = MPI_GROUP_NULL;
7295fce210SBarry Smith   b->graphset  = PETSC_FALSE;
7320c24465SJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
7420c24465SJunchao Zhang   b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
7520c24465SJunchao Zhang   b->use_stream_aware_mpi = PETSC_FALSE;
7671438e86SJunchao Zhang   b->unknown_input_stream = PETSC_FALSE;
7727f636e8SJunchao Zhang   #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
7820c24465SJunchao Zhang   b->backend = PETSCSF_BACKEND_KOKKOS;
7927f636e8SJunchao Zhang   #elif defined(PETSC_HAVE_CUDA)
8027f636e8SJunchao Zhang   b->backend = PETSCSF_BACKEND_CUDA;
8159af0bd3SScott Kruger   #elif defined(PETSC_HAVE_HIP)
8259af0bd3SScott Kruger   b->backend = PETSCSF_BACKEND_HIP;
8320c24465SJunchao Zhang   #endif
8471438e86SJunchao Zhang 
8571438e86SJunchao Zhang   #if defined(PETSC_HAVE_NVSHMEM)
8671438e86SJunchao Zhang   b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
8771438e86SJunchao Zhang   b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
889566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL));
899566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL));
9071438e86SJunchao Zhang   #endif
9120c24465SJunchao Zhang #endif
9260c22052SBarry Smith   b->vscat.from_n = -1;
9360c22052SBarry Smith   b->vscat.to_n   = -1;
9460c22052SBarry Smith   b->vscat.unit   = MPIU_SCALAR;
9595fce210SBarry Smith   *sf             = b;
963ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
9795fce210SBarry Smith }
9895fce210SBarry Smith 
9929046d53SLisandro Dalcin /*@
10095fce210SBarry Smith   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
10195fce210SBarry Smith 
10295fce210SBarry Smith   Collective
10395fce210SBarry Smith 
1044165533cSJose E. Roman   Input Parameter:
10595fce210SBarry Smith . sf - star forest
10695fce210SBarry Smith 
10795fce210SBarry Smith   Level: advanced
10895fce210SBarry Smith 
109*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()`
11095fce210SBarry Smith @*/
111d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFReset(PetscSF sf)
112d71ae5a4SJacob Faibussowitsch {
11395fce210SBarry Smith   PetscFunctionBegin;
11495fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
115dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, Reset);
1160dd791a8SStefano Zampini   PetscCall(PetscSFDestroy(&sf->rankssf));
1170dd791a8SStefano Zampini 
11829046d53SLisandro Dalcin   sf->nroots   = -1;
11929046d53SLisandro Dalcin   sf->nleaves  = -1;
1201690c2aeSBarry Smith   sf->minleaf  = PETSC_INT_MAX;
1211690c2aeSBarry Smith   sf->maxleaf  = PETSC_INT_MIN;
12295fce210SBarry Smith   sf->mine     = NULL;
12395fce210SBarry Smith   sf->remote   = NULL;
12429046d53SLisandro Dalcin   sf->graphset = PETSC_FALSE;
1259566063dSJacob Faibussowitsch   PetscCall(PetscFree(sf->mine_alloc));
1269566063dSJacob Faibussowitsch   PetscCall(PetscFree(sf->remote_alloc));
12721c688dcSJed Brown   sf->nranks = -1;
1289566063dSJacob Faibussowitsch   PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote));
12929046d53SLisandro Dalcin   sf->degreeknown = PETSC_FALSE;
1309566063dSJacob Faibussowitsch   PetscCall(PetscFree(sf->degree));
1319566063dSJacob Faibussowitsch   if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
1329566063dSJacob Faibussowitsch   if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));
1330dd791a8SStefano Zampini 
134013b3241SStefano Zampini   if (sf->multi) sf->multi->multi = NULL;
1359566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&sf->multi));
1360dd791a8SStefano Zampini 
1379566063dSJacob Faibussowitsch   PetscCall(PetscLayoutDestroy(&sf->map));
13871438e86SJunchao Zhang 
13971438e86SJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
1409566063dSJacob Faibussowitsch   for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i]));
14171438e86SJunchao Zhang #endif
14271438e86SJunchao Zhang 
14395fce210SBarry Smith   sf->setupcalled = PETSC_FALSE;
1443ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
14595fce210SBarry Smith }
14695fce210SBarry Smith 
147cc4c1da9SBarry Smith /*@
148cab54364SBarry Smith   PetscSFSetType - Set the `PetscSF` communication implementation
14995fce210SBarry Smith 
150c3339decSBarry Smith   Collective
15195fce210SBarry Smith 
15295fce210SBarry Smith   Input Parameters:
153cab54364SBarry Smith + sf   - the `PetscSF` context
15495fce210SBarry Smith - type - a known method
155cab54364SBarry Smith .vb
156cab54364SBarry Smith     PETSCSFWINDOW - MPI-2/3 one-sided
157cab54364SBarry Smith     PETSCSFBASIC - basic implementation using MPI-1 two-sided
158cab54364SBarry Smith .ve
15995fce210SBarry Smith 
16095fce210SBarry Smith   Options Database Key:
16120662ed9SBarry Smith . -sf_type <type> - Sets the method; for example `basic` or `window` use -help for a list of available methods
162cab54364SBarry Smith 
163cab54364SBarry Smith   Level: intermediate
16495fce210SBarry Smith 
16595fce210SBarry Smith   Notes:
16620662ed9SBarry Smith   See `PetscSFType` for possible values
16795fce210SBarry Smith 
168*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`
16995fce210SBarry Smith @*/
170d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type)
171d71ae5a4SJacob Faibussowitsch {
17295fce210SBarry Smith   PetscBool match;
1735f80ce2aSJacob Faibussowitsch   PetscErrorCode (*r)(PetscSF);
17495fce210SBarry Smith 
17595fce210SBarry Smith   PetscFunctionBegin;
17695fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1774f572ea9SToby Isaac   PetscAssertPointer(type, 2);
17895fce210SBarry Smith 
1799566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match));
1803ba16761SJacob Faibussowitsch   if (match) PetscFunctionReturn(PETSC_SUCCESS);
18195fce210SBarry Smith 
1829566063dSJacob Faibussowitsch   PetscCall(PetscFunctionListFind(PetscSFList, type, &r));
1836adde796SStefano Zampini   PetscCheck(r, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type);
18429046d53SLisandro Dalcin   /* Destroy the previous PetscSF implementation context */
185dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, Destroy);
1869566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops)));
1879566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type));
1889566063dSJacob Faibussowitsch   PetscCall((*r)(sf));
1893ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
19095fce210SBarry Smith }
19195fce210SBarry Smith 
192cc4c1da9SBarry Smith /*@
193cab54364SBarry Smith   PetscSFGetType - Get the `PetscSF` communication implementation
19429046d53SLisandro Dalcin 
19529046d53SLisandro Dalcin   Not Collective
19629046d53SLisandro Dalcin 
19729046d53SLisandro Dalcin   Input Parameter:
198cab54364SBarry Smith . sf - the `PetscSF` context
19929046d53SLisandro Dalcin 
20029046d53SLisandro Dalcin   Output Parameter:
201cab54364SBarry Smith . type - the `PetscSF` type name
20229046d53SLisandro Dalcin 
20329046d53SLisandro Dalcin   Level: intermediate
20429046d53SLisandro Dalcin 
205*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()`
20629046d53SLisandro Dalcin @*/
207d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
208d71ae5a4SJacob Faibussowitsch {
20929046d53SLisandro Dalcin   PetscFunctionBegin;
21029046d53SLisandro Dalcin   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2114f572ea9SToby Isaac   PetscAssertPointer(type, 2);
21229046d53SLisandro Dalcin   *type = ((PetscObject)sf)->type_name;
2133ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
21429046d53SLisandro Dalcin }
21529046d53SLisandro Dalcin 
2160764c050SBarry Smith /*@
21720662ed9SBarry Smith   PetscSFDestroy - destroy a star forest
21895fce210SBarry Smith 
21995fce210SBarry Smith   Collective
22095fce210SBarry Smith 
2214165533cSJose E. Roman   Input Parameter:
22295fce210SBarry Smith . sf - address of star forest
22395fce210SBarry Smith 
22495fce210SBarry Smith   Level: intermediate
22595fce210SBarry Smith 
226*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()`
22795fce210SBarry Smith @*/
228d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFDestroy(PetscSF *sf)
229d71ae5a4SJacob Faibussowitsch {
23095fce210SBarry Smith   PetscFunctionBegin;
2313ba16761SJacob Faibussowitsch   if (!*sf) PetscFunctionReturn(PETSC_SUCCESS);
232f4f49eeaSPierre Jolivet   PetscValidHeaderSpecific(*sf, PETSCSF_CLASSID, 1);
233f4f49eeaSPierre Jolivet   if (--((PetscObject)*sf)->refct > 0) {
2349371c9d4SSatish Balay     *sf = NULL;
2353ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
2369371c9d4SSatish Balay   }
2379566063dSJacob Faibussowitsch   PetscCall(PetscSFReset(*sf));
238f4f49eeaSPierre Jolivet   PetscTryTypeMethod(*sf, Destroy);
2399566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
2409566063dSJacob Faibussowitsch   if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
241c02794c0SJunchao Zhang #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
242715b587bSJunchao Zhang   if ((*sf)->use_stream_aware_mpi) {
243715b587bSJunchao Zhang     PetscCallMPI(MPIX_Stream_free(&(*sf)->mpi_stream));
244715b587bSJunchao Zhang     PetscCallMPI(MPI_Comm_free(&(*sf)->stream_comm));
245715b587bSJunchao Zhang   }
246715b587bSJunchao Zhang #endif
2479566063dSJacob Faibussowitsch   PetscCall(PetscHeaderDestroy(sf));
2483ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
24995fce210SBarry Smith }
25095fce210SBarry Smith 
251d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
252d71ae5a4SJacob Faibussowitsch {
253c4e6a40aSLawrence Mitchell   PetscInt           i, nleaves;
254c4e6a40aSLawrence Mitchell   PetscMPIInt        size;
255c4e6a40aSLawrence Mitchell   const PetscInt    *ilocal;
256c4e6a40aSLawrence Mitchell   const PetscSFNode *iremote;
257c4e6a40aSLawrence Mitchell 
258c4e6a40aSLawrence Mitchell   PetscFunctionBegin;
2593ba16761SJacob Faibussowitsch   if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(PETSC_SUCCESS);
2609566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote));
2619566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
262c4e6a40aSLawrence Mitchell   for (i = 0; i < nleaves; i++) {
263c4e6a40aSLawrence Mitchell     const PetscInt rank   = iremote[i].rank;
264c4e6a40aSLawrence Mitchell     const PetscInt remote = iremote[i].index;
265c4e6a40aSLawrence Mitchell     const PetscInt leaf   = ilocal ? ilocal[i] : i;
266c9cc58a2SBarry Smith     PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size);
26708401ef6SPierre Jolivet     PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i);
26808401ef6SPierre Jolivet     PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i);
269c4e6a40aSLawrence Mitchell   }
2703ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
271c4e6a40aSLawrence Mitchell }
272c4e6a40aSLawrence Mitchell 
27395fce210SBarry Smith /*@
27420662ed9SBarry Smith   PetscSFSetUp - set up communication structures for a `PetscSF`, after this is done it may be used to perform communication
27595fce210SBarry Smith 
27695fce210SBarry Smith   Collective
27795fce210SBarry Smith 
2784165533cSJose E. Roman   Input Parameter:
27995fce210SBarry Smith . sf - star forest communication object
28095fce210SBarry Smith 
28195fce210SBarry Smith   Level: beginner
28295fce210SBarry Smith 
283*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()`
28495fce210SBarry Smith @*/
285d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetUp(PetscSF sf)
286d71ae5a4SJacob Faibussowitsch {
28795fce210SBarry Smith   PetscFunctionBegin;
28829046d53SLisandro Dalcin   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
28929046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
2903ba16761SJacob Faibussowitsch   if (sf->setupcalled) PetscFunctionReturn(PETSC_SUCCESS);
2919566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0));
2929566063dSJacob Faibussowitsch   PetscCall(PetscSFCheckGraphValid_Private(sf));
2939566063dSJacob Faibussowitsch   if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */
294dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, SetUp);
29520c24465SJunchao Zhang #if defined(PETSC_HAVE_CUDA)
29620c24465SJunchao Zhang   if (sf->backend == PETSCSF_BACKEND_CUDA) {
29771438e86SJunchao Zhang     sf->ops->Malloc = PetscSFMalloc_CUDA;
29871438e86SJunchao Zhang     sf->ops->Free   = PetscSFFree_CUDA;
29920c24465SJunchao Zhang   }
30020c24465SJunchao Zhang #endif
30159af0bd3SScott Kruger #if defined(PETSC_HAVE_HIP)
30259af0bd3SScott Kruger   if (sf->backend == PETSCSF_BACKEND_HIP) {
30359af0bd3SScott Kruger     sf->ops->Malloc = PetscSFMalloc_HIP;
30459af0bd3SScott Kruger     sf->ops->Free   = PetscSFFree_HIP;
30559af0bd3SScott Kruger   }
30659af0bd3SScott Kruger #endif
30720c24465SJunchao Zhang 
30820c24465SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS)
30920c24465SJunchao Zhang   if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
31020c24465SJunchao Zhang     sf->ops->Malloc = PetscSFMalloc_Kokkos;
31120c24465SJunchao Zhang     sf->ops->Free   = PetscSFFree_Kokkos;
31220c24465SJunchao Zhang   }
31320c24465SJunchao Zhang #endif
3149566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0));
31595fce210SBarry Smith   sf->setupcalled = PETSC_TRUE;
3163ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
31795fce210SBarry Smith }
31895fce210SBarry Smith 
3198af6ec1cSBarry Smith /*@
320cab54364SBarry Smith   PetscSFSetFromOptions - set `PetscSF` options using the options database
32195fce210SBarry Smith 
32295fce210SBarry Smith   Logically Collective
32395fce210SBarry Smith 
3244165533cSJose E. Roman   Input Parameter:
32595fce210SBarry Smith . sf - star forest
32695fce210SBarry Smith 
32795fce210SBarry Smith   Options Database Keys:
32820662ed9SBarry Smith + -sf_type                      - implementation type, see `PetscSFSetType()`
329*2f04c522SBarry Smith . -sf_rank_order                - sort composite points for gathers and scatters in MPI rank order, gathers are non-deterministic otherwise
33020662ed9SBarry Smith . -sf_use_default_stream        - Assume callers of `PetscSF` computed the input root/leafdata with the default CUDA stream. `PetscSF` will also
33120662ed9SBarry Smith                                   use the default stream to process data. Therefore, no stream synchronization is needed between `PetscSF` and its caller (default: true).
33220662ed9SBarry Smith                                   If true, this option only works with `-use_gpu_aware_mpi 1`.
33320662ed9SBarry Smith . -sf_use_stream_aware_mpi      - Assume the underlying MPI is CUDA-stream aware and `PetscSF` won't sync streams for send/recv buffers passed to MPI (default: false).
33420662ed9SBarry Smith                                   If true, this option only works with `-use_gpu_aware_mpi 1`.
33595fce210SBarry Smith 
3366497c311SBarry Smith - -sf_backend <cuda,hip,kokkos> - Select the device backend`PetscSF` uses. Currently `PetscSF` has these backends: cuda - hip and Kokkos.
33759af0bd3SScott Kruger                                   On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices,
33820c24465SJunchao Zhang                                   the only available is kokkos.
33920c24465SJunchao Zhang 
34095fce210SBarry Smith   Level: intermediate
341cab54364SBarry Smith 
342*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()`
34395fce210SBarry Smith @*/
344d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
345d71ae5a4SJacob Faibussowitsch {
34695fce210SBarry Smith   PetscSFType deft;
34795fce210SBarry Smith   char        type[256];
34895fce210SBarry Smith   PetscBool   flg;
34995fce210SBarry Smith 
35095fce210SBarry Smith   PetscFunctionBegin;
35195fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
352d0609cedSBarry Smith   PetscObjectOptionsBegin((PetscObject)sf);
35395fce210SBarry Smith   deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
3549566063dSJacob Faibussowitsch   PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg));
3559566063dSJacob Faibussowitsch   PetscCall(PetscSFSetType(sf, flg ? type : deft));
3569566063dSJacob Faibussowitsch   PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL));
357f9334340SJunchao Zhang   PetscCall(PetscOptionsBool("-sf_monitor", "monitor the MPI communication in sf", NULL, sf->monitor, &sf->monitor, NULL));
3587fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
35920c24465SJunchao Zhang   {
36020c24465SJunchao Zhang     char      backendstr[32] = {0};
36159af0bd3SScott Kruger     PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set;
36220c24465SJunchao Zhang     /* Change the defaults set in PetscSFCreate() with command line options */
363d5b43468SJose E. Roman     PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL));
3649566063dSJacob Faibussowitsch     PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL));
3659566063dSJacob Faibussowitsch     PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set));
3669566063dSJacob Faibussowitsch     PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda));
3679566063dSJacob Faibussowitsch     PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos));
3689566063dSJacob Faibussowitsch     PetscCall(PetscStrcasecmp("hip", backendstr, &isHip));
36959af0bd3SScott Kruger   #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
37020c24465SJunchao Zhang     if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
37120c24465SJunchao Zhang     else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
37259af0bd3SScott Kruger     else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
37328b400f6SJacob Faibussowitsch     else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
37420c24465SJunchao Zhang   #elif defined(PETSC_HAVE_KOKKOS)
37508401ef6SPierre Jolivet     PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr);
37620c24465SJunchao Zhang   #endif
377715b587bSJunchao Zhang 
378715b587bSJunchao Zhang   #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
379715b587bSJunchao Zhang     if (sf->use_stream_aware_mpi) {
380715b587bSJunchao Zhang       MPI_Info info;
381715b587bSJunchao Zhang 
382715b587bSJunchao Zhang       PetscCallMPI(MPI_Info_create(&info));
383715b587bSJunchao Zhang       PetscCallMPI(MPI_Info_set(info, "type", "cudaStream_t"));
384715b587bSJunchao Zhang       PetscCallMPI(MPIX_Info_set_hex(info, "value", &PetscDefaultCudaStream, sizeof(PetscDefaultCudaStream)));
385715b587bSJunchao Zhang       PetscCallMPI(MPIX_Stream_create(info, &sf->mpi_stream));
386715b587bSJunchao Zhang       PetscCallMPI(MPI_Info_free(&info));
387715b587bSJunchao Zhang       PetscCallMPI(MPIX_Stream_comm_create(PetscObjectComm((PetscObject)sf), sf->mpi_stream, &sf->stream_comm));
388715b587bSJunchao Zhang     }
389715b587bSJunchao Zhang   #endif
39020c24465SJunchao Zhang   }
391c2a741eeSJunchao Zhang #endif
392dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject);
393d0609cedSBarry Smith   PetscOptionsEnd();
3943ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
39595fce210SBarry Smith }
39695fce210SBarry Smith 
39729046d53SLisandro Dalcin /*@
398*2f04c522SBarry Smith   PetscSFSetRankOrder - sort multi-points for gathers and scatters by MPI rank order
39995fce210SBarry Smith 
40095fce210SBarry Smith   Logically Collective
40195fce210SBarry Smith 
4024165533cSJose E. Roman   Input Parameters:
40395fce210SBarry Smith + sf  - star forest
404*2f04c522SBarry Smith - flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (false has a lower setup cost, but is non-deterministic)
40595fce210SBarry Smith 
40695fce210SBarry Smith   Level: advanced
40795fce210SBarry Smith 
408*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
40995fce210SBarry Smith @*/
410d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg)
411d71ae5a4SJacob Faibussowitsch {
41295fce210SBarry Smith   PetscFunctionBegin;
41395fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
41495fce210SBarry Smith   PetscValidLogicalCollectiveBool(sf, flg, 2);
41528b400f6SJacob Faibussowitsch   PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
41695fce210SBarry Smith   sf->rankorder = flg;
4173ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
41895fce210SBarry Smith }
41995fce210SBarry Smith 
4205d83a8b1SBarry Smith /*@
42195fce210SBarry Smith   PetscSFSetGraph - Set a parallel star forest
42295fce210SBarry Smith 
42395fce210SBarry Smith   Collective
42495fce210SBarry Smith 
4254165533cSJose E. Roman   Input Parameters:
42695fce210SBarry Smith + sf         - star forest
427*2f04c522SBarry Smith . nroots     - number of root vertices on the current MPI process (these are possible targets for other process to attach leaves)
428*2f04c522SBarry Smith . nleaves    - number of leaf vertices on the current MPI process, each of these references a root on any process
429*2f04c522SBarry Smith . ilocal     - locations of leaves in leafdata buffers (locations must be >= 0, enforced during setup in debug mode), pass `NULL` for contiguous storage (same as passing (0, 1, 2, ..., nleaves-1))
43020662ed9SBarry Smith . localmode  - copy mode for `ilocal`
431*2f04c522SBarry Smith . iremote    - remote locations of root vertices for each leaf on the current process, length is `nleaves' (locations must be >= 0, enforced during setup in debug mode)
43220662ed9SBarry Smith - remotemode - copy mode for `iremote`
43395fce210SBarry Smith 
43495fce210SBarry Smith   Level: intermediate
43595fce210SBarry Smith 
43695452b02SPatrick Sanan   Notes:
43720662ed9SBarry Smith   Leaf indices in `ilocal` must be unique, otherwise an error occurs.
43838ab3f8aSBarry Smith 
43920662ed9SBarry Smith   Input arrays `ilocal` and `iremote` follow the `PetscCopyMode` semantics.
44020662ed9SBarry Smith   In particular, if `localmode` or `remotemode` is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`,
441db2b9530SVaclav Hapla   PETSc might modify the respective array;
44220662ed9SBarry Smith   if `PETSC_USE_POINTER`, the user must delete the array after `PetscSFDestroy()`.
443*2f04c522SBarry Smith   If `PETSC_COPY_VALUES` is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed).
444*2f04c522SBarry Smith 
445*2f04c522SBarry Smith   `PetscSFSetGraphWithPattern()` is an alternative approach to provide certain communication patterns that have extra optimizations.
446db2b9530SVaclav Hapla 
44738b5cf2dSJacob Faibussowitsch   Fortran Notes:
44820662ed9SBarry Smith   In Fortran you must use `PETSC_COPY_VALUES` for `localmode` and `remotemode`.
449c4e6a40aSLawrence Mitchell 
45038b5cf2dSJacob Faibussowitsch   Developer Notes:
451db2b9530SVaclav Hapla   We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf.
45220662ed9SBarry Smith   This also allows to compare leaf sets of two `PetscSF`s easily.
45372bf8598SVaclav Hapla 
454*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`, `PetscSFSetGraphWithPattern()`
45595fce210SBarry Smith @*/
4569c9354e5SBarry Smith PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt ilocal[], PetscCopyMode localmode, PetscSFNode iremote[], PetscCopyMode remotemode)
457d71ae5a4SJacob Faibussowitsch {
458db2b9530SVaclav Hapla   PetscBool unique, contiguous;
45995fce210SBarry Smith 
46095fce210SBarry Smith   PetscFunctionBegin;
46195fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
4624f572ea9SToby Isaac   if (nleaves > 0 && ilocal) PetscAssertPointer(ilocal, 4);
4634f572ea9SToby Isaac   if (nleaves > 0) PetscAssertPointer(iremote, 6);
46408401ef6SPierre Jolivet   PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots);
46508401ef6SPierre Jolivet   PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves);
4668da24d32SBarry Smith   /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
4678da24d32SBarry Smith    * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */
4688da24d32SBarry Smith   PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode);
4698da24d32SBarry Smith   PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode);
47029046d53SLisandro Dalcin 
4712a67d2daSStefano Zampini   if (sf->nroots >= 0) { /* Reset only if graph already set */
4729566063dSJacob Faibussowitsch     PetscCall(PetscSFReset(sf));
4732a67d2daSStefano Zampini   }
4742a67d2daSStefano Zampini 
4759566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0));
4766497c311SBarry Smith   if (PetscDefined(USE_DEBUG)) {
4776497c311SBarry Smith     PetscMPIInt size;
4786497c311SBarry Smith 
4796497c311SBarry Smith     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
480ac530a7eSPierre Jolivet     for (PetscInt i = 0; i < nleaves; i++) PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values");
4816497c311SBarry Smith   }
48229046d53SLisandro Dalcin 
48395fce210SBarry Smith   sf->nroots  = nroots;
48495fce210SBarry Smith   sf->nleaves = nleaves;
48529046d53SLisandro Dalcin 
486db2b9530SVaclav Hapla   if (localmode == PETSC_COPY_VALUES && ilocal) {
487db2b9530SVaclav Hapla     PetscInt *tlocal = NULL;
488db2b9530SVaclav Hapla 
4899566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nleaves, &tlocal));
4909566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(tlocal, ilocal, nleaves));
491db2b9530SVaclav Hapla     ilocal = tlocal;
492db2b9530SVaclav Hapla   }
493db2b9530SVaclav Hapla   if (remotemode == PETSC_COPY_VALUES) {
494db2b9530SVaclav Hapla     PetscSFNode *tremote = NULL;
495db2b9530SVaclav Hapla 
4969566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nleaves, &tremote));
4979566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(tremote, iremote, nleaves));
498db2b9530SVaclav Hapla     iremote = tremote;
499db2b9530SVaclav Hapla   }
500db2b9530SVaclav Hapla 
50129046d53SLisandro Dalcin   if (nleaves && ilocal) {
502db2b9530SVaclav Hapla     PetscSFNode work;
503db2b9530SVaclav Hapla 
5049566063dSJacob Faibussowitsch     PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
5059566063dSJacob Faibussowitsch     PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
506db2b9530SVaclav Hapla     unique = PetscNot(unique);
507db2b9530SVaclav Hapla     PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF");
508db2b9530SVaclav Hapla     sf->minleaf = ilocal[0];
509db2b9530SVaclav Hapla     sf->maxleaf = ilocal[nleaves - 1];
510db2b9530SVaclav Hapla     contiguous  = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1);
51129046d53SLisandro Dalcin   } else {
51229046d53SLisandro Dalcin     sf->minleaf = 0;
51329046d53SLisandro Dalcin     sf->maxleaf = nleaves - 1;
514db2b9530SVaclav Hapla     unique      = PETSC_TRUE;
515db2b9530SVaclav Hapla     contiguous  = PETSC_TRUE;
51629046d53SLisandro Dalcin   }
51729046d53SLisandro Dalcin 
518db2b9530SVaclav Hapla   if (contiguous) {
519db2b9530SVaclav Hapla     if (localmode == PETSC_USE_POINTER) {
520db2b9530SVaclav Hapla       ilocal = NULL;
521db2b9530SVaclav Hapla     } else {
5229566063dSJacob Faibussowitsch       PetscCall(PetscFree(ilocal));
523db2b9530SVaclav Hapla     }
524db2b9530SVaclav Hapla   }
525db2b9530SVaclav Hapla   sf->mine = ilocal;
526db2b9530SVaclav Hapla   if (localmode == PETSC_USE_POINTER) {
52729046d53SLisandro Dalcin     sf->mine_alloc = NULL;
528db2b9530SVaclav Hapla   } else {
529db2b9530SVaclav Hapla     sf->mine_alloc = ilocal;
53095fce210SBarry Smith   }
5316497c311SBarry Smith   if (PetscDefined(USE_DEBUG)) {
5326497c311SBarry Smith     PetscMPIInt size;
5336497c311SBarry Smith 
5346497c311SBarry Smith     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
535ac530a7eSPierre Jolivet     for (PetscInt i = 0; i < nleaves; i++) PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values");
5366497c311SBarry Smith   }
537db2b9530SVaclav Hapla   sf->remote = iremote;
538db2b9530SVaclav Hapla   if (remotemode == PETSC_USE_POINTER) {
53929046d53SLisandro Dalcin     sf->remote_alloc = NULL;
540db2b9530SVaclav Hapla   } else {
541db2b9530SVaclav Hapla     sf->remote_alloc = iremote;
54295fce210SBarry Smith   }
5439566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0));
54429046d53SLisandro Dalcin   sf->graphset = PETSC_TRUE;
5453ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
54695fce210SBarry Smith }
54795fce210SBarry Smith 
54829046d53SLisandro Dalcin /*@
549cab54364SBarry Smith   PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern
550dd5b3ca6SJunchao Zhang 
551dd5b3ca6SJunchao Zhang   Collective
552dd5b3ca6SJunchao Zhang 
553dd5b3ca6SJunchao Zhang   Input Parameters:
554cab54364SBarry Smith + sf      - The `PetscSF`
555*2f04c522SBarry Smith . map     - Layout of roots over all processes (not used when pattern is `PETSCSF_PATTERN_ALLTOALL`)
556cab54364SBarry Smith - pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL`
557cab54364SBarry Smith 
558cab54364SBarry Smith   Level: intermediate
559dd5b3ca6SJunchao Zhang 
560dd5b3ca6SJunchao Zhang   Notes:
561*2f04c522SBarry Smith   It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector `root` and its `PetscLayout` is `map`.
562*2f04c522SBarry Smith   `n` and `N` are the local and global sizes of `root` respectively.
563dd5b3ca6SJunchao Zhang 
564*2f04c522SBarry Smith   With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on it, it will copy `root` to
565*2f04c522SBarry Smith   sequential vectors `leaves` on all MPI processes.
566dd5b3ca6SJunchao Zhang 
567*2f04c522SBarry Smith   With `PETSCSF_PATTERN_GATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on it, it will copy `root` to a
568*2f04c522SBarry Smith   sequential vector `leaves` on MPI rank 0.
569dd5b3ca6SJunchao Zhang 
570*2f04c522SBarry Smith   With `PETSCSF_PATTERN_ALLTOALL`, map is not used. Suppose NP is the size of `sf`'s communicator. The routine
571*2f04c522SBarry Smith   creates a graph where every MPI process has NP leaves and NP roots. On MPI rank i, its leaf j is connected to root i
572cab54364SBarry Smith   of rank j. Here 0 <=i,j<NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount being 1. Note that it does
573*2f04c522SBarry Smith   not mean one can not send multiple items. One needs to create a new MPI datatype for the multiple data
574*2f04c522SBarry Smith   items with `MPI_Type_contiguous` and use that as the <unit> argument in the `PetscSF` routines. In this case, roots and leaves are symmetric.
575dd5b3ca6SJunchao Zhang 
576*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
577dd5b3ca6SJunchao Zhang  @*/
578d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern)
579d71ae5a4SJacob Faibussowitsch {
580dd5b3ca6SJunchao Zhang   MPI_Comm    comm;
581dd5b3ca6SJunchao Zhang   PetscInt    n, N, res[2];
582dd5b3ca6SJunchao Zhang   PetscMPIInt rank, size;
583dd5b3ca6SJunchao Zhang   PetscSFType type;
584dd5b3ca6SJunchao Zhang 
585dd5b3ca6SJunchao Zhang   PetscFunctionBegin;
5862abc8c78SJacob Faibussowitsch   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
5874f572ea9SToby Isaac   if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscAssertPointer(map, 2);
5889566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
5892c71b3e2SJacob Faibussowitsch   PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern);
5909566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5919566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
592dd5b3ca6SJunchao Zhang 
593dd5b3ca6SJunchao Zhang   if (pattern == PETSCSF_PATTERN_ALLTOALL) {
594835f2295SStefano Zampini     PetscInt sizei = size;
595835f2295SStefano Zampini 
596dd5b3ca6SJunchao Zhang     type = PETSCSFALLTOALL;
5979566063dSJacob Faibussowitsch     PetscCall(PetscLayoutCreate(comm, &sf->map));
5989566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetLocalSize(sf->map, size));
599835f2295SStefano Zampini     PetscCall(PetscLayoutSetSize(sf->map, PetscSqr(sizei)));
6009566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(sf->map));
601dd5b3ca6SJunchao Zhang   } else {
6029566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetLocalSize(map, &n));
6039566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetSize(map, &N));
604dd5b3ca6SJunchao Zhang     res[0] = n;
605dd5b3ca6SJunchao Zhang     res[1] = -n;
606dd5b3ca6SJunchao Zhang     /* Check if n are same over all ranks so that we can optimize it */
607462c564dSBarry Smith     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm));
608dd5b3ca6SJunchao Zhang     if (res[0] == -res[1]) { /* same n */
609dd5b3ca6SJunchao Zhang       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
610dd5b3ca6SJunchao Zhang     } else {
611dd5b3ca6SJunchao Zhang       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
612dd5b3ca6SJunchao Zhang     }
6139566063dSJacob Faibussowitsch     PetscCall(PetscLayoutReference(map, &sf->map));
614dd5b3ca6SJunchao Zhang   }
6159566063dSJacob Faibussowitsch   PetscCall(PetscSFSetType(sf, type));
616dd5b3ca6SJunchao Zhang 
617dd5b3ca6SJunchao Zhang   sf->pattern = pattern;
618dd5b3ca6SJunchao Zhang   sf->mine    = NULL; /* Contiguous */
619dd5b3ca6SJunchao Zhang 
620dd5b3ca6SJunchao Zhang   /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
621dd5b3ca6SJunchao Zhang      Also set other easy stuff.
622dd5b3ca6SJunchao Zhang    */
623dd5b3ca6SJunchao Zhang   if (pattern == PETSCSF_PATTERN_ALLGATHER) {
624dd5b3ca6SJunchao Zhang     sf->nleaves = N;
625dd5b3ca6SJunchao Zhang     sf->nroots  = n;
626dd5b3ca6SJunchao Zhang     sf->nranks  = size;
627dd5b3ca6SJunchao Zhang     sf->minleaf = 0;
628dd5b3ca6SJunchao Zhang     sf->maxleaf = N - 1;
629dd5b3ca6SJunchao Zhang   } else if (pattern == PETSCSF_PATTERN_GATHER) {
630dd5b3ca6SJunchao Zhang     sf->nleaves = rank ? 0 : N;
631dd5b3ca6SJunchao Zhang     sf->nroots  = n;
632dd5b3ca6SJunchao Zhang     sf->nranks  = rank ? 0 : size;
633dd5b3ca6SJunchao Zhang     sf->minleaf = 0;
634dd5b3ca6SJunchao Zhang     sf->maxleaf = rank ? -1 : N - 1;
635dd5b3ca6SJunchao Zhang   } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
636dd5b3ca6SJunchao Zhang     sf->nleaves = size;
637dd5b3ca6SJunchao Zhang     sf->nroots  = size;
638dd5b3ca6SJunchao Zhang     sf->nranks  = size;
639dd5b3ca6SJunchao Zhang     sf->minleaf = 0;
640dd5b3ca6SJunchao Zhang     sf->maxleaf = size - 1;
641dd5b3ca6SJunchao Zhang   }
642dd5b3ca6SJunchao Zhang   sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
643dd5b3ca6SJunchao Zhang   sf->graphset = PETSC_TRUE;
6443ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
645dd5b3ca6SJunchao Zhang }
646dd5b3ca6SJunchao Zhang 
647dd5b3ca6SJunchao Zhang /*@
648*2f04c522SBarry Smith   PetscSFCreateInverseSF - given a `PetscSF` in which all roots have degree 1 (exactly one leaf), creates the inverse map
64995fce210SBarry Smith 
65095fce210SBarry Smith   Collective
65195fce210SBarry Smith 
6524165533cSJose E. Roman   Input Parameter:
65395fce210SBarry Smith . sf - star forest to invert
65495fce210SBarry Smith 
6554165533cSJose E. Roman   Output Parameter:
65620662ed9SBarry Smith . isf - inverse of `sf`
6574165533cSJose E. Roman 
65895fce210SBarry Smith   Level: advanced
65995fce210SBarry Smith 
66095fce210SBarry Smith   Notes:
66195fce210SBarry Smith   All roots must have degree 1.
66295fce210SBarry Smith 
66395fce210SBarry Smith   The local space may be a permutation, but cannot be sparse.
66495fce210SBarry Smith 
665*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFSetGraph()`
66695fce210SBarry Smith @*/
667d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf)
668d71ae5a4SJacob Faibussowitsch {
66995fce210SBarry Smith   PetscMPIInt     rank;
67095fce210SBarry Smith   PetscInt        i, nroots, nleaves, maxlocal, count, *newilocal;
67195fce210SBarry Smith   const PetscInt *ilocal;
67295fce210SBarry Smith   PetscSFNode    *roots, *leaves;
67395fce210SBarry Smith 
67495fce210SBarry Smith   PetscFunctionBegin;
67529046d53SLisandro Dalcin   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
67629046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
6774f572ea9SToby Isaac   PetscAssertPointer(isf, 2);
67829046d53SLisandro Dalcin 
6799566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL));
68029046d53SLisandro Dalcin   maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
68129046d53SLisandro Dalcin 
6829566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
6839566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves));
684ae9aee6dSMatthew G. Knepley   for (i = 0; i < maxlocal; i++) {
68595fce210SBarry Smith     leaves[i].rank  = rank;
68695fce210SBarry Smith     leaves[i].index = i;
68795fce210SBarry Smith   }
68895fce210SBarry Smith   for (i = 0; i < nroots; i++) {
68995fce210SBarry Smith     roots[i].rank  = -1;
69095fce210SBarry Smith     roots[i].index = -1;
69195fce210SBarry Smith   }
6926497c311SBarry Smith   PetscCall(PetscSFReduceBegin(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE));
6936497c311SBarry Smith   PetscCall(PetscSFReduceEnd(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE));
69495fce210SBarry Smith 
69595fce210SBarry Smith   /* Check whether our leaves are sparse */
6969371c9d4SSatish Balay   for (i = 0, count = 0; i < nroots; i++)
6979371c9d4SSatish Balay     if (roots[i].rank >= 0) count++;
69895fce210SBarry Smith   if (count == nroots) newilocal = NULL;
6999371c9d4SSatish Balay   else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ PetscCall(PetscMalloc1(count, &newilocal));
70095fce210SBarry Smith     for (i = 0, count = 0; i < nroots; i++) {
70195fce210SBarry Smith       if (roots[i].rank >= 0) {
70295fce210SBarry Smith         newilocal[count]   = i;
70395fce210SBarry Smith         roots[count].rank  = roots[i].rank;
70495fce210SBarry Smith         roots[count].index = roots[i].index;
70595fce210SBarry Smith         count++;
70695fce210SBarry Smith       }
70795fce210SBarry Smith     }
70895fce210SBarry Smith   }
70995fce210SBarry Smith 
7109566063dSJacob Faibussowitsch   PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf));
7119566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES));
7129566063dSJacob Faibussowitsch   PetscCall(PetscFree2(roots, leaves));
7133ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
71495fce210SBarry Smith }
71595fce210SBarry Smith 
71695fce210SBarry Smith /*@
717cab54364SBarry Smith   PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph
71895fce210SBarry Smith 
71995fce210SBarry Smith   Collective
72095fce210SBarry Smith 
7214165533cSJose E. Roman   Input Parameters:
72295fce210SBarry Smith + sf  - communication object to duplicate
723cab54364SBarry Smith - opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`)
72495fce210SBarry Smith 
7254165533cSJose E. Roman   Output Parameter:
72695fce210SBarry Smith . newsf - new communication object
72795fce210SBarry Smith 
72895fce210SBarry Smith   Level: beginner
72995fce210SBarry Smith 
730*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
73195fce210SBarry Smith @*/
732d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf)
733d71ae5a4SJacob Faibussowitsch {
73429046d53SLisandro Dalcin   PetscSFType  type;
73597929ea7SJunchao Zhang   MPI_Datatype dtype = MPIU_SCALAR;
73695fce210SBarry Smith 
73795fce210SBarry Smith   PetscFunctionBegin;
73829046d53SLisandro Dalcin   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
73929046d53SLisandro Dalcin   PetscValidLogicalCollectiveEnum(sf, opt, 2);
7404f572ea9SToby Isaac   PetscAssertPointer(newsf, 3);
7419566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf));
7429566063dSJacob Faibussowitsch   PetscCall(PetscSFGetType(sf, &type));
7439566063dSJacob Faibussowitsch   if (type) PetscCall(PetscSFSetType(*newsf, type));
74435cb6cd3SPierre Jolivet   (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */
74595fce210SBarry Smith   if (opt == PETSCSF_DUPLICATE_GRAPH) {
746dd5b3ca6SJunchao Zhang     PetscSFCheckGraphSet(sf, 1);
747dd5b3ca6SJunchao Zhang     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
74895fce210SBarry Smith       PetscInt           nroots, nleaves;
74995fce210SBarry Smith       const PetscInt    *ilocal;
75095fce210SBarry Smith       const PetscSFNode *iremote;
7519566063dSJacob Faibussowitsch       PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
7529566063dSJacob Faibussowitsch       PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
753dd5b3ca6SJunchao Zhang     } else {
7549566063dSJacob Faibussowitsch       PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern));
755dd5b3ca6SJunchao Zhang     }
75695fce210SBarry Smith   }
75797929ea7SJunchao Zhang   /* Since oldtype is committed, so is newtype, according to MPI */
7589566063dSJacob Faibussowitsch   if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype));
75997929ea7SJunchao Zhang   (*newsf)->vscat.bs     = sf->vscat.bs;
76097929ea7SJunchao Zhang   (*newsf)->vscat.unit   = dtype;
76197929ea7SJunchao Zhang   (*newsf)->vscat.to_n   = sf->vscat.to_n;
76297929ea7SJunchao Zhang   (*newsf)->vscat.from_n = sf->vscat.from_n;
76397929ea7SJunchao Zhang   /* Do not copy lsf. Build it on demand since it is rarely used */
76497929ea7SJunchao Zhang 
76520c24465SJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
76620c24465SJunchao Zhang   (*newsf)->backend              = sf->backend;
76771438e86SJunchao Zhang   (*newsf)->unknown_input_stream = sf->unknown_input_stream;
76820c24465SJunchao Zhang   (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
76920c24465SJunchao Zhang   (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
77020c24465SJunchao Zhang #endif
771dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, Duplicate, opt, *newsf);
77220c24465SJunchao Zhang   /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
7733ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
77495fce210SBarry Smith }
77595fce210SBarry Smith 
77695fce210SBarry Smith /*@C
77795fce210SBarry Smith   PetscSFGetGraph - Get the graph specifying a parallel star forest
77895fce210SBarry Smith 
77995fce210SBarry Smith   Not Collective
78095fce210SBarry Smith 
7814165533cSJose E. Roman   Input Parameter:
78295fce210SBarry Smith . sf - star forest
78395fce210SBarry Smith 
7844165533cSJose E. Roman   Output Parameters:
785*2f04c522SBarry Smith + nroots  - number of root vertices on the current process (these are possible targets for other MPI process to attach leaves)
786*2f04c522SBarry Smith . nleaves - number of leaf vertices on the current process, each of these references a root on any MPI process
78720662ed9SBarry Smith . ilocal  - locations of leaves in leafdata buffers (if returned value is `NULL`, it means leaves are in contiguous storage)
78895fce210SBarry Smith - iremote - remote locations of root vertices for each leaf on the current process
78995fce210SBarry Smith 
790cab54364SBarry Smith   Level: intermediate
791cab54364SBarry Smith 
792373e0d91SLisandro Dalcin   Notes:
79320662ed9SBarry Smith   We are not currently requiring that the graph is set, thus returning `nroots` = -1 if it has not been set yet
794373e0d91SLisandro Dalcin 
79520662ed9SBarry Smith   The returned `ilocal` and `iremote` might contain values in different order than the input ones in `PetscSFSetGraph()`
796db2b9530SVaclav Hapla 
797ce78bad3SBarry Smith   Fortran Note:
798ce78bad3SBarry Smith   Use `PetscSFRestoreGraph()` when access to the arrays is no longer needed
799ca797d7aSLawrence Mitchell 
800*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`
80195fce210SBarry Smith @*/
802ce78bad3SBarry Smith PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt *ilocal[], const PetscSFNode *iremote[])
803d71ae5a4SJacob Faibussowitsch {
80495fce210SBarry Smith   PetscFunctionBegin;
80595fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
806b8dee149SJunchao Zhang   if (sf->ops->GetGraph) {
807f4f49eeaSPierre Jolivet     PetscCall(sf->ops->GetGraph(sf, nroots, nleaves, ilocal, iremote));
808b8dee149SJunchao Zhang   } else {
80995fce210SBarry Smith     if (nroots) *nroots = sf->nroots;
81095fce210SBarry Smith     if (nleaves) *nleaves = sf->nleaves;
81195fce210SBarry Smith     if (ilocal) *ilocal = sf->mine;
81295fce210SBarry Smith     if (iremote) *iremote = sf->remote;
813b8dee149SJunchao Zhang   }
8143ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
81595fce210SBarry Smith }
81695fce210SBarry Smith 
81729046d53SLisandro Dalcin /*@
81895fce210SBarry Smith   PetscSFGetLeafRange - Get the active leaf ranges
81995fce210SBarry Smith 
82095fce210SBarry Smith   Not Collective
82195fce210SBarry Smith 
8224165533cSJose E. Roman   Input Parameter:
82395fce210SBarry Smith . sf - star forest
82495fce210SBarry Smith 
8254165533cSJose E. Roman   Output Parameters:
826*2f04c522SBarry Smith + minleaf - minimum active leaf on this MPI process. Returns 0 if there are no leaves.
827*2f04c522SBarry Smith - maxleaf - maximum active leaf on this MPI process. Returns -1 if there are no leaves.
82895fce210SBarry Smith 
82995fce210SBarry Smith   Level: developer
83095fce210SBarry Smith 
831*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
83295fce210SBarry Smith @*/
833d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf)
834d71ae5a4SJacob Faibussowitsch {
83595fce210SBarry Smith   PetscFunctionBegin;
83695fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
83729046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
83895fce210SBarry Smith   if (minleaf) *minleaf = sf->minleaf;
83995fce210SBarry Smith   if (maxleaf) *maxleaf = sf->maxleaf;
8403ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
84195fce210SBarry Smith }
84295fce210SBarry Smith 
843ffeef943SBarry Smith /*@
844cab54364SBarry Smith   PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database
845fe2efc57SMark 
84620f4b53cSBarry Smith   Collective
847fe2efc57SMark 
848fe2efc57SMark   Input Parameters:
849fe2efc57SMark + A    - the star forest
850cab54364SBarry Smith . obj  - Optional object that provides the prefix for the option names
851736c3998SJose E. Roman - name - command line option
852fe2efc57SMark 
853fe2efc57SMark   Level: intermediate
854cab54364SBarry Smith 
85520662ed9SBarry Smith   Note:
85620662ed9SBarry Smith   See `PetscObjectViewFromOptions()` for possible `PetscViewer` and `PetscViewerFormat`
85720662ed9SBarry Smith 
858*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
859fe2efc57SMark @*/
860d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[])
861d71ae5a4SJacob Faibussowitsch {
862fe2efc57SMark   PetscFunctionBegin;
863fe2efc57SMark   PetscValidHeaderSpecific(A, PETSCSF_CLASSID, 1);
8649566063dSJacob Faibussowitsch   PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name));
8653ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
866fe2efc57SMark }
867fe2efc57SMark 
868ffeef943SBarry Smith /*@
86995fce210SBarry Smith   PetscSFView - view a star forest
87095fce210SBarry Smith 
87195fce210SBarry Smith   Collective
87295fce210SBarry Smith 
8734165533cSJose E. Roman   Input Parameters:
87495fce210SBarry Smith + sf     - star forest
875cab54364SBarry Smith - viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD`
87695fce210SBarry Smith 
87795fce210SBarry Smith   Level: beginner
87895fce210SBarry Smith 
879*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()`
88095fce210SBarry Smith @*/
881d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer)
882d71ae5a4SJacob Faibussowitsch {
8839f196a02SMartin Diehl   PetscBool         isascii;
88495fce210SBarry Smith   PetscViewerFormat format;
88595fce210SBarry Smith 
88695fce210SBarry Smith   PetscFunctionBegin;
88795fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
8889566063dSJacob Faibussowitsch   if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer));
88995fce210SBarry Smith   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
89095fce210SBarry Smith   PetscCheckSameComm(sf, 1, viewer, 2);
8919566063dSJacob Faibussowitsch   if (sf->graphset) PetscCall(PetscSFSetUp(sf));
8929f196a02SMartin Diehl   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
8939f196a02SMartin Diehl   if (isascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
89495fce210SBarry Smith     PetscMPIInt rank;
8956497c311SBarry Smith     PetscInt    j;
89695fce210SBarry Smith 
8979566063dSJacob Faibussowitsch     PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer));
8989566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPushTab(viewer));
899dd5b3ca6SJunchao Zhang     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
90080153354SVaclav Hapla       if (!sf->graphset) {
9019566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n"));
9029566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPopTab(viewer));
9033ba16761SJacob Faibussowitsch         PetscFunctionReturn(PETSC_SUCCESS);
90480153354SVaclav Hapla       }
9059566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
9069566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
9076497c311SBarry Smith       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%d\n", rank, sf->nroots, sf->nleaves, sf->nranks));
908835f2295SStefano Zampini       for (PetscInt i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index));
9099566063dSJacob Faibussowitsch       PetscCall(PetscViewerFlush(viewer));
9109566063dSJacob Faibussowitsch       PetscCall(PetscViewerGetFormat(viewer, &format));
91195fce210SBarry Smith       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
91281bfa7aaSJed Brown         PetscMPIInt *tmpranks, *perm;
9136497c311SBarry Smith 
9149566063dSJacob Faibussowitsch         PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm));
9159566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks));
9166497c311SBarry Smith         for (PetscMPIInt i = 0; i < sf->nranks; i++) perm[i] = i;
9179566063dSJacob Faibussowitsch         PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm));
9189566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank));
9196497c311SBarry Smith         for (PetscMPIInt ii = 0; ii < sf->nranks; ii++) {
9206497c311SBarry Smith           PetscMPIInt i = perm[ii];
9216497c311SBarry Smith 
9229566063dSJacob Faibussowitsch           PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i]));
92348a46eb9SPierre Jolivet           for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d]    %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j]));
92495fce210SBarry Smith         }
9259566063dSJacob Faibussowitsch         PetscCall(PetscFree2(tmpranks, perm));
92695fce210SBarry Smith       }
9279566063dSJacob Faibussowitsch       PetscCall(PetscViewerFlush(viewer));
9289566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
929dd5b3ca6SJunchao Zhang     }
9309566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPopTab(viewer));
93195fce210SBarry Smith   }
932dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, View, viewer);
9333ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
93495fce210SBarry Smith }
93595fce210SBarry Smith 
93695fce210SBarry Smith /*@C
937*2f04c522SBarry Smith   PetscSFGetRootRanks - Get the root MPI ranks and number of vertices referenced by leaves on this process
93895fce210SBarry Smith 
93995fce210SBarry Smith   Not Collective
94095fce210SBarry Smith 
9414165533cSJose E. Roman   Input Parameter:
94295fce210SBarry Smith . sf - star forest
94395fce210SBarry Smith 
9444165533cSJose E. Roman   Output Parameters:
945*2f04c522SBarry Smith + nranks  - number of MPI processes referenced by local part
946*2f04c522SBarry Smith . ranks   - [`nranks`] array of MPI ranks
947*2f04c522SBarry Smith . roffset - [`nranks`+1] offset in `rmine` and `rremote` for each MPI process
948*2f04c522SBarry Smith . rmine   - [`roffset`[`nranks`]] concatenated array holding local indices referencing each remote MPI process, or `NULL`
949*2f04c522SBarry Smith - rremote - [`roffset`[`nranks`]] concatenated array holding remote indices referenced for each remote MPI process, or `NULL`
95095fce210SBarry Smith 
95195fce210SBarry Smith   Level: developer
95295fce210SBarry Smith 
953*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGetLeafRanks()`
95495fce210SBarry Smith @*/
9559c9354e5SBarry Smith PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscMPIInt *nranks, const PetscMPIInt *ranks[], const PetscInt *roffset[], const PetscInt *rmine[], const PetscInt *rremote[])
956d71ae5a4SJacob Faibussowitsch {
95795fce210SBarry Smith   PetscFunctionBegin;
95895fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
95928b400f6SJacob Faibussowitsch   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
960dec1416fSJunchao Zhang   if (sf->ops->GetRootRanks) {
9619927e4dfSBarry Smith     PetscUseTypeMethod(sf, GetRootRanks, nranks, ranks, roffset, rmine, rremote);
962dec1416fSJunchao Zhang   } else {
963dec1416fSJunchao Zhang     /* The generic implementation */
96495fce210SBarry Smith     if (nranks) *nranks = sf->nranks;
96595fce210SBarry Smith     if (ranks) *ranks = sf->ranks;
96695fce210SBarry Smith     if (roffset) *roffset = sf->roffset;
96795fce210SBarry Smith     if (rmine) *rmine = sf->rmine;
96895fce210SBarry Smith     if (rremote) *rremote = sf->rremote;
969dec1416fSJunchao Zhang   }
9703ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
97195fce210SBarry Smith }
97295fce210SBarry Smith 
9738750ddebSJunchao Zhang /*@C
974*2f04c522SBarry Smith   PetscSFGetLeafRanks - Get leaf MPI ranks referencing roots on this process
9758750ddebSJunchao Zhang 
9768750ddebSJunchao Zhang   Not Collective
9778750ddebSJunchao Zhang 
9784165533cSJose E. Roman   Input Parameter:
9798750ddebSJunchao Zhang . sf - star forest
9808750ddebSJunchao Zhang 
9814165533cSJose E. Roman   Output Parameters:
982*2f04c522SBarry Smith + niranks  - number of leaf MPI processes referencing roots on this process
983*2f04c522SBarry Smith . iranks   - [`niranks`] array of MPI ranks
984*2f04c522SBarry Smith . ioffset  - [`niranks`+1] offset in `irootloc` for each MPI process
985*2f04c522SBarry Smith - irootloc - [`ioffset`[`niranks`]] concatenated array holding local indices of roots referenced by each leaf MPI process
9868750ddebSJunchao Zhang 
9878750ddebSJunchao Zhang   Level: developer
9888750ddebSJunchao Zhang 
989*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGetRootRanks()`
9908750ddebSJunchao Zhang @*/
9919c9354e5SBarry Smith PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscMPIInt *niranks, const PetscMPIInt *iranks[], const PetscInt *ioffset[], const PetscInt *irootloc[])
992d71ae5a4SJacob Faibussowitsch {
9938750ddebSJunchao Zhang   PetscFunctionBegin;
9948750ddebSJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
99528b400f6SJacob Faibussowitsch   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
9968750ddebSJunchao Zhang   if (sf->ops->GetLeafRanks) {
9979927e4dfSBarry Smith     PetscUseTypeMethod(sf, GetLeafRanks, niranks, iranks, ioffset, irootloc);
9988750ddebSJunchao Zhang   } else {
9998750ddebSJunchao Zhang     PetscSFType type;
10009566063dSJacob Faibussowitsch     PetscCall(PetscSFGetType(sf, &type));
100198921bdaSJacob Faibussowitsch     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
10028750ddebSJunchao Zhang   }
10033ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
10048750ddebSJunchao Zhang }
10058750ddebSJunchao Zhang 
1006d71ae5a4SJacob Faibussowitsch static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list)
1007d71ae5a4SJacob Faibussowitsch {
1008b5a8e515SJed Brown   PetscInt i;
1009b5a8e515SJed Brown   for (i = 0; i < n; i++) {
1010b5a8e515SJed Brown     if (needle == list[i]) return PETSC_TRUE;
1011b5a8e515SJed Brown   }
1012b5a8e515SJed Brown   return PETSC_FALSE;
1013b5a8e515SJed Brown }
1014b5a8e515SJed Brown 
101595fce210SBarry Smith /*@C
1016*2f04c522SBarry Smith   PetscSFSetUpRanks - Set up data structures associated with MPI ranks; this is for internal use by `PetscSF` implementations.
101721c688dcSJed Brown 
101821c688dcSJed Brown   Collective
101921c688dcSJed Brown 
10204165533cSJose E. Roman   Input Parameters:
1021cab54364SBarry Smith + sf     - `PetscSF` to set up; `PetscSFSetGraph()` must have been called
1022cab54364SBarry Smith - dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange)
102321c688dcSJed Brown 
102421c688dcSJed Brown   Level: developer
102521c688dcSJed Brown 
1026*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGetRootRanks()`
102721c688dcSJed Brown @*/
1028d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup)
1029d71ae5a4SJacob Faibussowitsch {
1030eec179cfSJacob Faibussowitsch   PetscHMapI    table;
1031eec179cfSJacob Faibussowitsch   PetscHashIter pos;
10326497c311SBarry Smith   PetscMPIInt   size, groupsize, *groupranks, *ranks;
10336497c311SBarry Smith   PetscInt     *rcount;
10346497c311SBarry Smith   PetscInt      irank, sfnrank, ranksi;
10356497c311SBarry Smith   PetscMPIInt   i, orank = -1;
103621c688dcSJed Brown 
103721c688dcSJed Brown   PetscFunctionBegin;
103821c688dcSJed Brown   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
103929046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
10409566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
1041eec179cfSJacob Faibussowitsch   PetscCall(PetscHMapICreateWithSize(10, &table));
104221c688dcSJed Brown   for (i = 0; i < sf->nleaves; i++) {
104321c688dcSJed Brown     /* Log 1-based rank */
1044eec179cfSJacob Faibussowitsch     PetscCall(PetscHMapISetWithMode(table, sf->remote[i].rank + 1, 1, ADD_VALUES));
104521c688dcSJed Brown   }
10466497c311SBarry Smith   PetscCall(PetscHMapIGetSize(table, &sfnrank));
10476497c311SBarry Smith   PetscCall(PetscMPIIntCast(sfnrank, &sf->nranks));
10489566063dSJacob Faibussowitsch   PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote));
10499566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks));
1050eec179cfSJacob Faibussowitsch   PetscHashIterBegin(table, pos);
105121c688dcSJed Brown   for (i = 0; i < sf->nranks; i++) {
10526497c311SBarry Smith     PetscHashIterGetKey(table, pos, ranksi);
10536497c311SBarry Smith     PetscCall(PetscMPIIntCast(ranksi, &ranks[i]));
1054eec179cfSJacob Faibussowitsch     PetscHashIterGetVal(table, pos, rcount[i]);
1055eec179cfSJacob Faibussowitsch     PetscHashIterNext(table, pos);
105621c688dcSJed Brown     ranks[i]--; /* Convert back to 0-based */
105721c688dcSJed Brown   }
1058eec179cfSJacob Faibussowitsch   PetscCall(PetscHMapIDestroy(&table));
1059b5a8e515SJed Brown 
1060b5a8e515SJed Brown   /* We expect that dgroup is reliably "small" while nranks could be large */
1061b5a8e515SJed Brown   {
10627fb8a5e4SKarl Rupp     MPI_Group    group = MPI_GROUP_NULL;
1063b5a8e515SJed Brown     PetscMPIInt *dgroupranks;
10646497c311SBarry Smith 
10659566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
10669566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Group_size(dgroup, &groupsize));
10679566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(groupsize, &dgroupranks));
10689566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(groupsize, &groupranks));
1069b5a8e515SJed Brown     for (i = 0; i < groupsize; i++) dgroupranks[i] = i;
10709566063dSJacob Faibussowitsch     if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks));
10719566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Group_free(&group));
10729566063dSJacob Faibussowitsch     PetscCall(PetscFree(dgroupranks));
1073b5a8e515SJed Brown   }
1074b5a8e515SJed Brown 
1075b5a8e515SJed Brown   /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
1076b5a8e515SJed Brown   for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) {
1077b5a8e515SJed Brown     for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */
1078b5a8e515SJed Brown       if (InList(ranks[i], groupsize, groupranks)) break;
1079b5a8e515SJed Brown     }
1080b5a8e515SJed Brown     for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
1081b5a8e515SJed Brown       if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break;
1082b5a8e515SJed Brown     }
1083b5a8e515SJed Brown     if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
10846497c311SBarry Smith       PetscMPIInt tmprank;
10856497c311SBarry Smith       PetscInt    tmpcount;
1086247e8311SStefano Zampini 
1087b5a8e515SJed Brown       tmprank             = ranks[i];
1088b5a8e515SJed Brown       tmpcount            = rcount[i];
1089b5a8e515SJed Brown       ranks[i]            = ranks[sf->ndranks];
1090b5a8e515SJed Brown       rcount[i]           = rcount[sf->ndranks];
1091b5a8e515SJed Brown       ranks[sf->ndranks]  = tmprank;
1092b5a8e515SJed Brown       rcount[sf->ndranks] = tmpcount;
1093b5a8e515SJed Brown       sf->ndranks++;
1094b5a8e515SJed Brown     }
1095b5a8e515SJed Brown   }
10969566063dSJacob Faibussowitsch   PetscCall(PetscFree(groupranks));
10976497c311SBarry Smith   PetscCall(PetscSortMPIIntWithIntArray(sf->ndranks, ranks, rcount));
10986497c311SBarry Smith   if (rcount) PetscCall(PetscSortMPIIntWithIntArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks));
109921c688dcSJed Brown   sf->roffset[0] = 0;
110021c688dcSJed Brown   for (i = 0; i < sf->nranks; i++) {
11019566063dSJacob Faibussowitsch     PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i));
110221c688dcSJed Brown     sf->roffset[i + 1] = sf->roffset[i] + rcount[i];
110321c688dcSJed Brown     rcount[i]          = 0;
110421c688dcSJed Brown   }
1105247e8311SStefano Zampini   for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) {
1106247e8311SStefano Zampini     /* short circuit */
1107247e8311SStefano Zampini     if (orank != sf->remote[i].rank) {
110821c688dcSJed Brown       /* Search for index of iremote[i].rank in sf->ranks */
1109835f2295SStefano Zampini       PetscCall(PetscMPIIntCast(sf->remote[i].rank, &orank));
1110835f2295SStefano Zampini       PetscCall(PetscFindMPIInt(orank, sf->ndranks, sf->ranks, &irank));
1111b5a8e515SJed Brown       if (irank < 0) {
1112835f2295SStefano Zampini         PetscCall(PetscFindMPIInt(orank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank));
1113b5a8e515SJed Brown         if (irank >= 0) irank += sf->ndranks;
111421c688dcSJed Brown       }
1115247e8311SStefano Zampini     }
1116835f2295SStefano Zampini     PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %d in array", orank);
111721c688dcSJed Brown     sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
111821c688dcSJed Brown     sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
111921c688dcSJed Brown     rcount[irank]++;
112021c688dcSJed Brown   }
11219566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rcount, ranks));
11223ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
112321c688dcSJed Brown }
112421c688dcSJed Brown 
112521c688dcSJed Brown /*@C
112695fce210SBarry Smith   PetscSFGetGroups - gets incoming and outgoing process groups
112795fce210SBarry Smith 
112895fce210SBarry Smith   Collective
112995fce210SBarry Smith 
11304165533cSJose E. Roman   Input Parameter:
113195fce210SBarry Smith . sf - star forest
113295fce210SBarry Smith 
11334165533cSJose E. Roman   Output Parameters:
113495fce210SBarry Smith + incoming - group of origin processes for incoming edges (leaves that reference my roots)
113595fce210SBarry Smith - outgoing - group of destination processes for outgoing edges (roots that I reference)
113695fce210SBarry Smith 
113795fce210SBarry Smith   Level: developer
113895fce210SBarry Smith 
1139*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
114095fce210SBarry Smith @*/
1141d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing)
1142d71ae5a4SJacob Faibussowitsch {
11437fb8a5e4SKarl Rupp   MPI_Group group = MPI_GROUP_NULL;
114495fce210SBarry Smith 
114595fce210SBarry Smith   PetscFunctionBegin;
114608401ef6SPierre Jolivet   PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups");
114795fce210SBarry Smith   if (sf->ingroup == MPI_GROUP_NULL) {
114895fce210SBarry Smith     PetscInt        i;
114995fce210SBarry Smith     const PetscInt *indegree;
11506497c311SBarry Smith     PetscMPIInt     rank, *outranks, *inranks, indegree0;
115195fce210SBarry Smith     PetscSFNode    *remote;
115295fce210SBarry Smith     PetscSF         bgcount;
115395fce210SBarry Smith 
115495fce210SBarry Smith     /* Compute the number of incoming ranks */
11559566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(sf->nranks, &remote));
115695fce210SBarry Smith     for (i = 0; i < sf->nranks; i++) {
115795fce210SBarry Smith       remote[i].rank  = sf->ranks[i];
115895fce210SBarry Smith       remote[i].index = 0;
115995fce210SBarry Smith     }
11609566063dSJacob Faibussowitsch     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount));
11619566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
11629566063dSJacob Faibussowitsch     PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree));
11639566063dSJacob Faibussowitsch     PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree));
116495fce210SBarry Smith     /* Enumerate the incoming ranks */
11659566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks));
11669566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
116795fce210SBarry Smith     for (i = 0; i < sf->nranks; i++) outranks[i] = rank;
11689566063dSJacob Faibussowitsch     PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks));
11699566063dSJacob Faibussowitsch     PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks));
11709566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
11716497c311SBarry Smith     PetscCall(PetscMPIIntCast(indegree[0], &indegree0));
11726497c311SBarry Smith     PetscCallMPI(MPI_Group_incl(group, indegree0, inranks, &sf->ingroup));
11739566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Group_free(&group));
11749566063dSJacob Faibussowitsch     PetscCall(PetscFree2(inranks, outranks));
11759566063dSJacob Faibussowitsch     PetscCall(PetscSFDestroy(&bgcount));
117695fce210SBarry Smith   }
117795fce210SBarry Smith   *incoming = sf->ingroup;
117895fce210SBarry Smith 
117995fce210SBarry Smith   if (sf->outgroup == MPI_GROUP_NULL) {
11809566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
11819566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup));
11829566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Group_free(&group));
118395fce210SBarry Smith   }
118495fce210SBarry Smith   *outgoing = sf->outgroup;
11853ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
118695fce210SBarry Smith }
118795fce210SBarry Smith 
118829046d53SLisandro Dalcin /*@
11890dd791a8SStefano Zampini   PetscSFGetRanksSF - gets the `PetscSF` to perform communications with root ranks
11900dd791a8SStefano Zampini 
11910dd791a8SStefano Zampini   Collective
11920dd791a8SStefano Zampini 
11930dd791a8SStefano Zampini   Input Parameter:
11940dd791a8SStefano Zampini . sf - star forest
11950dd791a8SStefano Zampini 
11960dd791a8SStefano Zampini   Output Parameter:
1197*2f04c522SBarry Smith . rsf - the star forest with a single root per MPI process to perform communications
11980dd791a8SStefano Zampini 
11990dd791a8SStefano Zampini   Level: developer
12000dd791a8SStefano Zampini 
1201*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetRootRanks()`
12020dd791a8SStefano Zampini @*/
12030dd791a8SStefano Zampini PetscErrorCode PetscSFGetRanksSF(PetscSF sf, PetscSF *rsf)
12040dd791a8SStefano Zampini {
12050dd791a8SStefano Zampini   PetscFunctionBegin;
12060dd791a8SStefano Zampini   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
12070dd791a8SStefano Zampini   PetscAssertPointer(rsf, 2);
12080dd791a8SStefano Zampini   if (!sf->rankssf) {
12090dd791a8SStefano Zampini     PetscSFNode       *rremotes;
12100dd791a8SStefano Zampini     const PetscMPIInt *ranks;
12116497c311SBarry Smith     PetscMPIInt        nranks;
12120dd791a8SStefano Zampini 
12130dd791a8SStefano Zampini     PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
12140dd791a8SStefano Zampini     PetscCall(PetscMalloc1(nranks, &rremotes));
12150dd791a8SStefano Zampini     for (PetscInt i = 0; i < nranks; i++) {
12160dd791a8SStefano Zampini       rremotes[i].rank  = ranks[i];
12170dd791a8SStefano Zampini       rremotes[i].index = 0;
12180dd791a8SStefano Zampini     }
12190dd791a8SStefano Zampini     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &sf->rankssf));
12200dd791a8SStefano Zampini     PetscCall(PetscSFSetGraph(sf->rankssf, 1, nranks, NULL, PETSC_OWN_POINTER, rremotes, PETSC_OWN_POINTER));
12210dd791a8SStefano Zampini   }
12220dd791a8SStefano Zampini   *rsf = sf->rankssf;
12230dd791a8SStefano Zampini   PetscFunctionReturn(PETSC_SUCCESS);
12240dd791a8SStefano Zampini }
12250dd791a8SStefano Zampini 
12260dd791a8SStefano Zampini /*@
1227cab54364SBarry Smith   PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters
122895fce210SBarry Smith 
122995fce210SBarry Smith   Collective
123095fce210SBarry Smith 
12314165533cSJose E. Roman   Input Parameter:
123295fce210SBarry Smith . sf - star forest that may contain roots with 0 or with more than 1 vertex
123395fce210SBarry Smith 
12344165533cSJose E. Roman   Output Parameter:
1235*2f04c522SBarry Smith . multi - star forest with split roots, such that each root has degree exactly 1 (has one leaf)
123695fce210SBarry Smith 
123795fce210SBarry Smith   Level: developer
123895fce210SBarry Smith 
1239cab54364SBarry Smith   Note:
1240cab54364SBarry Smith   In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi
124195fce210SBarry Smith   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
124295fce210SBarry Smith   edge, it is a candidate for future optimization that might involve its removal.
124395fce210SBarry Smith 
1244*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
124595fce210SBarry Smith @*/
1246d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi)
1247d71ae5a4SJacob Faibussowitsch {
124895fce210SBarry Smith   PetscFunctionBegin;
124995fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
12504f572ea9SToby Isaac   PetscAssertPointer(multi, 2);
125195fce210SBarry Smith   if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
12529566063dSJacob Faibussowitsch     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
125395fce210SBarry Smith     *multi           = sf->multi;
1254013b3241SStefano Zampini     sf->multi->multi = sf->multi;
12553ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
125695fce210SBarry Smith   }
125795fce210SBarry Smith   if (!sf->multi) {
125895fce210SBarry Smith     const PetscInt *indegree;
12599837ea96SMatthew G. Knepley     PetscInt        i, *inoffset, *outones, *outoffset, maxlocal;
126095fce210SBarry Smith     PetscSFNode    *remote;
126129046d53SLisandro Dalcin     maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
12629566063dSJacob Faibussowitsch     PetscCall(PetscSFComputeDegreeBegin(sf, &indegree));
12639566063dSJacob Faibussowitsch     PetscCall(PetscSFComputeDegreeEnd(sf, &indegree));
12649566063dSJacob Faibussowitsch     PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset));
126595fce210SBarry Smith     inoffset[0] = 0;
126695fce210SBarry Smith     for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i];
12679837ea96SMatthew G. Knepley     for (i = 0; i < maxlocal; i++) outones[i] = 1;
12689566063dSJacob Faibussowitsch     PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
12699566063dSJacob Faibussowitsch     PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
127095fce210SBarry Smith     for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
127176bd3646SJed Brown     if (PetscDefined(USE_DEBUG)) {                               /* Check that the expected number of increments occurred */
1272ad540459SPierre Jolivet       for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp");
127376bd3646SJed Brown     }
12749566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(sf->nleaves, &remote));
127595fce210SBarry Smith     for (i = 0; i < sf->nleaves; i++) {
127695fce210SBarry Smith       remote[i].rank  = sf->remote[i].rank;
127738e7336fSToby Isaac       remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
127895fce210SBarry Smith     }
12799566063dSJacob Faibussowitsch     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
1280013b3241SStefano Zampini     sf->multi->multi = sf->multi;
12819566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
128295fce210SBarry Smith     if (sf->rankorder) { /* Sort the ranks */
128395fce210SBarry Smith       PetscMPIInt  rank;
128495fce210SBarry Smith       PetscInt    *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree;
128595fce210SBarry Smith       PetscSFNode *newremote;
12869566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
128795fce210SBarry Smith       for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]);
12889566063dSJacob Faibussowitsch       PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset));
12899837ea96SMatthew G. Knepley       for (i = 0; i < maxlocal; i++) outranks[i] = rank;
12909566063dSJacob Faibussowitsch       PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
12919566063dSJacob Faibussowitsch       PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
129295fce210SBarry Smith       /* Sort the incoming ranks at each vertex, build the inverse map */
129395fce210SBarry Smith       for (i = 0; i < sf->nroots; i++) {
129495fce210SBarry Smith         PetscInt j;
129595fce210SBarry Smith         for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j;
12968e3a54c0SPierre Jolivet         PetscCall(PetscSortIntWithArray(indegree[i], PetscSafePointerPlusOffset(inranks, inoffset[i]), tmpoffset));
129795fce210SBarry Smith         for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
129895fce210SBarry Smith       }
12999566063dSJacob Faibussowitsch       PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
13009566063dSJacob Faibussowitsch       PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
13019566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(sf->nleaves, &newremote));
130295fce210SBarry Smith       for (i = 0; i < sf->nleaves; i++) {
130395fce210SBarry Smith         newremote[i].rank  = sf->remote[i].rank;
130401365b40SToby Isaac         newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
130595fce210SBarry Smith       }
13069566063dSJacob Faibussowitsch       PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER));
13079566063dSJacob Faibussowitsch       PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset));
130895fce210SBarry Smith     }
13099566063dSJacob Faibussowitsch     PetscCall(PetscFree3(inoffset, outones, outoffset));
131095fce210SBarry Smith   }
131195fce210SBarry Smith   *multi = sf->multi;
13123ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
131395fce210SBarry Smith }
131495fce210SBarry Smith 
131595fce210SBarry Smith /*@C
131620662ed9SBarry Smith   PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots of a `PetscSF`, does not remap indices
131795fce210SBarry Smith 
131895fce210SBarry Smith   Collective
131995fce210SBarry Smith 
13204165533cSJose E. Roman   Input Parameters:
132195fce210SBarry Smith + sf        - original star forest
1322*2f04c522SBarry Smith . nselected - number of selected roots on this MPI process
1323*2f04c522SBarry Smith - selected  - indices of the selected roots on this MPI process
132495fce210SBarry Smith 
13254165533cSJose E. Roman   Output Parameter:
1326cd620004SJunchao Zhang . esf - new star forest
132795fce210SBarry Smith 
132895fce210SBarry Smith   Level: advanced
132995fce210SBarry Smith 
133095fce210SBarry Smith   Note:
1331cab54364SBarry Smith   To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can
133295fce210SBarry Smith   be done by calling PetscSFGetGraph().
133395fce210SBarry Smith 
1334*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
133595fce210SBarry Smith @*/
1336cf84bf9eSBarry Smith PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt selected[], PetscSF *esf)
1337d71ae5a4SJacob Faibussowitsch {
1338cd620004SJunchao Zhang   PetscInt           i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
1339cd620004SJunchao Zhang   const PetscInt    *ilocal;
1340cd620004SJunchao Zhang   signed char       *rootdata, *leafdata, *leafmem;
1341ba2a7774SJunchao Zhang   const PetscSFNode *iremote;
1342f659e5c7SJunchao Zhang   PetscSFNode       *new_iremote;
1343f659e5c7SJunchao Zhang   MPI_Comm           comm;
134495fce210SBarry Smith 
134595fce210SBarry Smith   PetscFunctionBegin;
134695fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
134729046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
13484f572ea9SToby Isaac   if (nselected) PetscAssertPointer(selected, 3);
13494f572ea9SToby Isaac   PetscAssertPointer(esf, 4);
13500511a646SMatthew G. Knepley 
13519566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
13529566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0));
13539566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
13549566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
1355cd620004SJunchao Zhang 
135676bd3646SJed Brown   if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
1357cd620004SJunchao Zhang     PetscBool dups;
13589566063dSJacob Faibussowitsch     PetscCall(PetscCheckDupsInt(nselected, selected, &dups));
135928b400f6SJacob Faibussowitsch     PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups");
1360511e6246SStefano Zampini     for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root index %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots);
1361cd620004SJunchao Zhang   }
1362f659e5c7SJunchao Zhang 
1363dbbe0bcdSBarry Smith   if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf);
1364dbbe0bcdSBarry Smith   else {
1365cd620004SJunchao Zhang     /* A generic version of creating embedded sf */
13669566063dSJacob Faibussowitsch     PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
1367cd620004SJunchao Zhang     maxlocal = maxleaf - minleaf + 1;
13689566063dSJacob Faibussowitsch     PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem));
13698e3a54c0SPierre Jolivet     leafdata = PetscSafePointerPlusOffset(leafmem, -minleaf);
1370cd620004SJunchao Zhang     /* Tag selected roots and bcast to leaves */
1371cd620004SJunchao Zhang     for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1;
13729566063dSJacob Faibussowitsch     PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
13739566063dSJacob Faibussowitsch     PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
1374ba2a7774SJunchao Zhang 
1375cd620004SJunchao Zhang     /* Build esf with leaves that are still connected */
1376cd620004SJunchao Zhang     esf_nleaves = 0;
1377cd620004SJunchao Zhang     for (i = 0; i < nleaves; i++) {
1378cd620004SJunchao Zhang       j = ilocal ? ilocal[i] : i;
1379cd620004SJunchao Zhang       /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1380cd620004SJunchao Zhang          with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1381cd620004SJunchao Zhang       */
1382cd620004SJunchao Zhang       esf_nleaves += (leafdata[j] ? 1 : 0);
1383cd620004SJunchao Zhang     }
13849566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal));
13859566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(esf_nleaves, &new_iremote));
1386cd620004SJunchao Zhang     for (i = n = 0; i < nleaves; i++) {
1387cd620004SJunchao Zhang       j = ilocal ? ilocal[i] : i;
1388cd620004SJunchao Zhang       if (leafdata[j]) {
1389cd620004SJunchao Zhang         new_ilocal[n]        = j;
1390cd620004SJunchao Zhang         new_iremote[n].rank  = iremote[i].rank;
1391cd620004SJunchao Zhang         new_iremote[n].index = iremote[i].index;
1392fc1ede2bSMatthew G. Knepley         ++n;
139395fce210SBarry Smith       }
139495fce210SBarry Smith     }
13959566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(comm, esf));
13969566063dSJacob Faibussowitsch     PetscCall(PetscSFSetFromOptions(*esf));
13979566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
13989566063dSJacob Faibussowitsch     PetscCall(PetscFree2(rootdata, leafmem));
1399f659e5c7SJunchao Zhang   }
14009566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0));
14013ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
140295fce210SBarry Smith }
140395fce210SBarry Smith 
14042f5fb4c2SMatthew G. Knepley /*@C
140520662ed9SBarry Smith   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves of a `PetscSF`, does not remap indices
14062f5fb4c2SMatthew G. Knepley 
14072f5fb4c2SMatthew G. Knepley   Collective
14082f5fb4c2SMatthew G. Knepley 
14094165533cSJose E. Roman   Input Parameters:
14102f5fb4c2SMatthew G. Knepley + sf        - original star forest
1411*2f04c522SBarry Smith . nselected - number of selected leaves on this MPI process
1412*2f04c522SBarry Smith - selected  - indices of the selected leaves on this MPI process
14132f5fb4c2SMatthew G. Knepley 
14144165533cSJose E. Roman   Output Parameter:
14152f5fb4c2SMatthew G. Knepley . newsf - new star forest
14162f5fb4c2SMatthew G. Knepley 
14172f5fb4c2SMatthew G. Knepley   Level: advanced
14182f5fb4c2SMatthew G. Knepley 
1419*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
14202f5fb4c2SMatthew G. Knepley @*/
1421cf84bf9eSBarry Smith PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt selected[], PetscSF *newsf)
1422d71ae5a4SJacob Faibussowitsch {
1423f659e5c7SJunchao Zhang   const PetscSFNode *iremote;
1424f659e5c7SJunchao Zhang   PetscSFNode       *new_iremote;
1425f659e5c7SJunchao Zhang   const PetscInt    *ilocal;
1426f659e5c7SJunchao Zhang   PetscInt           i, nroots, *leaves, *new_ilocal;
1427f659e5c7SJunchao Zhang   MPI_Comm           comm;
14282f5fb4c2SMatthew G. Knepley 
14292f5fb4c2SMatthew G. Knepley   PetscFunctionBegin;
14302f5fb4c2SMatthew G. Knepley   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
143129046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
14324f572ea9SToby Isaac   if (nselected) PetscAssertPointer(selected, 3);
14334f572ea9SToby Isaac   PetscAssertPointer(newsf, 4);
14342f5fb4c2SMatthew G. Knepley 
1435f659e5c7SJunchao Zhang   /* Uniq selected[] and put results in leaves[] */
14369566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
14379566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nselected, &leaves));
14389566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(leaves, selected, nselected));
14399566063dSJacob Faibussowitsch   PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves));
144008401ef6SPierre Jolivet   PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves);
1441f659e5c7SJunchao Zhang 
1442f659e5c7SJunchao Zhang   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1443dbbe0bcdSBarry Smith   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf);
1444dbbe0bcdSBarry Smith   else {
14459566063dSJacob Faibussowitsch     PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote));
14469566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nselected, &new_ilocal));
14479566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nselected, &new_iremote));
1448f659e5c7SJunchao Zhang     for (i = 0; i < nselected; ++i) {
1449f659e5c7SJunchao Zhang       const PetscInt l     = leaves[i];
1450f659e5c7SJunchao Zhang       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1451f659e5c7SJunchao Zhang       new_iremote[i].rank  = iremote[l].rank;
1452f659e5c7SJunchao Zhang       new_iremote[i].index = iremote[l].index;
14532f5fb4c2SMatthew G. Knepley     }
14549566063dSJacob Faibussowitsch     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf));
14559566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1456f659e5c7SJunchao Zhang   }
14579566063dSJacob Faibussowitsch   PetscCall(PetscFree(leaves));
14583ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
14592f5fb4c2SMatthew G. Knepley }
14602f5fb4c2SMatthew G. Knepley 
146195fce210SBarry Smith /*@C
1462cab54364SBarry Smith   PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()`
14633482bfa8SJunchao Zhang 
1464c3339decSBarry Smith   Collective
14653482bfa8SJunchao Zhang 
14664165533cSJose E. Roman   Input Parameters:
14673482bfa8SJunchao Zhang + sf       - star forest on which to communicate
14683482bfa8SJunchao Zhang . unit     - data type associated with each node
14693482bfa8SJunchao Zhang . rootdata - buffer to broadcast
14703482bfa8SJunchao Zhang - op       - operation to use for reduction
14713482bfa8SJunchao Zhang 
14724165533cSJose E. Roman   Output Parameter:
14733482bfa8SJunchao Zhang . leafdata - buffer to be reduced with values from each leaf's respective root
14743482bfa8SJunchao Zhang 
14753482bfa8SJunchao Zhang   Level: intermediate
14763482bfa8SJunchao Zhang 
147720662ed9SBarry Smith   Note:
1478*2f04c522SBarry Smith   When PETSc is configured with device support, it will use `PetscGetMemType()` to determine whether the given data pointers
1479*2f04c522SBarry Smith   are host pointers or device pointers, which may incur a noticeable cost. If you already knew the memory type, you should
1480cab54364SBarry Smith   use `PetscSFBcastWithMemTypeBegin()` instead.
1481cab54364SBarry Smith 
1482*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()`
14833482bfa8SJunchao Zhang @*/
1484d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1485d71ae5a4SJacob Faibussowitsch {
1486eb02082bSJunchao Zhang   PetscMemType rootmtype, leafmtype;
14873482bfa8SJunchao Zhang 
14883482bfa8SJunchao Zhang   PetscFunctionBegin;
14893482bfa8SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
14909566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
14919566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
14929566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(rootdata, &rootmtype));
14939566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1494dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
14959566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
14963ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
14973482bfa8SJunchao Zhang }
14983482bfa8SJunchao Zhang 
14993482bfa8SJunchao Zhang /*@C
150020662ed9SBarry Smith   PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call
150120662ed9SBarry Smith   to `PetscSFBcastEnd()`
1502d0295fc0SJunchao Zhang 
1503c3339decSBarry Smith   Collective
1504d0295fc0SJunchao Zhang 
15054165533cSJose E. Roman   Input Parameters:
1506d0295fc0SJunchao Zhang + sf        - star forest on which to communicate
1507d0295fc0SJunchao Zhang . unit      - data type associated with each node
1508*2f04c522SBarry Smith . rootmtype - memory type of `rootdata`
1509d0295fc0SJunchao Zhang . rootdata  - buffer to broadcast
1510*2f04c522SBarry Smith . leafmtype - memory type of `leafdata`
1511d0295fc0SJunchao Zhang - op        - operation to use for reduction
1512d0295fc0SJunchao Zhang 
15134165533cSJose E. Roman   Output Parameter:
1514d0295fc0SJunchao Zhang . leafdata - buffer to be reduced with values from each leaf's respective root
1515d0295fc0SJunchao Zhang 
1516d0295fc0SJunchao Zhang   Level: intermediate
1517d0295fc0SJunchao Zhang 
1518*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()`
1519d0295fc0SJunchao Zhang @*/
1520d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
1521d71ae5a4SJacob Faibussowitsch {
1522d0295fc0SJunchao Zhang   PetscFunctionBegin;
1523d0295fc0SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
15249566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
15259566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
1526dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
15279566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
15283ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1529d0295fc0SJunchao Zhang }
1530d0295fc0SJunchao Zhang 
1531d0295fc0SJunchao Zhang /*@C
153220662ed9SBarry Smith   PetscSFBcastEnd - end a broadcast and reduce operation started with `PetscSFBcastBegin()` or `PetscSFBcastWithMemTypeBegin()`
15333482bfa8SJunchao Zhang 
15343482bfa8SJunchao Zhang   Collective
15353482bfa8SJunchao Zhang 
15364165533cSJose E. Roman   Input Parameters:
15373482bfa8SJunchao Zhang + sf       - star forest
15383482bfa8SJunchao Zhang . unit     - data type
15393482bfa8SJunchao Zhang . rootdata - buffer to broadcast
15403482bfa8SJunchao Zhang - op       - operation to use for reduction
15413482bfa8SJunchao Zhang 
15424165533cSJose E. Roman   Output Parameter:
15433482bfa8SJunchao Zhang . leafdata - buffer to be reduced with values from each leaf's respective root
15443482bfa8SJunchao Zhang 
15453482bfa8SJunchao Zhang   Level: intermediate
15463482bfa8SJunchao Zhang 
1547*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()`
15483482bfa8SJunchao Zhang @*/
1549d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1550d71ae5a4SJacob Faibussowitsch {
15513482bfa8SJunchao Zhang   PetscFunctionBegin;
15523482bfa8SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
15539566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0));
1554dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op);
15559566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0));
15563ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
15573482bfa8SJunchao Zhang }
15583482bfa8SJunchao Zhang 
15593482bfa8SJunchao Zhang /*@C
1560*2f04c522SBarry Smith   PetscSFReduceBegin - begin reduction (communication) of `leafdata` into `rootdata`, to be completed with call to `PetscSFReduceEnd()`
156195fce210SBarry Smith 
156295fce210SBarry Smith   Collective
156395fce210SBarry Smith 
15644165533cSJose E. Roman   Input Parameters:
156595fce210SBarry Smith + sf       - star forest
156695fce210SBarry Smith . unit     - data type
1567*2f04c522SBarry Smith . leafdata - values to reduce (communicate)
156895fce210SBarry Smith - op       - reduction operation
156995fce210SBarry Smith 
15704165533cSJose E. Roman   Output Parameter:
1571*2f04c522SBarry Smith . rootdata - result of reduction of values (`leafdata`) from all leaves to each root
157295fce210SBarry Smith 
157395fce210SBarry Smith   Level: intermediate
157495fce210SBarry Smith 
157520662ed9SBarry Smith   Note:
1576*2f04c522SBarry Smith   When PETSc is configured with device support, it will use `PetscGetMemType()` to determine whether the given data pointers
1577*2f04c522SBarry Smith   are host pointers or device pointers, which may incur a noticeable cost. If you already knew the memory type, you should
1578cab54364SBarry Smith   use `PetscSFReduceWithMemTypeBegin()` instead.
1579d0295fc0SJunchao Zhang 
1580*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`, `PetscSFReduceEnd()`, `MPI_Datatype`
158195fce210SBarry Smith @*/
1582d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1583d71ae5a4SJacob Faibussowitsch {
1584eb02082bSJunchao Zhang   PetscMemType rootmtype, leafmtype;
158595fce210SBarry Smith 
158695fce210SBarry Smith   PetscFunctionBegin;
158795fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
15889566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
15899566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
15909566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(rootdata, &rootmtype));
15919566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1592f4f49eeaSPierre Jolivet   PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
15939566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
15943ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
159595fce210SBarry Smith }
159695fce210SBarry Smith 
159795fce210SBarry Smith /*@C
1598*2f04c522SBarry Smith   PetscSFReduceWithMemTypeBegin - begin reduction of `leafdata` into `rootdata` with explicit memory types, to be completed with call to `PetscSFReduceEnd()`
1599d0295fc0SJunchao Zhang 
1600d0295fc0SJunchao Zhang   Collective
1601d0295fc0SJunchao Zhang 
16024165533cSJose E. Roman   Input Parameters:
1603d0295fc0SJunchao Zhang + sf        - star forest
1604d0295fc0SJunchao Zhang . unit      - data type
1605*2f04c522SBarry Smith . leafmtype - memory type of `leafdata`
1606d0295fc0SJunchao Zhang . leafdata  - values to reduce
1607*2f04c522SBarry Smith . rootmtype - memory type of `rootdata`
1608d0295fc0SJunchao Zhang - op        - reduction operation
1609d0295fc0SJunchao Zhang 
16104165533cSJose E. Roman   Output Parameter:
1611d0295fc0SJunchao Zhang . rootdata - result of reduction of values from all leaves of each root
1612d0295fc0SJunchao Zhang 
1613d0295fc0SJunchao Zhang   Level: intermediate
1614d0295fc0SJunchao Zhang 
1615*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`, `PetscSFReduceEnd()`
1616d0295fc0SJunchao Zhang @*/
1617d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
1618d71ae5a4SJacob Faibussowitsch {
1619d0295fc0SJunchao Zhang   PetscFunctionBegin;
1620d0295fc0SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
16219566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
16229566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1623f4f49eeaSPierre Jolivet   PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
16249566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
16253ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1626d0295fc0SJunchao Zhang }
1627d0295fc0SJunchao Zhang 
1628d0295fc0SJunchao Zhang /*@C
162920662ed9SBarry Smith   PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()` or `PetscSFReduceWithMemTypeBegin()`
163095fce210SBarry Smith 
163195fce210SBarry Smith   Collective
163295fce210SBarry Smith 
16334165533cSJose E. Roman   Input Parameters:
163495fce210SBarry Smith + sf       - star forest
163595fce210SBarry Smith . unit     - data type
163695fce210SBarry Smith . leafdata - values to reduce
163795fce210SBarry Smith - op       - reduction operation
163895fce210SBarry Smith 
16394165533cSJose E. Roman   Output Parameter:
164095fce210SBarry Smith . rootdata - result of reduction of values from all leaves of each root
164195fce210SBarry Smith 
164295fce210SBarry Smith   Level: intermediate
164395fce210SBarry Smith 
1644*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`, `PetscSFReduceBegin()`, `PetscSFReduceWithMemTypeBegin()`
164595fce210SBarry Smith @*/
1646d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1647d71ae5a4SJacob Faibussowitsch {
164895fce210SBarry Smith   PetscFunctionBegin;
164995fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
16509566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0));
1651dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op);
16529566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0));
16533ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
165495fce210SBarry Smith }
165595fce210SBarry Smith 
165695fce210SBarry Smith /*@C
1657*2f04c522SBarry Smith   PetscSFFetchAndOpBegin - begin operation that fetches values from `rootdata` and updates it atomically by applying operation using `leafdata`,
1658cab54364SBarry Smith   to be completed with `PetscSFFetchAndOpEnd()`
1659a1729e3fSJunchao Zhang 
1660a1729e3fSJunchao Zhang   Collective
1661a1729e3fSJunchao Zhang 
16624165533cSJose E. Roman   Input Parameters:
1663a1729e3fSJunchao Zhang + sf       - star forest
1664a1729e3fSJunchao Zhang . unit     - data type
1665a1729e3fSJunchao Zhang . leafdata - leaf values to use in reduction
1666a1729e3fSJunchao Zhang - op       - operation to use for reduction
1667a1729e3fSJunchao Zhang 
16684165533cSJose E. Roman   Output Parameters:
1669a1729e3fSJunchao Zhang + rootdata   - root values to be updated, input state is seen by first process to perform an update
1670*2f04c522SBarry Smith - leafupdate - state at each leaf's respective root immediately prior to atomic update
1671a1729e3fSJunchao Zhang 
1672a1729e3fSJunchao Zhang   Level: advanced
1673a1729e3fSJunchao Zhang 
1674a1729e3fSJunchao Zhang   Note:
1675a1729e3fSJunchao Zhang   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1676a1729e3fSJunchao Zhang   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1677a1729e3fSJunchao Zhang   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1678a1729e3fSJunchao Zhang   integers.
1679a1729e3fSJunchao Zhang 
1680*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
1681a1729e3fSJunchao Zhang @*/
1682d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1683d71ae5a4SJacob Faibussowitsch {
1684eb02082bSJunchao Zhang   PetscMemType rootmtype, leafmtype, leafupdatemtype;
1685a1729e3fSJunchao Zhang 
1686a1729e3fSJunchao Zhang   PetscFunctionBegin;
1687a1729e3fSJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
16889566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
16899566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
16909566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(rootdata, &rootmtype));
16919566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(leafdata, &leafmtype));
16929566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype));
169308401ef6SPierre Jolivet   PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1694dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
16959566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
16963ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1697a1729e3fSJunchao Zhang }
1698a1729e3fSJunchao Zhang 
1699a1729e3fSJunchao Zhang /*@C
1700cab54364SBarry Smith   PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by
1701*2f04c522SBarry Smith   applying operation using `leafdata`, to be completed with `PetscSFFetchAndOpEnd()`
1702d3b3e55cSJunchao Zhang 
1703d3b3e55cSJunchao Zhang   Collective
1704d3b3e55cSJunchao Zhang 
1705d3b3e55cSJunchao Zhang   Input Parameters:
1706d3b3e55cSJunchao Zhang + sf              - star forest
1707d3b3e55cSJunchao Zhang . unit            - data type
1708*2f04c522SBarry Smith . rootmtype       - memory type of `rootdata`
1709*2f04c522SBarry Smith . leafmtype       - memory type of `leafdata`
1710d3b3e55cSJunchao Zhang . leafdata        - leaf values to use in reduction
1711*2f04c522SBarry Smith . leafupdatemtype - memory type of `leafupdate`
1712d3b3e55cSJunchao Zhang - op              - operation to use for reduction
1713d3b3e55cSJunchao Zhang 
1714d3b3e55cSJunchao Zhang   Output Parameters:
1715d3b3e55cSJunchao Zhang + rootdata   - root values to be updated, input state is seen by first process to perform an update
1716*2f04c522SBarry Smith - leafupdate - state at each leaf's respective root immediately prior to atomic update
1717d3b3e55cSJunchao Zhang 
1718d3b3e55cSJunchao Zhang   Level: advanced
1719d3b3e55cSJunchao Zhang 
1720cab54364SBarry Smith   Note:
1721cab54364SBarry Smith   See `PetscSFFetchAndOpBegin()` for more details.
1722d3b3e55cSJunchao Zhang 
1723*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpEnd()`
1724d3b3e55cSJunchao Zhang @*/
1725d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op)
1726d71ae5a4SJacob Faibussowitsch {
1727d3b3e55cSJunchao Zhang   PetscFunctionBegin;
1728d3b3e55cSJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
17299566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
17309566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
173108401ef6SPierre Jolivet   PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1732dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
17339566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
17343ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1735d3b3e55cSJunchao Zhang }
1736d3b3e55cSJunchao Zhang 
1737d3b3e55cSJunchao Zhang /*@C
173820662ed9SBarry Smith   PetscSFFetchAndOpEnd - end operation started in matching call to `PetscSFFetchAndOpBegin()` or `PetscSFFetchAndOpWithMemTypeBegin()`
1739*2f04c522SBarry Smith   to fetch values from roots and update atomically by applying operation using `leafdata`
1740a1729e3fSJunchao Zhang 
1741a1729e3fSJunchao Zhang   Collective
1742a1729e3fSJunchao Zhang 
17434165533cSJose E. Roman   Input Parameters:
1744a1729e3fSJunchao Zhang + sf       - star forest
1745a1729e3fSJunchao Zhang . unit     - data type
1746a1729e3fSJunchao Zhang . leafdata - leaf values to use in reduction
1747a1729e3fSJunchao Zhang - op       - operation to use for reduction
1748a1729e3fSJunchao Zhang 
17494165533cSJose E. Roman   Output Parameters:
1750a1729e3fSJunchao Zhang + rootdata   - root values to be updated, input state is seen by first process to perform an update
1751*2f04c522SBarry Smith - leafupdate - state at each leaf's respective root immediately prior to atomic update
1752a1729e3fSJunchao Zhang 
1753a1729e3fSJunchao Zhang   Level: advanced
1754a1729e3fSJunchao Zhang 
1755*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpBegin()`, `PetscSFFetchAndOpWithMemTypeBegin()`
1756a1729e3fSJunchao Zhang @*/
1757d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1758d71ae5a4SJacob Faibussowitsch {
1759a1729e3fSJunchao Zhang   PetscFunctionBegin;
1760a1729e3fSJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
17619566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
1762dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op);
17639566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
17643ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1765a1729e3fSJunchao Zhang }
1766a1729e3fSJunchao Zhang 
1767a1729e3fSJunchao Zhang /*@C
1768*2f04c522SBarry Smith   PetscSFComputeDegreeBegin - begin computation of the degree of each root vertex, to be completed with `PetscSFComputeDegreeEnd()`
176995fce210SBarry Smith 
177095fce210SBarry Smith   Collective
177195fce210SBarry Smith 
17724165533cSJose E. Roman   Input Parameter:
177395fce210SBarry Smith . sf - star forest
177495fce210SBarry Smith 
17754165533cSJose E. Roman   Output Parameter:
1776*2f04c522SBarry Smith . degree - degree (the number of leaves) of each root vertex
177795fce210SBarry Smith 
177895fce210SBarry Smith   Level: advanced
177995fce210SBarry Smith 
1780cab54364SBarry Smith   Note:
178120662ed9SBarry Smith   The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
1782ffe67aa5SVáclav Hapla 
1783*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()`
178495fce210SBarry Smith @*/
17856497c311SBarry Smith PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt *degree[])
1786d71ae5a4SJacob Faibussowitsch {
178795fce210SBarry Smith   PetscFunctionBegin;
178895fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
178995fce210SBarry Smith   PetscSFCheckGraphSet(sf, 1);
17904f572ea9SToby Isaac   PetscAssertPointer(degree, 2);
1791803bd9e8SMatthew G. Knepley   if (!sf->degreeknown) {
17925b0d146aSStefano Zampini     PetscInt i, nroots = sf->nroots, maxlocal;
179328b400f6SJacob Faibussowitsch     PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
17945b0d146aSStefano Zampini     maxlocal = sf->maxleaf - sf->minleaf + 1;
17959566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nroots, &sf->degree));
17969566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
179729046d53SLisandro Dalcin     for (i = 0; i < nroots; i++) sf->degree[i] = 0;
17989837ea96SMatthew G. Knepley     for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1;
17999566063dSJacob Faibussowitsch     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
180095fce210SBarry Smith   }
180195fce210SBarry Smith   *degree = NULL;
18023ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
180395fce210SBarry Smith }
180495fce210SBarry Smith 
180595fce210SBarry Smith /*@C
1806cab54364SBarry Smith   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()`
180795fce210SBarry Smith 
180895fce210SBarry Smith   Collective
180995fce210SBarry Smith 
18104165533cSJose E. Roman   Input Parameter:
181195fce210SBarry Smith . sf - star forest
181295fce210SBarry Smith 
18134165533cSJose E. Roman   Output Parameter:
181495fce210SBarry Smith . degree - degree of each root vertex
181595fce210SBarry Smith 
181695fce210SBarry Smith   Level: developer
181795fce210SBarry Smith 
1818cab54364SBarry Smith   Note:
181920662ed9SBarry Smith   The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
1820ffe67aa5SVáclav Hapla 
1821*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()`
182295fce210SBarry Smith @*/
18239c9354e5SBarry Smith PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt *degree[])
1824d71ae5a4SJacob Faibussowitsch {
182595fce210SBarry Smith   PetscFunctionBegin;
182695fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
182795fce210SBarry Smith   PetscSFCheckGraphSet(sf, 1);
18284f572ea9SToby Isaac   PetscAssertPointer(degree, 2);
182995fce210SBarry Smith   if (!sf->degreeknown) {
183028b400f6SJacob Faibussowitsch     PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
18319566063dSJacob Faibussowitsch     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
18329566063dSJacob Faibussowitsch     PetscCall(PetscFree(sf->degreetmp));
183395fce210SBarry Smith     sf->degreeknown = PETSC_TRUE;
183495fce210SBarry Smith   }
183595fce210SBarry Smith   *degree = sf->degree;
18363ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
183795fce210SBarry Smith }
183895fce210SBarry Smith 
1839673100f5SVaclav Hapla /*@C
184020662ed9SBarry Smith   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-`PetscSF` returned by `PetscSFGetMultiSF()`).
184166dfcd1aSVaclav Hapla   Each multi-root is assigned index of the corresponding original root.
1842673100f5SVaclav Hapla 
1843673100f5SVaclav Hapla   Collective
1844673100f5SVaclav Hapla 
18454165533cSJose E. Roman   Input Parameters:
1846673100f5SVaclav Hapla + sf     - star forest
1847*2f04c522SBarry Smith - degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()` and `PetscSFComputeDegreeEnd()`
1848673100f5SVaclav Hapla 
18494165533cSJose E. Roman   Output Parameters:
185020662ed9SBarry Smith + nMultiRoots             - (optional) number of multi-roots (roots of multi-`PetscSF`)
185120662ed9SBarry Smith - multiRootsOrigNumbering - original indices of multi-roots; length of this array is `nMultiRoots`
1852673100f5SVaclav Hapla 
1853673100f5SVaclav Hapla   Level: developer
1854673100f5SVaclav Hapla 
1855cab54364SBarry Smith   Note:
1856*2f04c522SBarry Smith   The returned array `multiRootsOrigNumbering` should be destroyed with `PetscFree()` when no longer needed.
1857ffe67aa5SVáclav Hapla 
1858*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()`
1859673100f5SVaclav Hapla @*/
1860d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1861d71ae5a4SJacob Faibussowitsch {
1862673100f5SVaclav Hapla   PetscSF  msf;
186363bfac88SBarry Smith   PetscInt k = 0, nroots, nmroots;
1864673100f5SVaclav Hapla 
1865673100f5SVaclav Hapla   PetscFunctionBegin;
1866673100f5SVaclav Hapla   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
18679566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
18684f572ea9SToby Isaac   if (nroots) PetscAssertPointer(degree, 2);
18694f572ea9SToby Isaac   if (nMultiRoots) PetscAssertPointer(nMultiRoots, 3);
18704f572ea9SToby Isaac   PetscAssertPointer(multiRootsOrigNumbering, 4);
18719566063dSJacob Faibussowitsch   PetscCall(PetscSFGetMultiSF(sf, &msf));
18729566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL));
18739566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering));
187463bfac88SBarry Smith   for (PetscInt i = 0; i < nroots; i++) {
1875673100f5SVaclav Hapla     if (!degree[i]) continue;
187663bfac88SBarry Smith     for (PetscInt j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i;
1877673100f5SVaclav Hapla   }
187808401ef6SPierre Jolivet   PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail");
187966dfcd1aSVaclav Hapla   if (nMultiRoots) *nMultiRoots = nmroots;
18803ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1881673100f5SVaclav Hapla }
1882673100f5SVaclav Hapla 
188395fce210SBarry Smith /*@C
1884cab54364SBarry Smith   PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()`
188595fce210SBarry Smith 
188695fce210SBarry Smith   Collective
188795fce210SBarry Smith 
18884165533cSJose E. Roman   Input Parameters:
188995fce210SBarry Smith + sf       - star forest
189095fce210SBarry Smith . unit     - data type
189195fce210SBarry Smith - leafdata - leaf data to gather to roots
189295fce210SBarry Smith 
18934165533cSJose E. Roman   Output Parameter:
1894*2f04c522SBarry Smith . multirootdata - root buffer to gather into, amount of space per root is equal to its degree (which is the number of leaves that root has)
189595fce210SBarry Smith 
189695fce210SBarry Smith   Level: intermediate
189795fce210SBarry Smith 
1898*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
189995fce210SBarry Smith @*/
1900d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1901d71ae5a4SJacob Faibussowitsch {
1902a5526d50SJunchao Zhang   PetscSF multi = NULL;
190395fce210SBarry Smith 
190495fce210SBarry Smith   PetscFunctionBegin;
190595fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
19069566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
19079566063dSJacob Faibussowitsch   PetscCall(PetscSFGetMultiSF(sf, &multi));
19089566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE));
19093ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
191095fce210SBarry Smith }
191195fce210SBarry Smith 
191295fce210SBarry Smith /*@C
1913cab54364SBarry Smith   PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()`
191495fce210SBarry Smith 
191595fce210SBarry Smith   Collective
191695fce210SBarry Smith 
19174165533cSJose E. Roman   Input Parameters:
191895fce210SBarry Smith + sf       - star forest
191995fce210SBarry Smith . unit     - data type
192095fce210SBarry Smith - leafdata - leaf data to gather to roots
192195fce210SBarry Smith 
19224165533cSJose E. Roman   Output Parameter:
1923*2f04c522SBarry Smith . multirootdata - root buffer to gather into, amount of space per root is equal to its degree (which is the number of leaves that root has)
192495fce210SBarry Smith 
192595fce210SBarry Smith   Level: intermediate
192695fce210SBarry Smith 
1927*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
192895fce210SBarry Smith @*/
1929d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1930d71ae5a4SJacob Faibussowitsch {
1931a5526d50SJunchao Zhang   PetscSF multi = NULL;
193295fce210SBarry Smith 
193395fce210SBarry Smith   PetscFunctionBegin;
193495fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
19359566063dSJacob Faibussowitsch   PetscCall(PetscSFGetMultiSF(sf, &multi));
19369566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE));
19373ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
193895fce210SBarry Smith }
193995fce210SBarry Smith 
194095fce210SBarry Smith /*@C
1941cab54364SBarry Smith   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()`
194295fce210SBarry Smith 
194395fce210SBarry Smith   Collective
194495fce210SBarry Smith 
19454165533cSJose E. Roman   Input Parameters:
194695fce210SBarry Smith + sf            - star forest
194795fce210SBarry Smith . unit          - data type
1948*2f04c522SBarry Smith - multirootdata - root buffer to send to each leaf, one unit of data is provided to each leaf thus the amount of space per root is equal to its degree (which is the number of leaves that root has)
194995fce210SBarry Smith 
19504165533cSJose E. Roman   Output Parameter:
195195fce210SBarry Smith . leafdata - leaf data to be update with personal data from each respective root
195295fce210SBarry Smith 
195395fce210SBarry Smith   Level: intermediate
195495fce210SBarry Smith 
1955*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
195695fce210SBarry Smith @*/
1957d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1958d71ae5a4SJacob Faibussowitsch {
1959a5526d50SJunchao Zhang   PetscSF multi = NULL;
196095fce210SBarry Smith 
196195fce210SBarry Smith   PetscFunctionBegin;
196295fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
19639566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
19649566063dSJacob Faibussowitsch   PetscCall(PetscSFGetMultiSF(sf, &multi));
19659566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE));
19663ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
196795fce210SBarry Smith }
196895fce210SBarry Smith 
196995fce210SBarry Smith /*@C
1970cab54364SBarry Smith   PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()`
197195fce210SBarry Smith 
197295fce210SBarry Smith   Collective
197395fce210SBarry Smith 
19744165533cSJose E. Roman   Input Parameters:
197595fce210SBarry Smith + sf            - star forest
197695fce210SBarry Smith . unit          - data type
197795fce210SBarry Smith - multirootdata - root buffer to send to each leaf, one unit of data per leaf
197895fce210SBarry Smith 
19794165533cSJose E. Roman   Output Parameter:
198095fce210SBarry Smith . leafdata - leaf data to be update with personal data from each respective root
198195fce210SBarry Smith 
198295fce210SBarry Smith   Level: intermediate
198395fce210SBarry Smith 
1984*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()`
198595fce210SBarry Smith @*/
1986d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1987d71ae5a4SJacob Faibussowitsch {
1988a5526d50SJunchao Zhang   PetscSF multi = NULL;
198995fce210SBarry Smith 
199095fce210SBarry Smith   PetscFunctionBegin;
199195fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
19929566063dSJacob Faibussowitsch   PetscCall(PetscSFGetMultiSF(sf, &multi));
19939566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE));
19943ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
199595fce210SBarry Smith }
1996a7b3aa13SAta Mesgarnejad 
1997d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1998d71ae5a4SJacob Faibussowitsch {
1999a072220fSLawrence Mitchell   PetscInt        i, n, nleaves;
2000a072220fSLawrence Mitchell   const PetscInt *ilocal = NULL;
2001a072220fSLawrence Mitchell   PetscHSetI      seen;
2002a072220fSLawrence Mitchell 
2003a072220fSLawrence Mitchell   PetscFunctionBegin;
2004b458e8f1SJose E. Roman   if (PetscDefined(USE_DEBUG)) {
20059566063dSJacob Faibussowitsch     PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL));
20069566063dSJacob Faibussowitsch     PetscCall(PetscHSetICreate(&seen));
2007a072220fSLawrence Mitchell     for (i = 0; i < nleaves; i++) {
2008a072220fSLawrence Mitchell       const PetscInt leaf = ilocal ? ilocal[i] : i;
20099566063dSJacob Faibussowitsch       PetscCall(PetscHSetIAdd(seen, leaf));
2010a072220fSLawrence Mitchell     }
20119566063dSJacob Faibussowitsch     PetscCall(PetscHSetIGetSize(seen, &n));
201208401ef6SPierre Jolivet     PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique");
20139566063dSJacob Faibussowitsch     PetscCall(PetscHSetIDestroy(&seen));
2014b458e8f1SJose E. Roman   }
20153ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2016a072220fSLawrence Mitchell }
201754729392SStefano Zampini 
2018a7b3aa13SAta Mesgarnejad /*@
2019cab54364SBarry Smith   PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view
2020a7b3aa13SAta Mesgarnejad 
2021a7b3aa13SAta Mesgarnejad   Input Parameters:
2022cab54364SBarry Smith + sfA - The first `PetscSF`
2023cab54364SBarry Smith - sfB - The second `PetscSF`
2024a7b3aa13SAta Mesgarnejad 
20252fe279fdSBarry Smith   Output Parameter:
2026cab54364SBarry Smith . sfBA - The composite `PetscSF`
2027a7b3aa13SAta Mesgarnejad 
2028a7b3aa13SAta Mesgarnejad   Level: developer
2029a7b3aa13SAta Mesgarnejad 
2030a072220fSLawrence Mitchell   Notes:
2031cab54364SBarry Smith   Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
203254729392SStefano Zampini   forests, i.e. the same leaf is not connected with different roots.
203354729392SStefano Zampini 
203420662ed9SBarry Smith   `sfA`'s leaf space and `sfB`'s root space might be partially overlapped. The composition builds
203520662ed9SBarry Smith   a graph with `sfA`'s roots and `sfB`'s leaves only when there is a path between them. Unconnected
2036*2f04c522SBarry Smith   nodes (roots or leaves) are not in `sfBA`. Doing a `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a
2037*2f04c522SBarry Smith   `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on `sfA`, then a `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on `sfB`, on connected nodes.
2038a072220fSLawrence Mitchell 
2039*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
2040a7b3aa13SAta Mesgarnejad @*/
2041d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2042d71ae5a4SJacob Faibussowitsch {
2043a7b3aa13SAta Mesgarnejad   const PetscSFNode *remotePointsA, *remotePointsB;
2044d41018fbSJunchao Zhang   PetscSFNode       *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB;
204554729392SStefano Zampini   const PetscInt    *localPointsA, *localPointsB;
204654729392SStefano Zampini   PetscInt          *localPointsBA;
204754729392SStefano Zampini   PetscInt           i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA;
204854729392SStefano Zampini   PetscBool          denseB;
2049a7b3aa13SAta Mesgarnejad 
2050a7b3aa13SAta Mesgarnejad   PetscFunctionBegin;
2051a7b3aa13SAta Mesgarnejad   PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
205229046d53SLisandro Dalcin   PetscSFCheckGraphSet(sfA, 1);
205329046d53SLisandro Dalcin   PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
205429046d53SLisandro Dalcin   PetscSFCheckGraphSet(sfB, 2);
205554729392SStefano Zampini   PetscCheckSameComm(sfA, 1, sfB, 2);
20564f572ea9SToby Isaac   PetscAssertPointer(sfBA, 3);
20579566063dSJacob Faibussowitsch   PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
20589566063dSJacob Faibussowitsch   PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
205954729392SStefano Zampini 
20609566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
20619566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
206220662ed9SBarry Smith   /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size
206320662ed9SBarry Smith      numRootsB; otherwise, garbage will be broadcasted.
206420662ed9SBarry Smith      Example (comm size = 1):
206520662ed9SBarry Smith      sfA: 0 <- (0, 0)
206620662ed9SBarry Smith      sfB: 100 <- (0, 0)
206720662ed9SBarry Smith           101 <- (0, 1)
206820662ed9SBarry Smith      Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid tartget
206920662ed9SBarry Smith      of sfB, it has to be recasted as [(0, 0), (-1, -1)] so that points 100 and 101 would
207020662ed9SBarry Smith      receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on
207120662ed9SBarry Smith      remotePointsA; if not recasted, point 101 would receive a garbage value.             */
20729566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA));
207354729392SStefano Zampini   for (i = 0; i < numRootsB; i++) {
207454729392SStefano Zampini     reorderedRemotePointsA[i].rank  = -1;
207554729392SStefano Zampini     reorderedRemotePointsA[i].index = -1;
207654729392SStefano Zampini   }
207754729392SStefano Zampini   for (i = 0; i < numLeavesA; i++) {
20780ea77edaSksagiyam     PetscInt localp = localPointsA ? localPointsA[i] : i;
20790ea77edaSksagiyam 
20800ea77edaSksagiyam     if (localp >= numRootsB) continue;
20810ea77edaSksagiyam     reorderedRemotePointsA[localp] = remotePointsA[i];
208254729392SStefano Zampini   }
2083d41018fbSJunchao Zhang   remotePointsA = reorderedRemotePointsA;
20849566063dSJacob Faibussowitsch   PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
20859566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB));
20860ea77edaSksagiyam   for (i = 0; i < maxleaf - minleaf + 1; i++) {
20870ea77edaSksagiyam     leafdataB[i].rank  = -1;
20880ea77edaSksagiyam     leafdataB[i].index = -1;
20890ea77edaSksagiyam   }
20906497c311SBarry Smith   PetscCall(PetscSFBcastBegin(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
20916497c311SBarry Smith   PetscCall(PetscSFBcastEnd(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
20929566063dSJacob Faibussowitsch   PetscCall(PetscFree(reorderedRemotePointsA));
2093d41018fbSJunchao Zhang 
209454729392SStefano Zampini   denseB = (PetscBool)!localPointsB;
209554729392SStefano Zampini   for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
209654729392SStefano Zampini     if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE;
209754729392SStefano Zampini     else numLeavesBA++;
209854729392SStefano Zampini   }
209954729392SStefano Zampini   if (denseB) {
2100d41018fbSJunchao Zhang     localPointsBA  = NULL;
2101d41018fbSJunchao Zhang     remotePointsBA = leafdataB;
2102d41018fbSJunchao Zhang   } else {
21039566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA));
21049566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA));
210554729392SStefano Zampini     for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
210654729392SStefano Zampini       const PetscInt l = localPointsB ? localPointsB[i] : i;
210754729392SStefano Zampini 
210854729392SStefano Zampini       if (leafdataB[l - minleaf].rank == -1) continue;
210954729392SStefano Zampini       remotePointsBA[numLeavesBA] = leafdataB[l - minleaf];
211054729392SStefano Zampini       localPointsBA[numLeavesBA]  = l;
211154729392SStefano Zampini       numLeavesBA++;
211254729392SStefano Zampini     }
21139566063dSJacob Faibussowitsch     PetscCall(PetscFree(leafdataB));
2114d41018fbSJunchao Zhang   }
21159566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
21169566063dSJacob Faibussowitsch   PetscCall(PetscSFSetFromOptions(*sfBA));
21179566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
21183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2119a7b3aa13SAta Mesgarnejad }
21201c6ba672SJunchao Zhang 
212104c0ada0SJunchao Zhang /*@
2122cab54364SBarry Smith   PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one
212304c0ada0SJunchao Zhang 
212404c0ada0SJunchao Zhang   Input Parameters:
2125cab54364SBarry Smith + sfA - The first `PetscSF`
2126cab54364SBarry Smith - sfB - The second `PetscSF`
212704c0ada0SJunchao Zhang 
21282fe279fdSBarry Smith   Output Parameter:
2129cab54364SBarry Smith . sfBA - The composite `PetscSF`.
213004c0ada0SJunchao Zhang 
213104c0ada0SJunchao Zhang   Level: developer
213204c0ada0SJunchao Zhang 
213354729392SStefano Zampini   Notes:
213420662ed9SBarry Smith   Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
213554729392SStefano Zampini   forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
213620662ed9SBarry Smith   second `PetscSF` must have a degree of 1, i.e., no roots have more than one leaf connected.
213754729392SStefano Zampini 
213820662ed9SBarry Smith   `sfA`'s leaf space and `sfB`'s leaf space might be partially overlapped. The composition builds
213920662ed9SBarry Smith   a graph with `sfA`'s roots and `sfB`'s roots only when there is a path between them. Unconnected
2140*2f04c522SBarry Smith   roots are not in `sfBA`. Doing a `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a `PetscSFBcastBegin()` and `PetscSFBcastEnd()`
214120662ed9SBarry Smith   on `sfA`, then
2142*2f04c522SBarry Smith   a `PetscSFReduceBegin()` and `PetscSFReduceEnd()` on `sfB`, on connected roots.
214354729392SStefano Zampini 
2144*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
214504c0ada0SJunchao Zhang @*/
2146d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2147d71ae5a4SJacob Faibussowitsch {
214804c0ada0SJunchao Zhang   const PetscSFNode *remotePointsA, *remotePointsB;
214904c0ada0SJunchao Zhang   PetscSFNode       *remotePointsBA;
215004c0ada0SJunchao Zhang   const PetscInt    *localPointsA, *localPointsB;
215154729392SStefano Zampini   PetscSFNode       *reorderedRemotePointsA = NULL;
215254729392SStefano Zampini   PetscInt           i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA;
21535b0d146aSStefano Zampini   MPI_Op             op;
21545b0d146aSStefano Zampini #if defined(PETSC_USE_64BIT_INDICES)
21555b0d146aSStefano Zampini   PetscBool iswin;
21565b0d146aSStefano Zampini #endif
215704c0ada0SJunchao Zhang 
215804c0ada0SJunchao Zhang   PetscFunctionBegin;
215904c0ada0SJunchao Zhang   PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
216004c0ada0SJunchao Zhang   PetscSFCheckGraphSet(sfA, 1);
216104c0ada0SJunchao Zhang   PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
216204c0ada0SJunchao Zhang   PetscSFCheckGraphSet(sfB, 2);
216354729392SStefano Zampini   PetscCheckSameComm(sfA, 1, sfB, 2);
21644f572ea9SToby Isaac   PetscAssertPointer(sfBA, 3);
21659566063dSJacob Faibussowitsch   PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
21669566063dSJacob Faibussowitsch   PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
216754729392SStefano Zampini 
21689566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
21699566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
21705b0d146aSStefano Zampini 
21715b0d146aSStefano Zampini   /* TODO: Check roots of sfB have degree of 1 */
21725b0d146aSStefano Zampini   /* Once we implement it, we can replace the MPI_MAXLOC
217383df288dSJunchao Zhang      with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
21745b0d146aSStefano Zampini      We use MPI_MAXLOC only to have a deterministic output from this routine if
21755b0d146aSStefano Zampini      the root condition is not meet.
21765b0d146aSStefano Zampini    */
21775b0d146aSStefano Zampini   op = MPI_MAXLOC;
21785b0d146aSStefano Zampini #if defined(PETSC_USE_64BIT_INDICES)
21795b0d146aSStefano Zampini   /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
21809566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin));
218183df288dSJunchao Zhang   if (iswin) op = MPI_REPLACE;
21825b0d146aSStefano Zampini #endif
21835b0d146aSStefano Zampini 
21849566063dSJacob Faibussowitsch   PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
21859566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA));
218654729392SStefano Zampini   for (i = 0; i < maxleaf - minleaf + 1; i++) {
218754729392SStefano Zampini     reorderedRemotePointsA[i].rank  = -1;
218854729392SStefano Zampini     reorderedRemotePointsA[i].index = -1;
218954729392SStefano Zampini   }
219054729392SStefano Zampini   if (localPointsA) {
219154729392SStefano Zampini     for (i = 0; i < numLeavesA; i++) {
219254729392SStefano Zampini       if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
219354729392SStefano Zampini       reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
219454729392SStefano Zampini     }
219554729392SStefano Zampini   } else {
219654729392SStefano Zampini     for (i = 0; i < numLeavesA; i++) {
219754729392SStefano Zampini       if (i > maxleaf || i < minleaf) continue;
219854729392SStefano Zampini       reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
219954729392SStefano Zampini     }
220054729392SStefano Zampini   }
220154729392SStefano Zampini 
22029566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(numRootsB, &localPointsBA));
22039566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(numRootsB, &remotePointsBA));
220454729392SStefano Zampini   for (i = 0; i < numRootsB; i++) {
220554729392SStefano Zampini     remotePointsBA[i].rank  = -1;
220654729392SStefano Zampini     remotePointsBA[i].index = -1;
220754729392SStefano Zampini   }
220854729392SStefano Zampini 
22096497c311SBarry Smith   PetscCall(PetscSFReduceBegin(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
22106497c311SBarry Smith   PetscCall(PetscSFReduceEnd(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
22119566063dSJacob Faibussowitsch   PetscCall(PetscFree(reorderedRemotePointsA));
221254729392SStefano Zampini   for (i = 0, numLeavesBA = 0; i < numRootsB; i++) {
221354729392SStefano Zampini     if (remotePointsBA[i].rank == -1) continue;
221454729392SStefano Zampini     remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
221554729392SStefano Zampini     remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
221654729392SStefano Zampini     localPointsBA[numLeavesBA]        = i;
221754729392SStefano Zampini     numLeavesBA++;
221854729392SStefano Zampini   }
22199566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
22209566063dSJacob Faibussowitsch   PetscCall(PetscSFSetFromOptions(*sfBA));
22219566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
22223ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
222304c0ada0SJunchao Zhang }
222404c0ada0SJunchao Zhang 
22251c6ba672SJunchao Zhang /*
2226cab54364SBarry Smith   PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF`
22271c6ba672SJunchao Zhang 
22282fe279fdSBarry Smith   Input Parameter:
2229cab54364SBarry Smith . sf - The global `PetscSF`
22301c6ba672SJunchao Zhang 
22312fe279fdSBarry Smith   Output Parameter:
2232cab54364SBarry Smith . out - The local `PetscSF`
2233cab54364SBarry Smith 
2234*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`
22351c6ba672SJunchao Zhang  */
2236d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out)
2237d71ae5a4SJacob Faibussowitsch {
22381c6ba672SJunchao Zhang   MPI_Comm           comm;
22391c6ba672SJunchao Zhang   PetscMPIInt        myrank;
22401c6ba672SJunchao Zhang   const PetscInt    *ilocal;
22411c6ba672SJunchao Zhang   const PetscSFNode *iremote;
22421c6ba672SJunchao Zhang   PetscInt           i, j, nroots, nleaves, lnleaves, *lilocal;
22431c6ba672SJunchao Zhang   PetscSFNode       *liremote;
22441c6ba672SJunchao Zhang   PetscSF            lsf;
22451c6ba672SJunchao Zhang 
22461c6ba672SJunchao Zhang   PetscFunctionBegin;
22471c6ba672SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2248dbbe0bcdSBarry Smith   if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out);
2249dbbe0bcdSBarry Smith   else {
2250835f2295SStefano Zampini     PetscMPIInt irank;
2251835f2295SStefano Zampini 
22521c6ba672SJunchao Zhang     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
22539566063dSJacob Faibussowitsch     PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
22549566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_rank(comm, &myrank));
22551c6ba672SJunchao Zhang 
22561c6ba672SJunchao Zhang     /* Find out local edges and build a local SF */
22579566063dSJacob Faibussowitsch     PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
22589371c9d4SSatish Balay     for (i = lnleaves = 0; i < nleaves; i++) {
2259835f2295SStefano Zampini       PetscCall(PetscMPIIntCast(iremote[i].rank, &irank));
2260835f2295SStefano Zampini       if (irank == myrank) lnleaves++;
22619371c9d4SSatish Balay     }
22629566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(lnleaves, &lilocal));
22639566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(lnleaves, &liremote));
22641c6ba672SJunchao Zhang 
22651c6ba672SJunchao Zhang     for (i = j = 0; i < nleaves; i++) {
2266835f2295SStefano Zampini       PetscCall(PetscMPIIntCast(iremote[i].rank, &irank));
2267835f2295SStefano Zampini       if (irank == myrank) {
22681c6ba672SJunchao Zhang         lilocal[j]        = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
22691c6ba672SJunchao Zhang         liremote[j].rank  = 0;                      /* rank in PETSC_COMM_SELF */
22701c6ba672SJunchao Zhang         liremote[j].index = iremote[i].index;
22711c6ba672SJunchao Zhang         j++;
22721c6ba672SJunchao Zhang       }
22731c6ba672SJunchao Zhang     }
22749566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf));
22759566063dSJacob Faibussowitsch     PetscCall(PetscSFSetFromOptions(lsf));
22769566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER));
22779566063dSJacob Faibussowitsch     PetscCall(PetscSFSetUp(lsf));
22781c6ba672SJunchao Zhang     *out = lsf;
22791c6ba672SJunchao Zhang   }
22803ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
22811c6ba672SJunchao Zhang }
2282dd5b3ca6SJunchao Zhang 
2283dd5b3ca6SJunchao Zhang /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2284d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata)
2285d71ae5a4SJacob Faibussowitsch {
2286eb02082bSJunchao Zhang   PetscMemType rootmtype, leafmtype;
2287dd5b3ca6SJunchao Zhang 
2288dd5b3ca6SJunchao Zhang   PetscFunctionBegin;
2289dd5b3ca6SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
22909566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
22919566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
22929566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(rootdata, &rootmtype));
22939566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(leafdata, &leafmtype));
2294dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata);
22959566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
22963ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2297dd5b3ca6SJunchao Zhang }
2298dd5b3ca6SJunchao Zhang 
2299157edd7aSVaclav Hapla /*@
2300*2f04c522SBarry Smith   PetscSFConcatenate - concatenate multiple `PetscSF` into a new `PetscSF`
2301157edd7aSVaclav Hapla 
2302157edd7aSVaclav Hapla   Input Parameters:
2303157edd7aSVaclav Hapla + comm        - the communicator
2304cab54364SBarry Smith . nsfs        - the number of input `PetscSF`
2305cab54364SBarry Smith . sfs         - the array of input `PetscSF`
23061f40158dSVaclav Hapla . rootMode    - the root mode specifying how roots are handled
230720662ed9SBarry Smith - leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or `NULL` for contiguous storage
2308157edd7aSVaclav Hapla 
23092fe279fdSBarry Smith   Output Parameter:
2310cab54364SBarry Smith . newsf - The resulting `PetscSF`
2311157edd7aSVaclav Hapla 
23121f40158dSVaclav Hapla   Level: advanced
2313157edd7aSVaclav Hapla 
2314157edd7aSVaclav Hapla   Notes:
231520662ed9SBarry Smith   The communicator of all `PetscSF`s in `sfs` must be comm.
2316157edd7aSVaclav Hapla 
231720662ed9SBarry Smith   Leaves are always concatenated locally, keeping them ordered by the input `PetscSF` index and original local order.
231820662ed9SBarry Smith 
231920662ed9SBarry Smith   The offsets in `leafOffsets` are added to the original leaf indices.
232020662ed9SBarry Smith 
2321*2f04c522SBarry Smith   If all input `PetscSF`s use contiguous leaf storage (`ilocal` = `NULL`), `leafOffsets` can be passed as `NULL` as well.
232220662ed9SBarry Smith   In this case, `NULL` is also passed as `ilocal` to the resulting `PetscSF`.
232320662ed9SBarry Smith 
232420662ed9SBarry Smith   If any input `PetscSF` has non-null `ilocal`, `leafOffsets` is needed to distinguish leaves from different input `PetscSF`s.
2325157edd7aSVaclav Hapla   In this case, user is responsible to provide correct offsets so that the resulting leaves are unique (otherwise an error occurs).
2326157edd7aSVaclav Hapla 
232720662ed9SBarry Smith   All root modes retain the essential connectivity condition.
2328*2f04c522SBarry Smith 
232920662ed9SBarry Smith   If two leaves of the same input `PetscSF` are connected (sharing the same root), they are also connected in the output `PetscSF`.
2330*2f04c522SBarry Smith 
233120662ed9SBarry Smith   Parameter `rootMode` controls how the input root spaces are combined.
233220662ed9SBarry Smith   For `PETSCSF_CONCATENATE_ROOTMODE_SHARED`, the root space is considered the same for each input `PetscSF` (checked in debug mode)
233320662ed9SBarry Smith   and is also the same in the output `PetscSF`.
2334*2f04c522SBarry Smith 
23351f40158dSVaclav Hapla   For `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, the input root spaces are taken as separate and joined.
23361f40158dSVaclav Hapla   `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` joins the root spaces locally;
2337*2f04c522SBarry Smith   roots of sfs[0], sfs[1], sfs[2], ... are joined on each MPI process separately, ordered by input `PetscSF` and original local index, and renumbered contiguously.
23381f40158dSVaclav Hapla   `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL` joins the root spaces globally;
23391593df67SStefano Zampini   roots of sfs[0], sfs[1], sfs[2], ... are joined globally, ordered by input `PetscSF` index and original global index, and renumbered contiguously;
2340*2f04c522SBarry Smith   the original root MPI processes are ignored.
23411f40158dSVaclav Hapla   For both `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`,
2342*2f04c522SBarry Smith   the output `PetscSF`'s root layout is such that the local number of roots is a sum of the input `PetscSF`'s local numbers of roots on each MPI process
234320662ed9SBarry Smith   to keep the load balancing.
2344*2f04c522SBarry Smith   However, for `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, roots can move to different MPI processes.
23451f40158dSVaclav Hapla 
23461f40158dSVaclav Hapla   Example:
23471f40158dSVaclav Hapla   We can use src/vec/is/sf/tests/ex18.c to compare the root modes. By running
234820662ed9SBarry Smith .vb
234920662ed9SBarry Smith   make -C $PETSC_DIR/src/vec/is/sf/tests ex18
235020662ed9SBarry Smith   for m in {local,global,shared}; do
235120662ed9SBarry Smith     mpirun -n 2 $PETSC_DIR/src/vec/is/sf/tests/ex18 -nsfs 2 -n 2 -root_mode $m -sf_view
235220662ed9SBarry Smith   done
235320662ed9SBarry Smith .ve
235420662ed9SBarry Smith   we generate two identical `PetscSF`s sf_0 and sf_1,
235520662ed9SBarry Smith .vb
235620662ed9SBarry Smith   PetscSF Object: sf_0 2 MPI processes
235720662ed9SBarry Smith     type: basic
235820662ed9SBarry Smith     rank #leaves #roots
235920662ed9SBarry Smith     [ 0]       4      2
236020662ed9SBarry Smith     [ 1]       4      2
236120662ed9SBarry Smith     leaves      roots       roots in global numbering
236220662ed9SBarry Smith     ( 0,  0) <- ( 0,  0)  =   0
236320662ed9SBarry Smith     ( 0,  1) <- ( 0,  1)  =   1
236420662ed9SBarry Smith     ( 0,  2) <- ( 1,  0)  =   2
236520662ed9SBarry Smith     ( 0,  3) <- ( 1,  1)  =   3
236620662ed9SBarry Smith     ( 1,  0) <- ( 0,  0)  =   0
236720662ed9SBarry Smith     ( 1,  1) <- ( 0,  1)  =   1
236820662ed9SBarry Smith     ( 1,  2) <- ( 1,  0)  =   2
236920662ed9SBarry Smith     ( 1,  3) <- ( 1,  1)  =   3
237020662ed9SBarry Smith .ve
2371e33f79d8SJacob Faibussowitsch   and pass them to `PetscSFConcatenate()` along with different choices of `rootMode`, yielding different result_sf\:
237220662ed9SBarry Smith .vb
237320662ed9SBarry Smith   rootMode = local:
237420662ed9SBarry Smith   PetscSF Object: result_sf 2 MPI processes
237520662ed9SBarry Smith     type: basic
237620662ed9SBarry Smith     rank #leaves #roots
237720662ed9SBarry Smith     [ 0]       8      4
237820662ed9SBarry Smith     [ 1]       8      4
237920662ed9SBarry Smith     leaves      roots       roots in global numbering
238020662ed9SBarry Smith     ( 0,  0) <- ( 0,  0)  =   0
238120662ed9SBarry Smith     ( 0,  1) <- ( 0,  1)  =   1
238220662ed9SBarry Smith     ( 0,  2) <- ( 1,  0)  =   4
238320662ed9SBarry Smith     ( 0,  3) <- ( 1,  1)  =   5
238420662ed9SBarry Smith     ( 0,  4) <- ( 0,  2)  =   2
238520662ed9SBarry Smith     ( 0,  5) <- ( 0,  3)  =   3
238620662ed9SBarry Smith     ( 0,  6) <- ( 1,  2)  =   6
238720662ed9SBarry Smith     ( 0,  7) <- ( 1,  3)  =   7
238820662ed9SBarry Smith     ( 1,  0) <- ( 0,  0)  =   0
238920662ed9SBarry Smith     ( 1,  1) <- ( 0,  1)  =   1
239020662ed9SBarry Smith     ( 1,  2) <- ( 1,  0)  =   4
239120662ed9SBarry Smith     ( 1,  3) <- ( 1,  1)  =   5
239220662ed9SBarry Smith     ( 1,  4) <- ( 0,  2)  =   2
239320662ed9SBarry Smith     ( 1,  5) <- ( 0,  3)  =   3
239420662ed9SBarry Smith     ( 1,  6) <- ( 1,  2)  =   6
239520662ed9SBarry Smith     ( 1,  7) <- ( 1,  3)  =   7
239620662ed9SBarry Smith 
239720662ed9SBarry Smith   rootMode = global:
239820662ed9SBarry Smith   PetscSF Object: result_sf 2 MPI processes
239920662ed9SBarry Smith     type: basic
240020662ed9SBarry Smith     rank #leaves #roots
240120662ed9SBarry Smith     [ 0]       8      4
240220662ed9SBarry Smith     [ 1]       8      4
240320662ed9SBarry Smith     leaves      roots       roots in global numbering
240420662ed9SBarry Smith     ( 0,  0) <- ( 0,  0)  =   0
240520662ed9SBarry Smith     ( 0,  1) <- ( 0,  1)  =   1
240620662ed9SBarry Smith     ( 0,  2) <- ( 0,  2)  =   2
240720662ed9SBarry Smith     ( 0,  3) <- ( 0,  3)  =   3
240820662ed9SBarry Smith     ( 0,  4) <- ( 1,  0)  =   4
240920662ed9SBarry Smith     ( 0,  5) <- ( 1,  1)  =   5
241020662ed9SBarry Smith     ( 0,  6) <- ( 1,  2)  =   6
241120662ed9SBarry Smith     ( 0,  7) <- ( 1,  3)  =   7
241220662ed9SBarry Smith     ( 1,  0) <- ( 0,  0)  =   0
241320662ed9SBarry Smith     ( 1,  1) <- ( 0,  1)  =   1
241420662ed9SBarry Smith     ( 1,  2) <- ( 0,  2)  =   2
241520662ed9SBarry Smith     ( 1,  3) <- ( 0,  3)  =   3
241620662ed9SBarry Smith     ( 1,  4) <- ( 1,  0)  =   4
241720662ed9SBarry Smith     ( 1,  5) <- ( 1,  1)  =   5
241820662ed9SBarry Smith     ( 1,  6) <- ( 1,  2)  =   6
241920662ed9SBarry Smith     ( 1,  7) <- ( 1,  3)  =   7
242020662ed9SBarry Smith 
242120662ed9SBarry Smith   rootMode = shared:
242220662ed9SBarry Smith   PetscSF Object: result_sf 2 MPI processes
242320662ed9SBarry Smith     type: basic
242420662ed9SBarry Smith     rank #leaves #roots
242520662ed9SBarry Smith     [ 0]       8      2
242620662ed9SBarry Smith     [ 1]       8      2
242720662ed9SBarry Smith     leaves      roots       roots in global numbering
242820662ed9SBarry Smith     ( 0,  0) <- ( 0,  0)  =   0
242920662ed9SBarry Smith     ( 0,  1) <- ( 0,  1)  =   1
243020662ed9SBarry Smith     ( 0,  2) <- ( 1,  0)  =   2
243120662ed9SBarry Smith     ( 0,  3) <- ( 1,  1)  =   3
243220662ed9SBarry Smith     ( 0,  4) <- ( 0,  0)  =   0
243320662ed9SBarry Smith     ( 0,  5) <- ( 0,  1)  =   1
243420662ed9SBarry Smith     ( 0,  6) <- ( 1,  0)  =   2
243520662ed9SBarry Smith     ( 0,  7) <- ( 1,  1)  =   3
243620662ed9SBarry Smith     ( 1,  0) <- ( 0,  0)  =   0
243720662ed9SBarry Smith     ( 1,  1) <- ( 0,  1)  =   1
243820662ed9SBarry Smith     ( 1,  2) <- ( 1,  0)  =   2
243920662ed9SBarry Smith     ( 1,  3) <- ( 1,  1)  =   3
244020662ed9SBarry Smith     ( 1,  4) <- ( 0,  0)  =   0
244120662ed9SBarry Smith     ( 1,  5) <- ( 0,  1)  =   1
244220662ed9SBarry Smith     ( 1,  6) <- ( 1,  0)  =   2
244320662ed9SBarry Smith     ( 1,  7) <- ( 1,  1)  =   3
244420662ed9SBarry Smith .ve
24451f40158dSVaclav Hapla 
2446*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFConcatenateRootMode`
2447157edd7aSVaclav Hapla @*/
24481f40158dSVaclav Hapla PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscSFConcatenateRootMode rootMode, PetscInt leafOffsets[], PetscSF *newsf)
2449d71ae5a4SJacob Faibussowitsch {
2450157edd7aSVaclav Hapla   PetscInt     i, s, nLeaves, nRoots;
2451157edd7aSVaclav Hapla   PetscInt    *leafArrayOffsets;
2452157edd7aSVaclav Hapla   PetscInt    *ilocal_new;
2453157edd7aSVaclav Hapla   PetscSFNode *iremote_new;
2454157edd7aSVaclav Hapla   PetscBool    all_ilocal_null = PETSC_FALSE;
24551f40158dSVaclav Hapla   PetscLayout  glayout         = NULL;
24561f40158dSVaclav Hapla   PetscInt    *gremote         = NULL;
24571f40158dSVaclav Hapla   PetscMPIInt  rank, size;
2458157edd7aSVaclav Hapla 
2459157edd7aSVaclav Hapla   PetscFunctionBegin;
246012f479c1SVaclav Hapla   if (PetscDefined(USE_DEBUG)) {
2461157edd7aSVaclav Hapla     PetscSF dummy; /* just to have a PetscObject on comm for input validation */
2462157edd7aSVaclav Hapla 
24639566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(comm, &dummy));
2464157edd7aSVaclav Hapla     PetscValidLogicalCollectiveInt(dummy, nsfs, 2);
24654f572ea9SToby Isaac     PetscAssertPointer(sfs, 3);
2466157edd7aSVaclav Hapla     for (i = 0; i < nsfs; i++) {
2467157edd7aSVaclav Hapla       PetscValidHeaderSpecific(sfs[i], PETSCSF_CLASSID, 3);
2468157edd7aSVaclav Hapla       PetscCheckSameComm(dummy, 1, sfs[i], 3);
2469157edd7aSVaclav Hapla     }
24701f40158dSVaclav Hapla     PetscValidLogicalCollectiveEnum(dummy, rootMode, 4);
24714f572ea9SToby Isaac     if (leafOffsets) PetscAssertPointer(leafOffsets, 5);
24724f572ea9SToby Isaac     PetscAssertPointer(newsf, 6);
24739566063dSJacob Faibussowitsch     PetscCall(PetscSFDestroy(&dummy));
2474157edd7aSVaclav Hapla   }
2475157edd7aSVaclav Hapla   if (!nsfs) {
24769566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(comm, newsf));
24779566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
24783ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
2479157edd7aSVaclav Hapla   }
24809566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
24811f40158dSVaclav Hapla   PetscCallMPI(MPI_Comm_size(comm, &size));
2482157edd7aSVaclav Hapla 
24831f40158dSVaclav Hapla   /* Calculate leaf array offsets */
24849566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets));
2485157edd7aSVaclav Hapla   leafArrayOffsets[0] = 0;
2486157edd7aSVaclav Hapla   for (s = 0; s < nsfs; s++) {
2487157edd7aSVaclav Hapla     PetscInt nl;
2488157edd7aSVaclav Hapla 
24899566063dSJacob Faibussowitsch     PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
2490157edd7aSVaclav Hapla     leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl;
2491157edd7aSVaclav Hapla   }
2492157edd7aSVaclav Hapla   nLeaves = leafArrayOffsets[nsfs];
2493157edd7aSVaclav Hapla 
24941f40158dSVaclav Hapla   /* Calculate number of roots */
24951f40158dSVaclav Hapla   switch (rootMode) {
24961f40158dSVaclav Hapla   case PETSCSF_CONCATENATE_ROOTMODE_SHARED: {
24971f40158dSVaclav Hapla     PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
24981f40158dSVaclav Hapla     if (PetscDefined(USE_DEBUG)) {
24991f40158dSVaclav Hapla       for (s = 1; s < nsfs; s++) {
25001f40158dSVaclav Hapla         PetscInt nr;
25011f40158dSVaclav Hapla 
25021f40158dSVaclav Hapla         PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
25031f40158dSVaclav Hapla         PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "rootMode = %s but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", PetscSFConcatenateRootModes[rootMode], s, nr, nRoots);
25041f40158dSVaclav Hapla       }
25051f40158dSVaclav Hapla     }
25061f40158dSVaclav Hapla   } break;
25071f40158dSVaclav Hapla   case PETSCSF_CONCATENATE_ROOTMODE_GLOBAL: {
25081f40158dSVaclav Hapla     /* Calculate also global layout in this case */
25091f40158dSVaclav Hapla     PetscInt    *nls;
25101f40158dSVaclav Hapla     PetscLayout *lts;
25111f40158dSVaclav Hapla     PetscInt   **inds;
25121f40158dSVaclav Hapla     PetscInt     j;
25131f40158dSVaclav Hapla     PetscInt     rootOffset = 0;
25141f40158dSVaclav Hapla 
25151f40158dSVaclav Hapla     PetscCall(PetscCalloc3(nsfs, &lts, nsfs, &nls, nsfs, &inds));
25161f40158dSVaclav Hapla     PetscCall(PetscLayoutCreate(comm, &glayout));
25171f40158dSVaclav Hapla     glayout->bs = 1;
25181f40158dSVaclav Hapla     glayout->n  = 0;
25191f40158dSVaclav Hapla     glayout->N  = 0;
25201f40158dSVaclav Hapla     for (s = 0; s < nsfs; s++) {
25211f40158dSVaclav Hapla       PetscCall(PetscSFGetGraphLayout(sfs[s], &lts[s], &nls[s], NULL, &inds[s]));
25221f40158dSVaclav Hapla       glayout->n += lts[s]->n;
25231f40158dSVaclav Hapla       glayout->N += lts[s]->N;
25241f40158dSVaclav Hapla     }
25251f40158dSVaclav Hapla     PetscCall(PetscLayoutSetUp(glayout));
25261f40158dSVaclav Hapla     PetscCall(PetscMalloc1(nLeaves, &gremote));
25271f40158dSVaclav Hapla     for (s = 0, j = 0; s < nsfs; s++) {
25281f40158dSVaclav Hapla       for (i = 0; i < nls[s]; i++, j++) gremote[j] = inds[s][i] + rootOffset;
25291f40158dSVaclav Hapla       rootOffset += lts[s]->N;
25301f40158dSVaclav Hapla       PetscCall(PetscLayoutDestroy(&lts[s]));
25311f40158dSVaclav Hapla       PetscCall(PetscFree(inds[s]));
25321f40158dSVaclav Hapla     }
25331f40158dSVaclav Hapla     PetscCall(PetscFree3(lts, nls, inds));
25341f40158dSVaclav Hapla     nRoots = glayout->N;
25351f40158dSVaclav Hapla   } break;
25361f40158dSVaclav Hapla   case PETSCSF_CONCATENATE_ROOTMODE_LOCAL:
25371f40158dSVaclav Hapla     /* nRoots calculated later in this case */
25381f40158dSVaclav Hapla     break;
25391f40158dSVaclav Hapla   default:
25401f40158dSVaclav Hapla     SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Invalid PetscSFConcatenateRootMode %d", rootMode);
25411f40158dSVaclav Hapla   }
25421f40158dSVaclav Hapla 
2543157edd7aSVaclav Hapla   if (!leafOffsets) {
2544157edd7aSVaclav Hapla     all_ilocal_null = PETSC_TRUE;
2545157edd7aSVaclav Hapla     for (s = 0; s < nsfs; s++) {
2546157edd7aSVaclav Hapla       const PetscInt *ilocal;
2547157edd7aSVaclav Hapla 
25489566063dSJacob Faibussowitsch       PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
2549157edd7aSVaclav Hapla       if (ilocal) {
2550157edd7aSVaclav Hapla         all_ilocal_null = PETSC_FALSE;
2551157edd7aSVaclav Hapla         break;
2552157edd7aSVaclav Hapla       }
2553157edd7aSVaclav Hapla     }
2554157edd7aSVaclav Hapla     PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
2555157edd7aSVaclav Hapla   }
2556157edd7aSVaclav Hapla 
2557157edd7aSVaclav Hapla   /* Renumber and concatenate local leaves */
2558157edd7aSVaclav Hapla   ilocal_new = NULL;
2559157edd7aSVaclav Hapla   if (!all_ilocal_null) {
25609566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nLeaves, &ilocal_new));
2561157edd7aSVaclav Hapla     for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1;
2562157edd7aSVaclav Hapla     for (s = 0; s < nsfs; s++) {
2563157edd7aSVaclav Hapla       const PetscInt *ilocal;
25648e3a54c0SPierre Jolivet       PetscInt       *ilocal_l = PetscSafePointerPlusOffset(ilocal_new, leafArrayOffsets[s]);
2565157edd7aSVaclav Hapla       PetscInt        i, nleaves_l;
2566157edd7aSVaclav Hapla 
25679566063dSJacob Faibussowitsch       PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL));
2568157edd7aSVaclav Hapla       for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
2569157edd7aSVaclav Hapla     }
2570157edd7aSVaclav Hapla   }
2571157edd7aSVaclav Hapla 
2572157edd7aSVaclav Hapla   /* Renumber and concatenate remote roots */
25731f40158dSVaclav Hapla   if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL || rootMode == PETSCSF_CONCATENATE_ROOTMODE_SHARED) {
25741f40158dSVaclav Hapla     PetscInt rootOffset = 0;
25751f40158dSVaclav Hapla 
25769566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nLeaves, &iremote_new));
2577157edd7aSVaclav Hapla     for (i = 0; i < nLeaves; i++) {
2578157edd7aSVaclav Hapla       iremote_new[i].rank  = -1;
2579157edd7aSVaclav Hapla       iremote_new[i].index = -1;
2580157edd7aSVaclav Hapla     }
2581157edd7aSVaclav Hapla     for (s = 0; s < nsfs; s++) {
2582157edd7aSVaclav Hapla       PetscInt           i, nl, nr;
2583157edd7aSVaclav Hapla       PetscSF            tmp_sf;
2584157edd7aSVaclav Hapla       const PetscSFNode *iremote;
2585157edd7aSVaclav Hapla       PetscSFNode       *tmp_rootdata;
25868e3a54c0SPierre Jolivet       PetscSFNode       *tmp_leafdata = PetscSafePointerPlusOffset(iremote_new, leafArrayOffsets[s]);
2587157edd7aSVaclav Hapla 
25889566063dSJacob Faibussowitsch       PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote));
25899566063dSJacob Faibussowitsch       PetscCall(PetscSFCreate(comm, &tmp_sf));
2590157edd7aSVaclav Hapla       /* create helper SF with contiguous leaves */
25919566063dSJacob Faibussowitsch       PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
25929566063dSJacob Faibussowitsch       PetscCall(PetscSFSetUp(tmp_sf));
25939566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(nr, &tmp_rootdata));
25941f40158dSVaclav Hapla       if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) {
2595157edd7aSVaclav Hapla         for (i = 0; i < nr; i++) {
25961f40158dSVaclav Hapla           tmp_rootdata[i].index = i + rootOffset;
25976497c311SBarry Smith           tmp_rootdata[i].rank  = rank;
2598157edd7aSVaclav Hapla         }
25991f40158dSVaclav Hapla         rootOffset += nr;
26001f40158dSVaclav Hapla       } else {
26011f40158dSVaclav Hapla         for (i = 0; i < nr; i++) {
26021f40158dSVaclav Hapla           tmp_rootdata[i].index = i;
26036497c311SBarry Smith           tmp_rootdata[i].rank  = rank;
26041f40158dSVaclav Hapla         }
26051f40158dSVaclav Hapla       }
26066497c311SBarry Smith       PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
26076497c311SBarry Smith       PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
26089566063dSJacob Faibussowitsch       PetscCall(PetscSFDestroy(&tmp_sf));
26099566063dSJacob Faibussowitsch       PetscCall(PetscFree(tmp_rootdata));
2610157edd7aSVaclav Hapla     }
2611aa624791SPierre Jolivet     if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) nRoots = rootOffset; // else nRoots already calculated above
2612157edd7aSVaclav Hapla 
2613157edd7aSVaclav Hapla     /* Build the new SF */
26149566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(comm, newsf));
26159566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER));
26161f40158dSVaclav Hapla   } else {
26171f40158dSVaclav Hapla     /* Build the new SF */
26181f40158dSVaclav Hapla     PetscCall(PetscSFCreate(comm, newsf));
26191f40158dSVaclav Hapla     PetscCall(PetscSFSetGraphLayout(*newsf, glayout, nLeaves, ilocal_new, PETSC_OWN_POINTER, gremote));
26201f40158dSVaclav Hapla   }
26219566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(*newsf));
26221f40158dSVaclav Hapla   PetscCall(PetscSFViewFromOptions(*newsf, NULL, "-sf_concat_view"));
26231f40158dSVaclav Hapla   PetscCall(PetscLayoutDestroy(&glayout));
26241f40158dSVaclav Hapla   PetscCall(PetscFree(gremote));
26259566063dSJacob Faibussowitsch   PetscCall(PetscFree(leafArrayOffsets));
26263ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2627157edd7aSVaclav Hapla }
26288e54d7e8SToby Isaac 
26298e54d7e8SToby Isaac /*@
2630*2f04c522SBarry Smith   PetscSFRegisterPersistent - Register root and leaf data as memory regions that will be used for repeated `PetscSF` communications.
26318e54d7e8SToby Isaac 
26328e54d7e8SToby Isaac   Collective
26338e54d7e8SToby Isaac 
26348e54d7e8SToby Isaac   Input Parameters:
26358e54d7e8SToby Isaac + sf       - star forest
2636*2f04c522SBarry Smith . unit     - the data type contained within `rootdata` and `leafdata`
2637*2f04c522SBarry Smith . rootdata - root data that will be used for multiple `PetscSF` communications
2638*2f04c522SBarry Smith - leafdata - leaf data that will be used for multiple `PetscSF` communications
26398e54d7e8SToby Isaac 
26408e54d7e8SToby Isaac   Level: advanced
26418e54d7e8SToby Isaac 
26428e54d7e8SToby Isaac   Notes:
26438e54d7e8SToby Isaac   Implementations of `PetscSF` can make optimizations
26448e54d7e8SToby Isaac   for repeated communication using the same memory regions, but these optimizations
26458e54d7e8SToby Isaac   can be unsound if `rootdata` or `leafdata` is deallocated and the `PetscSF` is not informed.
26468e54d7e8SToby Isaac   The intended pattern is
26478e54d7e8SToby Isaac 
26488e54d7e8SToby Isaac .vb
26498e54d7e8SToby Isaac   PetscMalloc2(nroots, &rootdata, nleaves, &leafdata);
26508e54d7e8SToby Isaac 
26518e54d7e8SToby Isaac   PetscSFRegisterPersistent(sf, unit, rootdata, leafdata);
26528e54d7e8SToby Isaac   // repeated use of rootdata and leafdata will now be optimized
26538e54d7e8SToby Isaac 
26548e54d7e8SToby Isaac   PetscSFBcastBegin(sf, unit, rootdata, leafdata, MPI_REPLACE);
26558e54d7e8SToby Isaac   PetscSFBcastEnd(sf, unit, rootdata, leafdata, MPI_REPLACE);
26568e54d7e8SToby Isaac   // ...
26578e54d7e8SToby Isaac   PetscSFReduceBegin(sf, unit, leafdata, rootdata, MPI_SUM);
26588e54d7e8SToby Isaac   PetscSFReduceEnd(sf, unit, leafdata, rootdata, MPI_SUM);
26598e54d7e8SToby Isaac   // ... (other communications)
26608e54d7e8SToby Isaac 
26618e54d7e8SToby Isaac   // rootdata and leafdata must be deregistered before freeing
26628e54d7e8SToby Isaac   // skipping this can lead to undefined behavior including
26638e54d7e8SToby Isaac   // deadlocks
26648e54d7e8SToby Isaac   PetscSFDeregisterPersistent(sf, unit, rootdata, leafdata);
26658e54d7e8SToby Isaac 
26668e54d7e8SToby Isaac   // it is now safe to free rootdata and leafdata
26678e54d7e8SToby Isaac   PetscFree2(rootdata, leafdata);
26688e54d7e8SToby Isaac .ve
26698e54d7e8SToby Isaac 
26708e54d7e8SToby Isaac   If you do not register `rootdata` and `leafdata` it will not cause an error,
26718e54d7e8SToby Isaac   but optimizations that reduce the setup time for each communication cannot be
26728e54d7e8SToby Isaac   made.  Currently, the only implementation of `PetscSF` that benefits from
26738e54d7e8SToby Isaac   `PetscSFRegisterPersistent()` is `PETSCSFWINDOW`.  For the default
26748e54d7e8SToby Isaac   `PETSCSFBASIC` there is no benefit to using `PetscSFRegisterPersistent()`.
26758e54d7e8SToby Isaac 
2676*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PETSCSFWINDOW`, `PetscSFDeregisterPersistent()`
26778e54d7e8SToby Isaac @*/
26788e54d7e8SToby Isaac PetscErrorCode PetscSFRegisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
26798e54d7e8SToby Isaac {
26808e54d7e8SToby Isaac   PetscFunctionBegin;
26818e54d7e8SToby Isaac   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
26828e54d7e8SToby Isaac   PetscTryMethod(sf, "PetscSFRegisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata));
26838e54d7e8SToby Isaac   PetscFunctionReturn(PETSC_SUCCESS);
26848e54d7e8SToby Isaac }
26858e54d7e8SToby Isaac 
26868e54d7e8SToby Isaac /*@
2687*2f04c522SBarry Smith   PetscSFDeregisterPersistent - Signal that repeated usage of `rootdata` and `leafdata` for `PetscSF` communication has concluded.
26888e54d7e8SToby Isaac 
26898e54d7e8SToby Isaac   Collective
26908e54d7e8SToby Isaac 
26918e54d7e8SToby Isaac   Input Parameters:
26928e54d7e8SToby Isaac + sf       - star forest
2693*2f04c522SBarry Smith . unit     - the data type contained within the `rootdata` and `leafdata`
26948e54d7e8SToby Isaac . rootdata - root data that was previously registered with `PetscSFRegisterPersistent()`
26958e54d7e8SToby Isaac - leafdata - leaf data that was previously registered with `PetscSFRegisterPersistent()`
26968e54d7e8SToby Isaac 
26978e54d7e8SToby Isaac   Level: advanced
26988e54d7e8SToby Isaac 
26998e54d7e8SToby Isaac   Note:
2700*2f04c522SBarry Smith   See `PetscSFRegisterPersistent()` for when and how to use this function.
27018e54d7e8SToby Isaac 
2702*2f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PETSCSFWINDOW`, `PetscSFRegisterPersistent()`
27038e54d7e8SToby Isaac @*/
27048e54d7e8SToby Isaac PetscErrorCode PetscSFDeregisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
27058e54d7e8SToby Isaac {
27068e54d7e8SToby Isaac   PetscFunctionBegin;
27078e54d7e8SToby Isaac   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
27088e54d7e8SToby Isaac   PetscTryMethod(sf, "PetscSFDeregisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata));
27098e54d7e8SToby Isaac   PetscFunctionReturn(PETSC_SUCCESS);
27108e54d7e8SToby Isaac }
2711e1187f0dSToby Isaac 
2712e1187f0dSToby Isaac PETSC_INTERN PetscErrorCode PetscSFGetDatatypeSize_Internal(MPI_Comm comm, MPI_Datatype unit, MPI_Aint *size)
2713e1187f0dSToby Isaac {
2714e1187f0dSToby Isaac   MPI_Aint lb, lb_true, bytes, bytes_true;
2715e1187f0dSToby Isaac 
2716e1187f0dSToby Isaac   PetscFunctionBegin;
2717e1187f0dSToby Isaac   PetscCallMPI(MPI_Type_get_extent(unit, &lb, &bytes));
2718e1187f0dSToby Isaac   PetscCallMPI(MPI_Type_get_true_extent(unit, &lb_true, &bytes_true));
2719e1187f0dSToby Isaac   PetscCheck(lb == 0 && lb_true == 0, comm, PETSC_ERR_SUP, "No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature");
2720e1187f0dSToby Isaac   *size = bytes;
2721e1187f0dSToby Isaac   PetscFunctionReturn(PETSC_SUCCESS);
2722e1187f0dSToby Isaac }
2723