xref: /petsc/src/vec/is/sf/interface/sf.c (revision ffeef943c8ee50edff320d8a3135bb0c94853e4c)
1af0996ceSBarry Smith #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2c4e6a40aSLawrence Mitchell #include <petsc/private/hashseti.h>
353dd6d7dSJunchao Zhang #include <petsc/private/viewerimpl.h>
4eec179cfSJacob Faibussowitsch #include <petsc/private/hashmapi.h>
595fce210SBarry Smith 
67fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_CUDA)
77fd2d3dbSJunchao Zhang   #include <cuda_runtime.h>
8715b587bSJunchao Zhang   #include <petscdevice_cuda.h>
97fd2d3dbSJunchao Zhang #endif
107fd2d3dbSJunchao Zhang 
117fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_HIP)
127fd2d3dbSJunchao Zhang   #include <hip/hip_runtime.h>
137fd2d3dbSJunchao Zhang #endif
147fd2d3dbSJunchao Zhang 
/*
  PetscSFCheckGraphSet - check that the graph of argument number `arg` has been set

  Three variants are provided:
  - for the clang static analyzer, only a prototype is declared;
  - in debug builds, the macro raises PETSC_ERR_ARG_WRONGSTATE through PetscCheck() when (sf)->graphset is false;
  - otherwise the macro expands to an empty statement.
*/
#if defined(PETSC_CLANG_STATIC_ANALYZER)
extern void PetscSFCheckGraphSet(PetscSF, int);
#elif defined(PETSC_USE_DEBUG)
  #define PetscSFCheckGraphSet(sf, arg) \
    PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME)
#else
  #define PetscSFCheckGraphSet(sf, arg) do { } while (0)
#endif
2695fce210SBarry Smith 
274c8fdceaSLisandro Dalcin const char *const PetscSFDuplicateOptions[]     = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL};
281f40158dSVaclav Hapla const char *const PetscSFConcatenateRootModes[] = {"local", "shared", "global", "PetscSFConcatenateRootMode", "PETSCSF_CONCATENATE_ROOTMODE_", NULL};
2995fce210SBarry Smith 
308af6ec1cSBarry Smith /*@
3195fce210SBarry Smith   PetscSFCreate - create a star forest communication context
3295fce210SBarry Smith 
33d083f849SBarry Smith   Collective
3495fce210SBarry Smith 
354165533cSJose E. Roman   Input Parameter:
3695fce210SBarry Smith . comm - communicator on which the star forest will operate
3795fce210SBarry Smith 
384165533cSJose E. Roman   Output Parameter:
3995fce210SBarry Smith . sf - new star forest context
4095fce210SBarry Smith 
4120662ed9SBarry Smith   Options Database Key:
426677b1c1SJunchao Zhang + -sf_type basic                 - Use MPI persistent Isend/Irecv for communication (Default)
436677b1c1SJunchao Zhang . -sf_type window                - Use MPI-3 one-sided window for communication
446677b1c1SJunchao Zhang . -sf_type neighbor              - Use MPI-3 neighborhood collectives for communication
456677b1c1SJunchao Zhang - -sf_neighbor_persistent <bool> - If true, use MPI-4 persistent neighborhood collectives for communication (used along with -sf_type neighbor)
46dd5b3ca6SJunchao Zhang 
4795fce210SBarry Smith   Level: intermediate
4895fce210SBarry Smith 
49cab54364SBarry Smith   Note:
50cab54364SBarry Smith   When one knows the communication graph is one of the predefined graph, such as `MPI_Alltoall()`, `MPI_Allgatherv()`,
51cab54364SBarry Smith   `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special
5220662ed9SBarry Smith   `SF`s are optimized and they have better performance than the general `SF`s.
53dd5b3ca6SJunchao Zhang 
5438b5cf2dSJacob Faibussowitsch .seealso: `PetscSF`, `PetscSFSetType`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()`
5595fce210SBarry Smith @*/
56d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf)
57d71ae5a4SJacob Faibussowitsch {
5895fce210SBarry Smith   PetscSF b;
5995fce210SBarry Smith 
6095fce210SBarry Smith   PetscFunctionBegin;
614f572ea9SToby Isaac   PetscAssertPointer(sf, 2);
629566063dSJacob Faibussowitsch   PetscCall(PetscSFInitializePackage());
6395fce210SBarry Smith 
649566063dSJacob Faibussowitsch   PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView));
6595fce210SBarry Smith 
6695fce210SBarry Smith   b->nroots    = -1;
6795fce210SBarry Smith   b->nleaves   = -1;
6829046d53SLisandro Dalcin   b->minleaf   = PETSC_MAX_INT;
6929046d53SLisandro Dalcin   b->maxleaf   = PETSC_MIN_INT;
7095fce210SBarry Smith   b->nranks    = -1;
7195fce210SBarry Smith   b->rankorder = PETSC_TRUE;
7295fce210SBarry Smith   b->ingroup   = MPI_GROUP_NULL;
7395fce210SBarry Smith   b->outgroup  = MPI_GROUP_NULL;
7495fce210SBarry Smith   b->graphset  = PETSC_FALSE;
7520c24465SJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
7620c24465SJunchao Zhang   b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
7720c24465SJunchao Zhang   b->use_stream_aware_mpi = PETSC_FALSE;
7871438e86SJunchao Zhang   b->unknown_input_stream = PETSC_FALSE;
7927f636e8SJunchao Zhang   #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
8020c24465SJunchao Zhang   b->backend = PETSCSF_BACKEND_KOKKOS;
8127f636e8SJunchao Zhang   #elif defined(PETSC_HAVE_CUDA)
8227f636e8SJunchao Zhang   b->backend = PETSCSF_BACKEND_CUDA;
8359af0bd3SScott Kruger   #elif defined(PETSC_HAVE_HIP)
8459af0bd3SScott Kruger   b->backend = PETSCSF_BACKEND_HIP;
8520c24465SJunchao Zhang   #endif
8671438e86SJunchao Zhang 
8771438e86SJunchao Zhang   #if defined(PETSC_HAVE_NVSHMEM)
8871438e86SJunchao Zhang   b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
8971438e86SJunchao Zhang   b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
909566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL));
919566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL));
9271438e86SJunchao Zhang   #endif
9320c24465SJunchao Zhang #endif
9460c22052SBarry Smith   b->vscat.from_n = -1;
9560c22052SBarry Smith   b->vscat.to_n   = -1;
9660c22052SBarry Smith   b->vscat.unit   = MPIU_SCALAR;
9795fce210SBarry Smith   *sf             = b;
983ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
9995fce210SBarry Smith }
10095fce210SBarry Smith 
10129046d53SLisandro Dalcin /*@
10295fce210SBarry Smith   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
10395fce210SBarry Smith 
10495fce210SBarry Smith   Collective
10595fce210SBarry Smith 
1064165533cSJose E. Roman   Input Parameter:
10795fce210SBarry Smith . sf - star forest
10895fce210SBarry Smith 
10995fce210SBarry Smith   Level: advanced
11095fce210SBarry Smith 
111cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()`
11295fce210SBarry Smith @*/
113d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFReset(PetscSF sf)
114d71ae5a4SJacob Faibussowitsch {
11595fce210SBarry Smith   PetscFunctionBegin;
11695fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
117dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, Reset);
1180dd791a8SStefano Zampini   PetscCall(PetscSFDestroy(&sf->rankssf));
1190dd791a8SStefano Zampini 
12029046d53SLisandro Dalcin   sf->nroots   = -1;
12129046d53SLisandro Dalcin   sf->nleaves  = -1;
12229046d53SLisandro Dalcin   sf->minleaf  = PETSC_MAX_INT;
12329046d53SLisandro Dalcin   sf->maxleaf  = PETSC_MIN_INT;
12495fce210SBarry Smith   sf->mine     = NULL;
12595fce210SBarry Smith   sf->remote   = NULL;
12629046d53SLisandro Dalcin   sf->graphset = PETSC_FALSE;
1279566063dSJacob Faibussowitsch   PetscCall(PetscFree(sf->mine_alloc));
1289566063dSJacob Faibussowitsch   PetscCall(PetscFree(sf->remote_alloc));
12921c688dcSJed Brown   sf->nranks = -1;
1309566063dSJacob Faibussowitsch   PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote));
13129046d53SLisandro Dalcin   sf->degreeknown = PETSC_FALSE;
1329566063dSJacob Faibussowitsch   PetscCall(PetscFree(sf->degree));
1339566063dSJacob Faibussowitsch   if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
1349566063dSJacob Faibussowitsch   if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));
1350dd791a8SStefano Zampini 
136013b3241SStefano Zampini   if (sf->multi) sf->multi->multi = NULL;
1379566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&sf->multi));
1380dd791a8SStefano Zampini 
1399566063dSJacob Faibussowitsch   PetscCall(PetscLayoutDestroy(&sf->map));
14071438e86SJunchao Zhang 
14171438e86SJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
1429566063dSJacob Faibussowitsch   for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i]));
14371438e86SJunchao Zhang #endif
14471438e86SJunchao Zhang 
14595fce210SBarry Smith   sf->setupcalled = PETSC_FALSE;
1463ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
14795fce210SBarry Smith }
14895fce210SBarry Smith 
149cc4c1da9SBarry Smith /*@
150cab54364SBarry Smith   PetscSFSetType - Set the `PetscSF` communication implementation
15195fce210SBarry Smith 
152c3339decSBarry Smith   Collective
15395fce210SBarry Smith 
15495fce210SBarry Smith   Input Parameters:
155cab54364SBarry Smith + sf   - the `PetscSF` context
15695fce210SBarry Smith - type - a known method
157cab54364SBarry Smith .vb
158cab54364SBarry Smith     PETSCSFWINDOW - MPI-2/3 one-sided
159cab54364SBarry Smith     PETSCSFBASIC - basic implementation using MPI-1 two-sided
160cab54364SBarry Smith .ve
16195fce210SBarry Smith 
16295fce210SBarry Smith   Options Database Key:
16320662ed9SBarry Smith . -sf_type <type> - Sets the method; for example `basic` or `window` use -help for a list of available methods
164cab54364SBarry Smith 
165cab54364SBarry Smith   Level: intermediate
16695fce210SBarry Smith 
16795fce210SBarry Smith   Notes:
16820662ed9SBarry Smith   See `PetscSFType` for possible values
16995fce210SBarry Smith 
17020662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`
17195fce210SBarry Smith @*/
172d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type)
173d71ae5a4SJacob Faibussowitsch {
17495fce210SBarry Smith   PetscBool match;
1755f80ce2aSJacob Faibussowitsch   PetscErrorCode (*r)(PetscSF);
17695fce210SBarry Smith 
17795fce210SBarry Smith   PetscFunctionBegin;
17895fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1794f572ea9SToby Isaac   PetscAssertPointer(type, 2);
18095fce210SBarry Smith 
1819566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match));
1823ba16761SJacob Faibussowitsch   if (match) PetscFunctionReturn(PETSC_SUCCESS);
18395fce210SBarry Smith 
1849566063dSJacob Faibussowitsch   PetscCall(PetscFunctionListFind(PetscSFList, type, &r));
1856adde796SStefano Zampini   PetscCheck(r, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type);
18629046d53SLisandro Dalcin   /* Destroy the previous PetscSF implementation context */
187dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, Destroy);
1889566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops)));
1899566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type));
1909566063dSJacob Faibussowitsch   PetscCall((*r)(sf));
1913ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
19295fce210SBarry Smith }
19395fce210SBarry Smith 
194cc4c1da9SBarry Smith /*@
195cab54364SBarry Smith   PetscSFGetType - Get the `PetscSF` communication implementation
19629046d53SLisandro Dalcin 
19729046d53SLisandro Dalcin   Not Collective
19829046d53SLisandro Dalcin 
19929046d53SLisandro Dalcin   Input Parameter:
200cab54364SBarry Smith . sf - the `PetscSF` context
20129046d53SLisandro Dalcin 
20229046d53SLisandro Dalcin   Output Parameter:
203cab54364SBarry Smith . type - the `PetscSF` type name
20429046d53SLisandro Dalcin 
20529046d53SLisandro Dalcin   Level: intermediate
20629046d53SLisandro Dalcin 
20720662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()`
20829046d53SLisandro Dalcin @*/
209d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
210d71ae5a4SJacob Faibussowitsch {
21129046d53SLisandro Dalcin   PetscFunctionBegin;
21229046d53SLisandro Dalcin   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2134f572ea9SToby Isaac   PetscAssertPointer(type, 2);
21429046d53SLisandro Dalcin   *type = ((PetscObject)sf)->type_name;
2153ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
21629046d53SLisandro Dalcin }
21729046d53SLisandro Dalcin 
2181fb7b255SJunchao Zhang /*@C
21920662ed9SBarry Smith   PetscSFDestroy - destroy a star forest
22095fce210SBarry Smith 
22195fce210SBarry Smith   Collective
22295fce210SBarry Smith 
2234165533cSJose E. Roman   Input Parameter:
22495fce210SBarry Smith . sf - address of star forest
22595fce210SBarry Smith 
22695fce210SBarry Smith   Level: intermediate
22795fce210SBarry Smith 
22820662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()`
22995fce210SBarry Smith @*/
230d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFDestroy(PetscSF *sf)
231d71ae5a4SJacob Faibussowitsch {
23295fce210SBarry Smith   PetscFunctionBegin;
2333ba16761SJacob Faibussowitsch   if (!*sf) PetscFunctionReturn(PETSC_SUCCESS);
234f4f49eeaSPierre Jolivet   PetscValidHeaderSpecific(*sf, PETSCSF_CLASSID, 1);
235f4f49eeaSPierre Jolivet   if (--((PetscObject)*sf)->refct > 0) {
2369371c9d4SSatish Balay     *sf = NULL;
2373ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
2389371c9d4SSatish Balay   }
2399566063dSJacob Faibussowitsch   PetscCall(PetscSFReset(*sf));
240f4f49eeaSPierre Jolivet   PetscTryTypeMethod(*sf, Destroy);
2419566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
2429566063dSJacob Faibussowitsch   if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
243c02794c0SJunchao Zhang #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
244715b587bSJunchao Zhang   if ((*sf)->use_stream_aware_mpi) {
245715b587bSJunchao Zhang     PetscCallMPI(MPIX_Stream_free(&(*sf)->mpi_stream));
246715b587bSJunchao Zhang     PetscCallMPI(MPI_Comm_free(&(*sf)->stream_comm));
247715b587bSJunchao Zhang   }
248715b587bSJunchao Zhang #endif
2499566063dSJacob Faibussowitsch   PetscCall(PetscHeaderDestroy(sf));
2503ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
25195fce210SBarry Smith }
25295fce210SBarry Smith 
253d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
254d71ae5a4SJacob Faibussowitsch {
255c4e6a40aSLawrence Mitchell   PetscInt           i, nleaves;
256c4e6a40aSLawrence Mitchell   PetscMPIInt        size;
257c4e6a40aSLawrence Mitchell   const PetscInt    *ilocal;
258c4e6a40aSLawrence Mitchell   const PetscSFNode *iremote;
259c4e6a40aSLawrence Mitchell 
260c4e6a40aSLawrence Mitchell   PetscFunctionBegin;
2613ba16761SJacob Faibussowitsch   if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(PETSC_SUCCESS);
2629566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote));
2639566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
264c4e6a40aSLawrence Mitchell   for (i = 0; i < nleaves; i++) {
265c4e6a40aSLawrence Mitchell     const PetscInt rank   = iremote[i].rank;
266c4e6a40aSLawrence Mitchell     const PetscInt remote = iremote[i].index;
267c4e6a40aSLawrence Mitchell     const PetscInt leaf   = ilocal ? ilocal[i] : i;
268c9cc58a2SBarry Smith     PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size);
26908401ef6SPierre Jolivet     PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i);
27008401ef6SPierre Jolivet     PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i);
271c4e6a40aSLawrence Mitchell   }
2723ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
273c4e6a40aSLawrence Mitchell }
274c4e6a40aSLawrence Mitchell 
27595fce210SBarry Smith /*@
27620662ed9SBarry Smith   PetscSFSetUp - set up communication structures for a `PetscSF`, after this is done it may be used to perform communication
27795fce210SBarry Smith 
27895fce210SBarry Smith   Collective
27995fce210SBarry Smith 
2804165533cSJose E. Roman   Input Parameter:
28195fce210SBarry Smith . sf - star forest communication object
28295fce210SBarry Smith 
28395fce210SBarry Smith   Level: beginner
28495fce210SBarry Smith 
28520662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()`
28695fce210SBarry Smith @*/
287d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetUp(PetscSF sf)
288d71ae5a4SJacob Faibussowitsch {
28995fce210SBarry Smith   PetscFunctionBegin;
29029046d53SLisandro Dalcin   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
29129046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
2923ba16761SJacob Faibussowitsch   if (sf->setupcalled) PetscFunctionReturn(PETSC_SUCCESS);
2939566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0));
2949566063dSJacob Faibussowitsch   PetscCall(PetscSFCheckGraphValid_Private(sf));
2959566063dSJacob Faibussowitsch   if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */
296dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, SetUp);
29720c24465SJunchao Zhang #if defined(PETSC_HAVE_CUDA)
29820c24465SJunchao Zhang   if (sf->backend == PETSCSF_BACKEND_CUDA) {
29971438e86SJunchao Zhang     sf->ops->Malloc = PetscSFMalloc_CUDA;
30071438e86SJunchao Zhang     sf->ops->Free   = PetscSFFree_CUDA;
30120c24465SJunchao Zhang   }
30220c24465SJunchao Zhang #endif
30359af0bd3SScott Kruger #if defined(PETSC_HAVE_HIP)
30459af0bd3SScott Kruger   if (sf->backend == PETSCSF_BACKEND_HIP) {
30559af0bd3SScott Kruger     sf->ops->Malloc = PetscSFMalloc_HIP;
30659af0bd3SScott Kruger     sf->ops->Free   = PetscSFFree_HIP;
30759af0bd3SScott Kruger   }
30859af0bd3SScott Kruger #endif
30920c24465SJunchao Zhang 
31020c24465SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS)
31120c24465SJunchao Zhang   if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
31220c24465SJunchao Zhang     sf->ops->Malloc = PetscSFMalloc_Kokkos;
31320c24465SJunchao Zhang     sf->ops->Free   = PetscSFFree_Kokkos;
31420c24465SJunchao Zhang   }
31520c24465SJunchao Zhang #endif
3169566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0));
31795fce210SBarry Smith   sf->setupcalled = PETSC_TRUE;
3183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
31995fce210SBarry Smith }
32095fce210SBarry Smith 
3218af6ec1cSBarry Smith /*@
322cab54364SBarry Smith   PetscSFSetFromOptions - set `PetscSF` options using the options database
32395fce210SBarry Smith 
32495fce210SBarry Smith   Logically Collective
32595fce210SBarry Smith 
3264165533cSJose E. Roman   Input Parameter:
32795fce210SBarry Smith . sf - star forest
32895fce210SBarry Smith 
32995fce210SBarry Smith   Options Database Keys:
33020662ed9SBarry Smith + -sf_type                                                                                                         - implementation type, see `PetscSFSetType()`
33151ccb202SJunchao Zhang . -sf_rank_order                                                                                                   - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
33220662ed9SBarry Smith . -sf_use_default_stream                                                                                           - Assume callers of `PetscSF` computed the input root/leafdata with the default CUDA stream. `PetscSF` will also
33320662ed9SBarry Smith                             use the default stream to process data. Therefore, no stream synchronization is needed between `PetscSF` and its caller (default: true).
33420662ed9SBarry Smith                             If true, this option only works with `-use_gpu_aware_mpi 1`.
33520662ed9SBarry Smith . -sf_use_stream_aware_mpi                                                                                         - Assume the underlying MPI is CUDA-stream aware and `PetscSF` won't sync streams for send/recv buffers passed to MPI (default: false).
33620662ed9SBarry Smith                                If true, this option only works with `-use_gpu_aware_mpi 1`.
33795fce210SBarry Smith 
33838b5cf2dSJacob Faibussowitsch - -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently `PetscSF` has these backends: cuda - hip and Kokkos.
33959af0bd3SScott Kruger                               On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices,
34020c24465SJunchao Zhang                               the only available is kokkos.
34120c24465SJunchao Zhang 
34295fce210SBarry Smith   Level: intermediate
343cab54364SBarry Smith 
344cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()`
34595fce210SBarry Smith @*/
346d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
347d71ae5a4SJacob Faibussowitsch {
34895fce210SBarry Smith   PetscSFType deft;
34995fce210SBarry Smith   char        type[256];
35095fce210SBarry Smith   PetscBool   flg;
35195fce210SBarry Smith 
35295fce210SBarry Smith   PetscFunctionBegin;
35395fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
354d0609cedSBarry Smith   PetscObjectOptionsBegin((PetscObject)sf);
35595fce210SBarry Smith   deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
3569566063dSJacob Faibussowitsch   PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg));
3579566063dSJacob Faibussowitsch   PetscCall(PetscSFSetType(sf, flg ? type : deft));
3589566063dSJacob Faibussowitsch   PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL));
3597fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
36020c24465SJunchao Zhang   {
36120c24465SJunchao Zhang     char      backendstr[32] = {0};
36259af0bd3SScott Kruger     PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set;
36320c24465SJunchao Zhang     /* Change the defaults set in PetscSFCreate() with command line options */
364d5b43468SJose E. Roman     PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL));
3659566063dSJacob Faibussowitsch     PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL));
3669566063dSJacob Faibussowitsch     PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set));
3679566063dSJacob Faibussowitsch     PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda));
3689566063dSJacob Faibussowitsch     PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos));
3699566063dSJacob Faibussowitsch     PetscCall(PetscStrcasecmp("hip", backendstr, &isHip));
37059af0bd3SScott Kruger   #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
37120c24465SJunchao Zhang     if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
37220c24465SJunchao Zhang     else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
37359af0bd3SScott Kruger     else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
37428b400f6SJacob Faibussowitsch     else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
37520c24465SJunchao Zhang   #elif defined(PETSC_HAVE_KOKKOS)
37608401ef6SPierre Jolivet     PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr);
37720c24465SJunchao Zhang   #endif
378715b587bSJunchao Zhang 
379715b587bSJunchao Zhang   #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
380715b587bSJunchao Zhang     if (sf->use_stream_aware_mpi) {
381715b587bSJunchao Zhang       MPI_Info info;
382715b587bSJunchao Zhang 
383715b587bSJunchao Zhang       PetscCallMPI(MPI_Info_create(&info));
384715b587bSJunchao Zhang       PetscCallMPI(MPI_Info_set(info, "type", "cudaStream_t"));
385715b587bSJunchao Zhang       PetscCallMPI(MPIX_Info_set_hex(info, "value", &PetscDefaultCudaStream, sizeof(PetscDefaultCudaStream)));
386715b587bSJunchao Zhang       PetscCallMPI(MPIX_Stream_create(info, &sf->mpi_stream));
387715b587bSJunchao Zhang       PetscCallMPI(MPI_Info_free(&info));
388715b587bSJunchao Zhang       PetscCallMPI(MPIX_Stream_comm_create(PetscObjectComm((PetscObject)sf), sf->mpi_stream, &sf->stream_comm));
389715b587bSJunchao Zhang     }
390715b587bSJunchao Zhang   #endif
39120c24465SJunchao Zhang   }
392c2a741eeSJunchao Zhang #endif
393dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject);
394d0609cedSBarry Smith   PetscOptionsEnd();
3953ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
39695fce210SBarry Smith }
39795fce210SBarry Smith 
39829046d53SLisandro Dalcin /*@
39995fce210SBarry Smith   PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
40095fce210SBarry Smith 
40195fce210SBarry Smith   Logically Collective
40295fce210SBarry Smith 
4034165533cSJose E. Roman   Input Parameters:
40495fce210SBarry Smith + sf  - star forest
405cab54364SBarry Smith - flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (lower setup cost, but non-deterministic)
40695fce210SBarry Smith 
40795fce210SBarry Smith   Level: advanced
40895fce210SBarry Smith 
40920662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
41095fce210SBarry Smith @*/
411d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg)
412d71ae5a4SJacob Faibussowitsch {
41395fce210SBarry Smith   PetscFunctionBegin;
41495fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
41595fce210SBarry Smith   PetscValidLogicalCollectiveBool(sf, flg, 2);
41628b400f6SJacob Faibussowitsch   PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
41795fce210SBarry Smith   sf->rankorder = flg;
4183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
41995fce210SBarry Smith }
42095fce210SBarry Smith 
4218dbb0df6SBarry Smith /*@C
42295fce210SBarry Smith   PetscSFSetGraph - Set a parallel star forest
42395fce210SBarry Smith 
42495fce210SBarry Smith   Collective
42595fce210SBarry Smith 
4264165533cSJose E. Roman   Input Parameters:
42795fce210SBarry Smith + sf         - star forest
42895fce210SBarry Smith . nroots     - number of root vertices on the current process (these are possible targets for other process to attach leaves)
42995fce210SBarry Smith . nleaves    - number of leaf vertices on the current process, each of these references a root on any process
43020662ed9SBarry Smith . ilocal     - locations of leaves in leafdata buffers, pass `NULL` for contiguous storage (locations must be >= 0, enforced
431c4e6a40aSLawrence Mitchell during setup in debug mode)
43220662ed9SBarry Smith . localmode  - copy mode for `ilocal`
433c4e6a40aSLawrence Mitchell . iremote    - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
434c4e6a40aSLawrence Mitchell during setup in debug mode)
43520662ed9SBarry Smith - remotemode - copy mode for `iremote`
43695fce210SBarry Smith 
43795fce210SBarry Smith   Level: intermediate
43895fce210SBarry Smith 
43995452b02SPatrick Sanan   Notes:
44020662ed9SBarry Smith   Leaf indices in `ilocal` must be unique, otherwise an error occurs.
44138ab3f8aSBarry Smith 
44220662ed9SBarry Smith   Input arrays `ilocal` and `iremote` follow the `PetscCopyMode` semantics.
44320662ed9SBarry Smith   In particular, if `localmode` or `remotemode` is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`,
444db2b9530SVaclav Hapla   PETSc might modify the respective array;
44520662ed9SBarry Smith   if `PETSC_USE_POINTER`, the user must delete the array after `PetscSFDestroy()`.
446cab54364SBarry Smith   Only if `PETSC_COPY_VALUES` is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed).
447db2b9530SVaclav Hapla 
44838b5cf2dSJacob Faibussowitsch   Fortran Notes:
44920662ed9SBarry Smith   In Fortran you must use `PETSC_COPY_VALUES` for `localmode` and `remotemode`.
450c4e6a40aSLawrence Mitchell 
45138b5cf2dSJacob Faibussowitsch   Developer Notes:
452db2b9530SVaclav Hapla   We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf.
45320662ed9SBarry Smith   This also allows to compare leaf sets of two `PetscSF`s easily.
45472bf8598SVaclav Hapla 
45520662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
45695fce210SBarry Smith @*/
457d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, PetscSFNode *iremote, PetscCopyMode remotemode)
458d71ae5a4SJacob Faibussowitsch {
459db2b9530SVaclav Hapla   PetscBool unique, contiguous;
46095fce210SBarry Smith 
46195fce210SBarry Smith   PetscFunctionBegin;
46295fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
4634f572ea9SToby Isaac   if (nleaves > 0 && ilocal) PetscAssertPointer(ilocal, 4);
4644f572ea9SToby Isaac   if (nleaves > 0) PetscAssertPointer(iremote, 6);
46508401ef6SPierre Jolivet   PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots);
46608401ef6SPierre Jolivet   PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves);
4678da24d32SBarry Smith   /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
4688da24d32SBarry Smith    * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */
4698da24d32SBarry Smith   PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode);
4708da24d32SBarry Smith   PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode);
47129046d53SLisandro Dalcin 
4722a67d2daSStefano Zampini   if (sf->nroots >= 0) { /* Reset only if graph already set */
4739566063dSJacob Faibussowitsch     PetscCall(PetscSFReset(sf));
4742a67d2daSStefano Zampini   }
4752a67d2daSStefano Zampini 
4769566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0));
47729046d53SLisandro Dalcin 
47895fce210SBarry Smith   sf->nroots  = nroots;
47995fce210SBarry Smith   sf->nleaves = nleaves;
48029046d53SLisandro Dalcin 
481db2b9530SVaclav Hapla   if (localmode == PETSC_COPY_VALUES && ilocal) {
482db2b9530SVaclav Hapla     PetscInt *tlocal = NULL;
483db2b9530SVaclav Hapla 
4849566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nleaves, &tlocal));
4859566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(tlocal, ilocal, nleaves));
486db2b9530SVaclav Hapla     ilocal = tlocal;
487db2b9530SVaclav Hapla   }
488db2b9530SVaclav Hapla   if (remotemode == PETSC_COPY_VALUES) {
489db2b9530SVaclav Hapla     PetscSFNode *tremote = NULL;
490db2b9530SVaclav Hapla 
4919566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nleaves, &tremote));
4929566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(tremote, iremote, nleaves));
493db2b9530SVaclav Hapla     iremote = tremote;
494db2b9530SVaclav Hapla   }
495db2b9530SVaclav Hapla 
49629046d53SLisandro Dalcin   if (nleaves && ilocal) {
497db2b9530SVaclav Hapla     PetscSFNode work;
498db2b9530SVaclav Hapla 
4999566063dSJacob Faibussowitsch     PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
5009566063dSJacob Faibussowitsch     PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
501db2b9530SVaclav Hapla     unique = PetscNot(unique);
502db2b9530SVaclav Hapla     PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF");
503db2b9530SVaclav Hapla     sf->minleaf = ilocal[0];
504db2b9530SVaclav Hapla     sf->maxleaf = ilocal[nleaves - 1];
505db2b9530SVaclav Hapla     contiguous  = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1);
50629046d53SLisandro Dalcin   } else {
50729046d53SLisandro Dalcin     sf->minleaf = 0;
50829046d53SLisandro Dalcin     sf->maxleaf = nleaves - 1;
509db2b9530SVaclav Hapla     unique      = PETSC_TRUE;
510db2b9530SVaclav Hapla     contiguous  = PETSC_TRUE;
51129046d53SLisandro Dalcin   }
51229046d53SLisandro Dalcin 
513db2b9530SVaclav Hapla   if (contiguous) {
514db2b9530SVaclav Hapla     if (localmode == PETSC_USE_POINTER) {
515db2b9530SVaclav Hapla       ilocal = NULL;
516db2b9530SVaclav Hapla     } else {
5179566063dSJacob Faibussowitsch       PetscCall(PetscFree(ilocal));
518db2b9530SVaclav Hapla     }
519db2b9530SVaclav Hapla   }
520db2b9530SVaclav Hapla   sf->mine = ilocal;
521db2b9530SVaclav Hapla   if (localmode == PETSC_USE_POINTER) {
52229046d53SLisandro Dalcin     sf->mine_alloc = NULL;
523db2b9530SVaclav Hapla   } else {
524db2b9530SVaclav Hapla     sf->mine_alloc = ilocal;
52595fce210SBarry Smith   }
526db2b9530SVaclav Hapla   sf->remote = iremote;
527db2b9530SVaclav Hapla   if (remotemode == PETSC_USE_POINTER) {
52829046d53SLisandro Dalcin     sf->remote_alloc = NULL;
529db2b9530SVaclav Hapla   } else {
530db2b9530SVaclav Hapla     sf->remote_alloc = iremote;
53195fce210SBarry Smith   }
5329566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0));
53329046d53SLisandro Dalcin   sf->graphset = PETSC_TRUE;
5343ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
53595fce210SBarry Smith }
53695fce210SBarry Smith 
53729046d53SLisandro Dalcin /*@
538cab54364SBarry Smith   PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern
539dd5b3ca6SJunchao Zhang 
540dd5b3ca6SJunchao Zhang   Collective
541dd5b3ca6SJunchao Zhang 
542dd5b3ca6SJunchao Zhang   Input Parameters:
543cab54364SBarry Smith + sf      - The `PetscSF`
544cab54364SBarry Smith . map     - Layout of roots over all processes (insignificant when pattern is `PETSCSF_PATTERN_ALLTOALL`)
545cab54364SBarry Smith - pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL`
546cab54364SBarry Smith 
547cab54364SBarry Smith   Level: intermediate
548dd5b3ca6SJunchao Zhang 
549dd5b3ca6SJunchao Zhang   Notes:
55020662ed9SBarry Smith   It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector `x` and its `PetscLayout` is `map`.
55120662ed9SBarry Smith   `n` and `N` are the local and global sizes of `x` respectively.
552dd5b3ca6SJunchao Zhang 
55320662ed9SBarry Smith   With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, it will copy `x` to
55420662ed9SBarry Smith   sequential vectors `y` on all MPI processes.
555dd5b3ca6SJunchao Zhang 
55620662ed9SBarry Smith   With `PETSCSF_PATTERN_GATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, it will copy `x` to a
55720662ed9SBarry Smith   sequential vector `y` on rank 0.
558dd5b3ca6SJunchao Zhang 
55920662ed9SBarry Smith   In the above cases, entries of `x` are roots and entries of `y` are leaves.
560dd5b3ca6SJunchao Zhang 
56120662ed9SBarry Smith   With `PETSCSF_PATTERN_ALLTOALL`, map is insignificant. Suppose NP is size of `sf`'s communicator. The routine
562dd5b3ca6SJunchao Zhang   creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
563cab54364SBarry Smith   of rank j. Here 0 <= i,j < NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount being 1. Note that it does
564dd5b3ca6SJunchao Zhang   not mean one cannot send multiple items. One just needs to create a new MPI datatype for the multiple data
565cab54364SBarry Smith   items with `MPI_Type_contiguous` and use that as the <unit> argument in SF routines.
566dd5b3ca6SJunchao Zhang 
567dd5b3ca6SJunchao Zhang   In this case, roots and leaves are symmetric.
568dd5b3ca6SJunchao Zhang 
569cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
570dd5b3ca6SJunchao Zhang  @*/
571d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern)
572d71ae5a4SJacob Faibussowitsch {
573dd5b3ca6SJunchao Zhang   MPI_Comm    comm;
574dd5b3ca6SJunchao Zhang   PetscInt    n, N, res[2];
575dd5b3ca6SJunchao Zhang   PetscMPIInt rank, size;
576dd5b3ca6SJunchao Zhang   PetscSFType type;
577dd5b3ca6SJunchao Zhang 
578dd5b3ca6SJunchao Zhang   PetscFunctionBegin;
5792abc8c78SJacob Faibussowitsch   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
5804f572ea9SToby Isaac   if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscAssertPointer(map, 2);
5819566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
5822c71b3e2SJacob Faibussowitsch   PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern);
5839566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5849566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
585dd5b3ca6SJunchao Zhang 
586dd5b3ca6SJunchao Zhang   if (pattern == PETSCSF_PATTERN_ALLTOALL) {
587dd5b3ca6SJunchao Zhang     type = PETSCSFALLTOALL;
5889566063dSJacob Faibussowitsch     PetscCall(PetscLayoutCreate(comm, &sf->map));
5899566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetLocalSize(sf->map, size));
5909566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetSize(sf->map, ((PetscInt)size) * size));
5919566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(sf->map));
592dd5b3ca6SJunchao Zhang   } else {
5939566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetLocalSize(map, &n));
5949566063dSJacob Faibussowitsch     PetscCall(PetscLayoutGetSize(map, &N));
595dd5b3ca6SJunchao Zhang     res[0] = n;
596dd5b3ca6SJunchao Zhang     res[1] = -n;
597dd5b3ca6SJunchao Zhang     /* Check if n are same over all ranks so that we can optimize it */
5981c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm));
599dd5b3ca6SJunchao Zhang     if (res[0] == -res[1]) { /* same n */
600dd5b3ca6SJunchao Zhang       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
601dd5b3ca6SJunchao Zhang     } else {
602dd5b3ca6SJunchao Zhang       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
603dd5b3ca6SJunchao Zhang     }
6049566063dSJacob Faibussowitsch     PetscCall(PetscLayoutReference(map, &sf->map));
605dd5b3ca6SJunchao Zhang   }
6069566063dSJacob Faibussowitsch   PetscCall(PetscSFSetType(sf, type));
607dd5b3ca6SJunchao Zhang 
608dd5b3ca6SJunchao Zhang   sf->pattern = pattern;
609dd5b3ca6SJunchao Zhang   sf->mine    = NULL; /* Contiguous */
610dd5b3ca6SJunchao Zhang 
611dd5b3ca6SJunchao Zhang   /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
612dd5b3ca6SJunchao Zhang      Also set other easy stuff.
613dd5b3ca6SJunchao Zhang    */
614dd5b3ca6SJunchao Zhang   if (pattern == PETSCSF_PATTERN_ALLGATHER) {
615dd5b3ca6SJunchao Zhang     sf->nleaves = N;
616dd5b3ca6SJunchao Zhang     sf->nroots  = n;
617dd5b3ca6SJunchao Zhang     sf->nranks  = size;
618dd5b3ca6SJunchao Zhang     sf->minleaf = 0;
619dd5b3ca6SJunchao Zhang     sf->maxleaf = N - 1;
620dd5b3ca6SJunchao Zhang   } else if (pattern == PETSCSF_PATTERN_GATHER) {
621dd5b3ca6SJunchao Zhang     sf->nleaves = rank ? 0 : N;
622dd5b3ca6SJunchao Zhang     sf->nroots  = n;
623dd5b3ca6SJunchao Zhang     sf->nranks  = rank ? 0 : size;
624dd5b3ca6SJunchao Zhang     sf->minleaf = 0;
625dd5b3ca6SJunchao Zhang     sf->maxleaf = rank ? -1 : N - 1;
626dd5b3ca6SJunchao Zhang   } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
627dd5b3ca6SJunchao Zhang     sf->nleaves = size;
628dd5b3ca6SJunchao Zhang     sf->nroots  = size;
629dd5b3ca6SJunchao Zhang     sf->nranks  = size;
630dd5b3ca6SJunchao Zhang     sf->minleaf = 0;
631dd5b3ca6SJunchao Zhang     sf->maxleaf = size - 1;
632dd5b3ca6SJunchao Zhang   }
633dd5b3ca6SJunchao Zhang   sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
634dd5b3ca6SJunchao Zhang   sf->graphset = PETSC_TRUE;
6353ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
636dd5b3ca6SJunchao Zhang }
637dd5b3ca6SJunchao Zhang 
638dd5b3ca6SJunchao Zhang /*@
639cab54364SBarry Smith   PetscSFCreateInverseSF - given a `PetscSF` in which all vertices have degree 1, creates the inverse map
64095fce210SBarry Smith 
64195fce210SBarry Smith   Collective
64295fce210SBarry Smith 
6434165533cSJose E. Roman   Input Parameter:
64495fce210SBarry Smith . sf - star forest to invert
64595fce210SBarry Smith 
6464165533cSJose E. Roman   Output Parameter:
64720662ed9SBarry Smith . isf - inverse of `sf`
6484165533cSJose E. Roman 
64995fce210SBarry Smith   Level: advanced
65095fce210SBarry Smith 
65195fce210SBarry Smith   Notes:
65295fce210SBarry Smith   All roots must have degree 1.
65395fce210SBarry Smith 
65495fce210SBarry Smith   The local space may be a permutation, but cannot be sparse.
65595fce210SBarry Smith 
65620662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetGraph()`
65795fce210SBarry Smith @*/
658d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf)
659d71ae5a4SJacob Faibussowitsch {
66095fce210SBarry Smith   PetscMPIInt     rank;
66195fce210SBarry Smith   PetscInt        i, nroots, nleaves, maxlocal, count, *newilocal;
66295fce210SBarry Smith   const PetscInt *ilocal;
66395fce210SBarry Smith   PetscSFNode    *roots, *leaves;
66495fce210SBarry Smith 
66595fce210SBarry Smith   PetscFunctionBegin;
66629046d53SLisandro Dalcin   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
66729046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
6684f572ea9SToby Isaac   PetscAssertPointer(isf, 2);
66929046d53SLisandro Dalcin 
6709566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL));
67129046d53SLisandro Dalcin   maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
67229046d53SLisandro Dalcin 
6739566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
6749566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves));
675ae9aee6dSMatthew G. Knepley   for (i = 0; i < maxlocal; i++) {
67695fce210SBarry Smith     leaves[i].rank  = rank;
67795fce210SBarry Smith     leaves[i].index = i;
67895fce210SBarry Smith   }
67995fce210SBarry Smith   for (i = 0; i < nroots; i++) {
68095fce210SBarry Smith     roots[i].rank  = -1;
68195fce210SBarry Smith     roots[i].index = -1;
68295fce210SBarry Smith   }
6839566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceBegin(sf, MPIU_2INT, leaves, roots, MPI_REPLACE));
6849566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceEnd(sf, MPIU_2INT, leaves, roots, MPI_REPLACE));
68595fce210SBarry Smith 
68695fce210SBarry Smith   /* Check whether our leaves are sparse */
6879371c9d4SSatish Balay   for (i = 0, count = 0; i < nroots; i++)
6889371c9d4SSatish Balay     if (roots[i].rank >= 0) count++;
68995fce210SBarry Smith   if (count == nroots) newilocal = NULL;
6909371c9d4SSatish Balay   else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ PetscCall(PetscMalloc1(count, &newilocal));
69195fce210SBarry Smith     for (i = 0, count = 0; i < nroots; i++) {
69295fce210SBarry Smith       if (roots[i].rank >= 0) {
69395fce210SBarry Smith         newilocal[count]   = i;
69495fce210SBarry Smith         roots[count].rank  = roots[i].rank;
69595fce210SBarry Smith         roots[count].index = roots[i].index;
69695fce210SBarry Smith         count++;
69795fce210SBarry Smith       }
69895fce210SBarry Smith     }
69995fce210SBarry Smith   }
70095fce210SBarry Smith 
7019566063dSJacob Faibussowitsch   PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf));
7029566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES));
7039566063dSJacob Faibussowitsch   PetscCall(PetscFree2(roots, leaves));
7043ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
70595fce210SBarry Smith }
70695fce210SBarry Smith 
70795fce210SBarry Smith /*@
708cab54364SBarry Smith   PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph
70995fce210SBarry Smith 
71095fce210SBarry Smith   Collective
71195fce210SBarry Smith 
7124165533cSJose E. Roman   Input Parameters:
71395fce210SBarry Smith + sf  - communication object to duplicate
714cab54364SBarry Smith - opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`)
71595fce210SBarry Smith 
7164165533cSJose E. Roman   Output Parameter:
71795fce210SBarry Smith . newsf - new communication object
71895fce210SBarry Smith 
71995fce210SBarry Smith   Level: beginner
72095fce210SBarry Smith 
72120662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
72295fce210SBarry Smith @*/
723d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf)
724d71ae5a4SJacob Faibussowitsch {
72529046d53SLisandro Dalcin   PetscSFType  type;
72697929ea7SJunchao Zhang   MPI_Datatype dtype = MPIU_SCALAR;
72795fce210SBarry Smith 
72895fce210SBarry Smith   PetscFunctionBegin;
72929046d53SLisandro Dalcin   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
73029046d53SLisandro Dalcin   PetscValidLogicalCollectiveEnum(sf, opt, 2);
7314f572ea9SToby Isaac   PetscAssertPointer(newsf, 3);
7329566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf));
7339566063dSJacob Faibussowitsch   PetscCall(PetscSFGetType(sf, &type));
7349566063dSJacob Faibussowitsch   if (type) PetscCall(PetscSFSetType(*newsf, type));
73535cb6cd3SPierre Jolivet   (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */
73695fce210SBarry Smith   if (opt == PETSCSF_DUPLICATE_GRAPH) {
737dd5b3ca6SJunchao Zhang     PetscSFCheckGraphSet(sf, 1);
738dd5b3ca6SJunchao Zhang     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
73995fce210SBarry Smith       PetscInt           nroots, nleaves;
74095fce210SBarry Smith       const PetscInt    *ilocal;
74195fce210SBarry Smith       const PetscSFNode *iremote;
7429566063dSJacob Faibussowitsch       PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
7439566063dSJacob Faibussowitsch       PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
744dd5b3ca6SJunchao Zhang     } else {
7459566063dSJacob Faibussowitsch       PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern));
746dd5b3ca6SJunchao Zhang     }
74795fce210SBarry Smith   }
74897929ea7SJunchao Zhang   /* Since oldtype is committed, so is newtype, according to MPI */
7499566063dSJacob Faibussowitsch   if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype));
75097929ea7SJunchao Zhang   (*newsf)->vscat.bs     = sf->vscat.bs;
75197929ea7SJunchao Zhang   (*newsf)->vscat.unit   = dtype;
75297929ea7SJunchao Zhang   (*newsf)->vscat.to_n   = sf->vscat.to_n;
75397929ea7SJunchao Zhang   (*newsf)->vscat.from_n = sf->vscat.from_n;
75497929ea7SJunchao Zhang   /* Do not copy lsf. Build it on demand since it is rarely used */
75597929ea7SJunchao Zhang 
75620c24465SJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
75720c24465SJunchao Zhang   (*newsf)->backend              = sf->backend;
75871438e86SJunchao Zhang   (*newsf)->unknown_input_stream = sf->unknown_input_stream;
75920c24465SJunchao Zhang   (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
76020c24465SJunchao Zhang   (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
76120c24465SJunchao Zhang #endif
762dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, Duplicate, opt, *newsf);
76320c24465SJunchao Zhang   /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
7643ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
76595fce210SBarry Smith }
76695fce210SBarry Smith 
76795fce210SBarry Smith /*@C
76895fce210SBarry Smith   PetscSFGetGraph - Get the graph specifying a parallel star forest
76995fce210SBarry Smith 
77095fce210SBarry Smith   Not Collective
77195fce210SBarry Smith 
7724165533cSJose E. Roman   Input Parameter:
77395fce210SBarry Smith . sf - star forest
77495fce210SBarry Smith 
7754165533cSJose E. Roman   Output Parameters:
77695fce210SBarry Smith + nroots  - number of root vertices on the current process (these are possible targets for other process to attach leaves)
77795fce210SBarry Smith . nleaves - number of leaf vertices on the current process, each of these references a root on any process
77820662ed9SBarry Smith . ilocal  - locations of leaves in leafdata buffers (if returned value is `NULL`, it means leaves are in contiguous storage)
77995fce210SBarry Smith - iremote - remote locations of root vertices for each leaf on the current process
78095fce210SBarry Smith 
781cab54364SBarry Smith   Level: intermediate
782cab54364SBarry Smith 
783373e0d91SLisandro Dalcin   Notes:
78420662ed9SBarry Smith   We are not currently requiring that the graph is set, thus returning `nroots` = -1 if it has not been set yet
785373e0d91SLisandro Dalcin 
78620662ed9SBarry Smith   The returned `ilocal` and `iremote` might contain values in different order than the input ones in `PetscSFSetGraph()`
787db2b9530SVaclav Hapla 
7888dbb0df6SBarry Smith   Fortran Notes:
78920662ed9SBarry Smith   The returned `iremote` array is a copy and must be deallocated after use. Consequently, if you
79020662ed9SBarry Smith   want to update the graph, you must call `PetscSFSetGraph()` after modifying the `iremote` array.
7918dbb0df6SBarry Smith 
79220662ed9SBarry Smith   To check for a `NULL` `ilocal` use
7938dbb0df6SBarry Smith $      if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then
794ca797d7aSLawrence Mitchell 
79520662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`
79695fce210SBarry Smith @*/
797d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote)
798d71ae5a4SJacob Faibussowitsch {
79995fce210SBarry Smith   PetscFunctionBegin;
80095fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
801b8dee149SJunchao Zhang   if (sf->ops->GetGraph) {
802f4f49eeaSPierre Jolivet     PetscCall(sf->ops->GetGraph(sf, nroots, nleaves, ilocal, iremote));
803b8dee149SJunchao Zhang   } else {
80495fce210SBarry Smith     if (nroots) *nroots = sf->nroots;
80595fce210SBarry Smith     if (nleaves) *nleaves = sf->nleaves;
80695fce210SBarry Smith     if (ilocal) *ilocal = sf->mine;
80795fce210SBarry Smith     if (iremote) *iremote = sf->remote;
808b8dee149SJunchao Zhang   }
8093ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
81095fce210SBarry Smith }
81195fce210SBarry Smith 
81229046d53SLisandro Dalcin /*@
81395fce210SBarry Smith   PetscSFGetLeafRange - Get the active leaf ranges
81495fce210SBarry Smith 
81595fce210SBarry Smith   Not Collective
81695fce210SBarry Smith 
8174165533cSJose E. Roman   Input Parameter:
81895fce210SBarry Smith . sf - star forest
81995fce210SBarry Smith 
8204165533cSJose E. Roman   Output Parameters:
82120662ed9SBarry Smith + minleaf - minimum active leaf on this process. Returns 0 if there are no leaves.
82220662ed9SBarry Smith - maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves.
82395fce210SBarry Smith 
82495fce210SBarry Smith   Level: developer
82595fce210SBarry Smith 
82620662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
82795fce210SBarry Smith @*/
828d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf)
829d71ae5a4SJacob Faibussowitsch {
83095fce210SBarry Smith   PetscFunctionBegin;
83195fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
83229046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
83395fce210SBarry Smith   if (minleaf) *minleaf = sf->minleaf;
83495fce210SBarry Smith   if (maxleaf) *maxleaf = sf->maxleaf;
8353ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
83695fce210SBarry Smith }
83795fce210SBarry Smith 
838*ffeef943SBarry Smith /*@
839cab54364SBarry Smith   PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database
840fe2efc57SMark 
84120f4b53cSBarry Smith   Collective
842fe2efc57SMark 
843fe2efc57SMark   Input Parameters:
844fe2efc57SMark + A    - the star forest
845cab54364SBarry Smith . obj  - Optional object that provides the prefix for the option names
846736c3998SJose E. Roman - name - command line option
847fe2efc57SMark 
848fe2efc57SMark   Level: intermediate
849cab54364SBarry Smith 
85020662ed9SBarry Smith   Note:
85120662ed9SBarry Smith   See `PetscObjectViewFromOptions()` for possible `PetscViewer` and `PetscViewerFormat`
85220662ed9SBarry Smith 
853db781477SPatrick Sanan .seealso: `PetscSF`, `PetscSFView()`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
854fe2efc57SMark @*/
855d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[])
856d71ae5a4SJacob Faibussowitsch {
857fe2efc57SMark   PetscFunctionBegin;
858fe2efc57SMark   PetscValidHeaderSpecific(A, PETSCSF_CLASSID, 1);
8599566063dSJacob Faibussowitsch   PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name));
8603ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
861fe2efc57SMark }
862fe2efc57SMark 
863*ffeef943SBarry Smith /*@
86495fce210SBarry Smith   PetscSFView - view a star forest
86595fce210SBarry Smith 
86695fce210SBarry Smith   Collective
86795fce210SBarry Smith 
8684165533cSJose E. Roman   Input Parameters:
86995fce210SBarry Smith + sf     - star forest
870cab54364SBarry Smith - viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD`
87195fce210SBarry Smith 
87295fce210SBarry Smith   Level: beginner
87395fce210SBarry Smith 
874cab54364SBarry Smith .seealso: `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()`
87595fce210SBarry Smith @*/
876d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer)
877d71ae5a4SJacob Faibussowitsch {
87895fce210SBarry Smith   PetscBool         iascii;
87995fce210SBarry Smith   PetscViewerFormat format;
88095fce210SBarry Smith 
88195fce210SBarry Smith   PetscFunctionBegin;
88295fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
8839566063dSJacob Faibussowitsch   if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer));
88495fce210SBarry Smith   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
88595fce210SBarry Smith   PetscCheckSameComm(sf, 1, viewer, 2);
8869566063dSJacob Faibussowitsch   if (sf->graphset) PetscCall(PetscSFSetUp(sf));
8879566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
88853dd6d7dSJunchao Zhang   if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
88995fce210SBarry Smith     PetscMPIInt rank;
89081bfa7aaSJed Brown     PetscInt    ii, i, j;
89195fce210SBarry Smith 
8929566063dSJacob Faibussowitsch     PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer));
8939566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPushTab(viewer));
894dd5b3ca6SJunchao Zhang     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
89580153354SVaclav Hapla       if (!sf->graphset) {
8969566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n"));
8979566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPopTab(viewer));
8983ba16761SJacob Faibussowitsch         PetscFunctionReturn(PETSC_SUCCESS);
89980153354SVaclav Hapla       }
9009566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
9019566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
9029566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n", rank, sf->nroots, sf->nleaves, sf->nranks));
90348a46eb9SPierre Jolivet       for (i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index));
9049566063dSJacob Faibussowitsch       PetscCall(PetscViewerFlush(viewer));
9059566063dSJacob Faibussowitsch       PetscCall(PetscViewerGetFormat(viewer, &format));
90695fce210SBarry Smith       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
90781bfa7aaSJed Brown         PetscMPIInt *tmpranks, *perm;
9089566063dSJacob Faibussowitsch         PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm));
9099566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks));
91081bfa7aaSJed Brown         for (i = 0; i < sf->nranks; i++) perm[i] = i;
9119566063dSJacob Faibussowitsch         PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm));
9129566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank));
91381bfa7aaSJed Brown         for (ii = 0; ii < sf->nranks; ii++) {
91481bfa7aaSJed Brown           i = perm[ii];
9159566063dSJacob Faibussowitsch           PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i]));
91648a46eb9SPierre Jolivet           for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d]    %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j]));
91795fce210SBarry Smith         }
9189566063dSJacob Faibussowitsch         PetscCall(PetscFree2(tmpranks, perm));
91995fce210SBarry Smith       }
9209566063dSJacob Faibussowitsch       PetscCall(PetscViewerFlush(viewer));
9219566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
922dd5b3ca6SJunchao Zhang     }
9239566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPopTab(viewer));
92495fce210SBarry Smith   }
925dbbe0bcdSBarry Smith   PetscTryTypeMethod(sf, View, viewer);
9263ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
92795fce210SBarry Smith }
92895fce210SBarry Smith 
92995fce210SBarry Smith /*@C
930dec1416fSJunchao Zhang   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
93195fce210SBarry Smith 
93295fce210SBarry Smith   Not Collective
93395fce210SBarry Smith 
9344165533cSJose E. Roman   Input Parameter:
93595fce210SBarry Smith . sf - star forest
93695fce210SBarry Smith 
9374165533cSJose E. Roman   Output Parameters:
93895fce210SBarry Smith + nranks  - number of ranks referenced by local part
93920662ed9SBarry Smith . ranks   - [`nranks`] array of ranks
94020662ed9SBarry Smith . roffset - [`nranks`+1] offset in `rmine`/`rremote` for each rank
94120662ed9SBarry Smith . rmine   - [`roffset`[`nranks`]] concatenated array holding local indices referencing each remote rank
94220662ed9SBarry Smith - rremote - [`roffset`[`nranks`]] concatenated array holding remote indices referenced for each remote rank
94395fce210SBarry Smith 
94495fce210SBarry Smith   Level: developer
94595fce210SBarry Smith 
946cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGetLeafRanks()`
94795fce210SBarry Smith @*/
948d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscInt *nranks, const PetscMPIInt **ranks, const PetscInt **roffset, const PetscInt **rmine, const PetscInt **rremote)
949d71ae5a4SJacob Faibussowitsch {
95095fce210SBarry Smith   PetscFunctionBegin;
95195fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
95228b400f6SJacob Faibussowitsch   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
953dec1416fSJunchao Zhang   if (sf->ops->GetRootRanks) {
9549927e4dfSBarry Smith     PetscUseTypeMethod(sf, GetRootRanks, nranks, ranks, roffset, rmine, rremote);
955dec1416fSJunchao Zhang   } else {
956dec1416fSJunchao Zhang     /* The generic implementation */
95795fce210SBarry Smith     if (nranks) *nranks = sf->nranks;
95895fce210SBarry Smith     if (ranks) *ranks = sf->ranks;
95995fce210SBarry Smith     if (roffset) *roffset = sf->roffset;
96095fce210SBarry Smith     if (rmine) *rmine = sf->rmine;
96195fce210SBarry Smith     if (rremote) *rremote = sf->rremote;
962dec1416fSJunchao Zhang   }
9633ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
96495fce210SBarry Smith }
96595fce210SBarry Smith 
9668750ddebSJunchao Zhang /*@C
9678750ddebSJunchao Zhang   PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
9688750ddebSJunchao Zhang 
9698750ddebSJunchao Zhang   Not Collective
9708750ddebSJunchao Zhang 
9714165533cSJose E. Roman   Input Parameter:
9728750ddebSJunchao Zhang . sf - star forest
9738750ddebSJunchao Zhang 
9744165533cSJose E. Roman   Output Parameters:
9758750ddebSJunchao Zhang + niranks  - number of leaf ranks referencing roots on this process
97620662ed9SBarry Smith . iranks   - [`niranks`] array of ranks
97720662ed9SBarry Smith . ioffset  - [`niranks`+1] offset in `irootloc` for each rank
97820662ed9SBarry Smith - irootloc - [`ioffset`[`niranks`]] concatenated array holding local indices of roots referenced by each leaf rank
9798750ddebSJunchao Zhang 
9808750ddebSJunchao Zhang   Level: developer
9818750ddebSJunchao Zhang 
982cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGetRootRanks()`
9838750ddebSJunchao Zhang @*/
984d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc)
985d71ae5a4SJacob Faibussowitsch {
9868750ddebSJunchao Zhang   PetscFunctionBegin;
9878750ddebSJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
98828b400f6SJacob Faibussowitsch   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
9898750ddebSJunchao Zhang   if (sf->ops->GetLeafRanks) {
9909927e4dfSBarry Smith     PetscUseTypeMethod(sf, GetLeafRanks, niranks, iranks, ioffset, irootloc);
9918750ddebSJunchao Zhang   } else {
9928750ddebSJunchao Zhang     PetscSFType type;
9939566063dSJacob Faibussowitsch     PetscCall(PetscSFGetType(sf, &type));
99498921bdaSJacob Faibussowitsch     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
9958750ddebSJunchao Zhang   }
9963ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
9978750ddebSJunchao Zhang }
9988750ddebSJunchao Zhang 
999d71ae5a4SJacob Faibussowitsch static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list)
1000d71ae5a4SJacob Faibussowitsch {
1001b5a8e515SJed Brown   PetscInt i;
1002b5a8e515SJed Brown   for (i = 0; i < n; i++) {
1003b5a8e515SJed Brown     if (needle == list[i]) return PETSC_TRUE;
1004b5a8e515SJed Brown   }
1005b5a8e515SJed Brown   return PETSC_FALSE;
1006b5a8e515SJed Brown }
1007b5a8e515SJed Brown 
100895fce210SBarry Smith /*@C
1009cab54364SBarry Smith   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by `PetscSF` implementations.
101021c688dcSJed Brown 
101121c688dcSJed Brown   Collective
101221c688dcSJed Brown 
10134165533cSJose E. Roman   Input Parameters:
1014cab54364SBarry Smith + sf     - `PetscSF` to set up; `PetscSFSetGraph()` must have been called
1015cab54364SBarry Smith - dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange)
101621c688dcSJed Brown 
101721c688dcSJed Brown   Level: developer
101821c688dcSJed Brown 
1019cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGetRootRanks()`
102021c688dcSJed Brown @*/
1021d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup)
1022d71ae5a4SJacob Faibussowitsch {
1023eec179cfSJacob Faibussowitsch   PetscHMapI    table;
1024eec179cfSJacob Faibussowitsch   PetscHashIter pos;
1025b5a8e515SJed Brown   PetscMPIInt   size, groupsize, *groupranks;
1026247e8311SStefano Zampini   PetscInt     *rcount, *ranks;
1027247e8311SStefano Zampini   PetscInt      i, irank = -1, orank = -1;
102821c688dcSJed Brown 
102921c688dcSJed Brown   PetscFunctionBegin;
103021c688dcSJed Brown   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
103129046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
10329566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
1033eec179cfSJacob Faibussowitsch   PetscCall(PetscHMapICreateWithSize(10, &table));
103421c688dcSJed Brown   for (i = 0; i < sf->nleaves; i++) {
103521c688dcSJed Brown     /* Log 1-based rank */
1036eec179cfSJacob Faibussowitsch     PetscCall(PetscHMapISetWithMode(table, sf->remote[i].rank + 1, 1, ADD_VALUES));
103721c688dcSJed Brown   }
1038eec179cfSJacob Faibussowitsch   PetscCall(PetscHMapIGetSize(table, &sf->nranks));
10399566063dSJacob Faibussowitsch   PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote));
10409566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks));
1041eec179cfSJacob Faibussowitsch   PetscHashIterBegin(table, pos);
104221c688dcSJed Brown   for (i = 0; i < sf->nranks; i++) {
1043eec179cfSJacob Faibussowitsch     PetscHashIterGetKey(table, pos, ranks[i]);
1044eec179cfSJacob Faibussowitsch     PetscHashIterGetVal(table, pos, rcount[i]);
1045eec179cfSJacob Faibussowitsch     PetscHashIterNext(table, pos);
104621c688dcSJed Brown     ranks[i]--; /* Convert back to 0-based */
104721c688dcSJed Brown   }
1048eec179cfSJacob Faibussowitsch   PetscCall(PetscHMapIDestroy(&table));
1049b5a8e515SJed Brown 
1050b5a8e515SJed Brown   /* We expect that dgroup is reliably "small" while nranks could be large */
1051b5a8e515SJed Brown   {
10527fb8a5e4SKarl Rupp     MPI_Group    group = MPI_GROUP_NULL;
1053b5a8e515SJed Brown     PetscMPIInt *dgroupranks;
10549566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
10559566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Group_size(dgroup, &groupsize));
10569566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(groupsize, &dgroupranks));
10579566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(groupsize, &groupranks));
1058b5a8e515SJed Brown     for (i = 0; i < groupsize; i++) dgroupranks[i] = i;
10599566063dSJacob Faibussowitsch     if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks));
10609566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Group_free(&group));
10619566063dSJacob Faibussowitsch     PetscCall(PetscFree(dgroupranks));
1062b5a8e515SJed Brown   }
1063b5a8e515SJed Brown 
1064b5a8e515SJed Brown   /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
1065b5a8e515SJed Brown   for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) {
1066b5a8e515SJed Brown     for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */
1067b5a8e515SJed Brown       if (InList(ranks[i], groupsize, groupranks)) break;
1068b5a8e515SJed Brown     }
1069b5a8e515SJed Brown     for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
1070b5a8e515SJed Brown       if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break;
1071b5a8e515SJed Brown     }
1072b5a8e515SJed Brown     if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
1073b5a8e515SJed Brown       PetscInt tmprank, tmpcount;
1074247e8311SStefano Zampini 
1075b5a8e515SJed Brown       tmprank             = ranks[i];
1076b5a8e515SJed Brown       tmpcount            = rcount[i];
1077b5a8e515SJed Brown       ranks[i]            = ranks[sf->ndranks];
1078b5a8e515SJed Brown       rcount[i]           = rcount[sf->ndranks];
1079b5a8e515SJed Brown       ranks[sf->ndranks]  = tmprank;
1080b5a8e515SJed Brown       rcount[sf->ndranks] = tmpcount;
1081b5a8e515SJed Brown       sf->ndranks++;
1082b5a8e515SJed Brown     }
1083b5a8e515SJed Brown   }
10849566063dSJacob Faibussowitsch   PetscCall(PetscFree(groupranks));
10859566063dSJacob Faibussowitsch   PetscCall(PetscSortIntWithArray(sf->ndranks, ranks, rcount));
10865c0db29aSPierre Jolivet   if (rcount) PetscCall(PetscSortIntWithArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks));
108721c688dcSJed Brown   sf->roffset[0] = 0;
108821c688dcSJed Brown   for (i = 0; i < sf->nranks; i++) {
10899566063dSJacob Faibussowitsch     PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i));
109021c688dcSJed Brown     sf->roffset[i + 1] = sf->roffset[i] + rcount[i];
109121c688dcSJed Brown     rcount[i]          = 0;
109221c688dcSJed Brown   }
1093247e8311SStefano Zampini   for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) {
1094247e8311SStefano Zampini     /* short circuit */
1095247e8311SStefano Zampini     if (orank != sf->remote[i].rank) {
109621c688dcSJed Brown       /* Search for index of iremote[i].rank in sf->ranks */
10979566063dSJacob Faibussowitsch       PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->ndranks, sf->ranks, &irank));
1098b5a8e515SJed Brown       if (irank < 0) {
10999566063dSJacob Faibussowitsch         PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank));
1100b5a8e515SJed Brown         if (irank >= 0) irank += sf->ndranks;
110121c688dcSJed Brown       }
1102247e8311SStefano Zampini       orank = sf->remote[i].rank;
1103247e8311SStefano Zampini     }
110408401ef6SPierre Jolivet     PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %" PetscInt_FMT " in array", sf->remote[i].rank);
110521c688dcSJed Brown     sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
110621c688dcSJed Brown     sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
110721c688dcSJed Brown     rcount[irank]++;
110821c688dcSJed Brown   }
11099566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rcount, ranks));
11103ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
111121c688dcSJed Brown }
111221c688dcSJed Brown 
111321c688dcSJed Brown /*@C
111495fce210SBarry Smith   PetscSFGetGroups - gets incoming and outgoing process groups
111595fce210SBarry Smith 
111695fce210SBarry Smith   Collective
111795fce210SBarry Smith 
11184165533cSJose E. Roman   Input Parameter:
111995fce210SBarry Smith . sf - star forest
112095fce210SBarry Smith 
11214165533cSJose E. Roman   Output Parameters:
112295fce210SBarry Smith + incoming - group of origin processes for incoming edges (leaves that reference my roots)
112395fce210SBarry Smith - outgoing - group of destination processes for outgoing edges (roots that I reference)
112495fce210SBarry Smith 
112595fce210SBarry Smith   Level: developer
112695fce210SBarry Smith 
1127cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
112895fce210SBarry Smith @*/
1129d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing)
1130d71ae5a4SJacob Faibussowitsch {
11317fb8a5e4SKarl Rupp   MPI_Group group = MPI_GROUP_NULL;
113295fce210SBarry Smith 
113395fce210SBarry Smith   PetscFunctionBegin;
113408401ef6SPierre Jolivet   PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups");
113595fce210SBarry Smith   if (sf->ingroup == MPI_GROUP_NULL) {
113695fce210SBarry Smith     PetscInt        i;
113795fce210SBarry Smith     const PetscInt *indegree;
113895fce210SBarry Smith     PetscMPIInt     rank, *outranks, *inranks;
113995fce210SBarry Smith     PetscSFNode    *remote;
114095fce210SBarry Smith     PetscSF         bgcount;
114195fce210SBarry Smith 
114295fce210SBarry Smith     /* Compute the number of incoming ranks */
11439566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(sf->nranks, &remote));
114495fce210SBarry Smith     for (i = 0; i < sf->nranks; i++) {
114595fce210SBarry Smith       remote[i].rank  = sf->ranks[i];
114695fce210SBarry Smith       remote[i].index = 0;
114795fce210SBarry Smith     }
11489566063dSJacob Faibussowitsch     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount));
11499566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
11509566063dSJacob Faibussowitsch     PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree));
11519566063dSJacob Faibussowitsch     PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree));
115295fce210SBarry Smith     /* Enumerate the incoming ranks */
11539566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks));
11549566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
115595fce210SBarry Smith     for (i = 0; i < sf->nranks; i++) outranks[i] = rank;
11569566063dSJacob Faibussowitsch     PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks));
11579566063dSJacob Faibussowitsch     PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks));
11589566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
11599566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Group_incl(group, indegree[0], inranks, &sf->ingroup));
11609566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Group_free(&group));
11619566063dSJacob Faibussowitsch     PetscCall(PetscFree2(inranks, outranks));
11629566063dSJacob Faibussowitsch     PetscCall(PetscSFDestroy(&bgcount));
116395fce210SBarry Smith   }
116495fce210SBarry Smith   *incoming = sf->ingroup;
116595fce210SBarry Smith 
116695fce210SBarry Smith   if (sf->outgroup == MPI_GROUP_NULL) {
11679566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
11689566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup));
11699566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Group_free(&group));
117095fce210SBarry Smith   }
117195fce210SBarry Smith   *outgoing = sf->outgroup;
11723ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
117395fce210SBarry Smith }
117495fce210SBarry Smith 
117529046d53SLisandro Dalcin /*@
11760dd791a8SStefano Zampini   PetscSFGetRanksSF - gets the `PetscSF` to perform communications with root ranks
11770dd791a8SStefano Zampini 
11780dd791a8SStefano Zampini   Collective
11790dd791a8SStefano Zampini 
11800dd791a8SStefano Zampini   Input Parameter:
11810dd791a8SStefano Zampini . sf - star forest
11820dd791a8SStefano Zampini 
11830dd791a8SStefano Zampini   Output Parameter:
11840dd791a8SStefano Zampini . rsf - the star forest with a single root per process to perform communications
11850dd791a8SStefano Zampini 
11860dd791a8SStefano Zampini   Level: developer
11870dd791a8SStefano Zampini 
11880dd791a8SStefano Zampini .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetRootRanks()`
11890dd791a8SStefano Zampini @*/
11900dd791a8SStefano Zampini PetscErrorCode PetscSFGetRanksSF(PetscSF sf, PetscSF *rsf)
11910dd791a8SStefano Zampini {
11920dd791a8SStefano Zampini   PetscFunctionBegin;
11930dd791a8SStefano Zampini   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
11940dd791a8SStefano Zampini   PetscAssertPointer(rsf, 2);
11950dd791a8SStefano Zampini   if (!sf->rankssf) {
11960dd791a8SStefano Zampini     PetscSFNode       *rremotes;
11970dd791a8SStefano Zampini     const PetscMPIInt *ranks;
11980dd791a8SStefano Zampini     PetscInt           nranks;
11990dd791a8SStefano Zampini 
12000dd791a8SStefano Zampini     PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
12010dd791a8SStefano Zampini     PetscCall(PetscMalloc1(nranks, &rremotes));
12020dd791a8SStefano Zampini     for (PetscInt i = 0; i < nranks; i++) {
12030dd791a8SStefano Zampini       rremotes[i].rank  = ranks[i];
12040dd791a8SStefano Zampini       rremotes[i].index = 0;
12050dd791a8SStefano Zampini     }
12060dd791a8SStefano Zampini     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &sf->rankssf));
12070dd791a8SStefano Zampini     PetscCall(PetscSFSetGraph(sf->rankssf, 1, nranks, NULL, PETSC_OWN_POINTER, rremotes, PETSC_OWN_POINTER));
12080dd791a8SStefano Zampini   }
12090dd791a8SStefano Zampini   *rsf = sf->rankssf;
12100dd791a8SStefano Zampini   PetscFunctionReturn(PETSC_SUCCESS);
12110dd791a8SStefano Zampini }
12120dd791a8SStefano Zampini 
12130dd791a8SStefano Zampini /*@
1214cab54364SBarry Smith   PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters
121595fce210SBarry Smith 
121695fce210SBarry Smith   Collective
121795fce210SBarry Smith 
12184165533cSJose E. Roman   Input Parameter:
121995fce210SBarry Smith . sf - star forest that may contain roots with 0 or with more than 1 vertex
122095fce210SBarry Smith 
12214165533cSJose E. Roman   Output Parameter:
122295fce210SBarry Smith . multi - star forest with split roots, such that each root has degree exactly 1
122395fce210SBarry Smith 
122495fce210SBarry Smith   Level: developer
122595fce210SBarry Smith 
1226cab54364SBarry Smith   Note:
1227cab54364SBarry Smith   In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi
122895fce210SBarry Smith   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
122995fce210SBarry Smith   edge, it is a candidate for future optimization that might involve its removal.
123095fce210SBarry Smith 
1231cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
123295fce210SBarry Smith @*/
1233d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi)
1234d71ae5a4SJacob Faibussowitsch {
123595fce210SBarry Smith   PetscFunctionBegin;
123695fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
12374f572ea9SToby Isaac   PetscAssertPointer(multi, 2);
123895fce210SBarry Smith   if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
12399566063dSJacob Faibussowitsch     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
124095fce210SBarry Smith     *multi           = sf->multi;
1241013b3241SStefano Zampini     sf->multi->multi = sf->multi;
12423ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
124395fce210SBarry Smith   }
124495fce210SBarry Smith   if (!sf->multi) {
124595fce210SBarry Smith     const PetscInt *indegree;
12469837ea96SMatthew G. Knepley     PetscInt        i, *inoffset, *outones, *outoffset, maxlocal;
124795fce210SBarry Smith     PetscSFNode    *remote;
124829046d53SLisandro Dalcin     maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
12499566063dSJacob Faibussowitsch     PetscCall(PetscSFComputeDegreeBegin(sf, &indegree));
12509566063dSJacob Faibussowitsch     PetscCall(PetscSFComputeDegreeEnd(sf, &indegree));
12519566063dSJacob Faibussowitsch     PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset));
125295fce210SBarry Smith     inoffset[0] = 0;
125395fce210SBarry Smith     for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i];
12549837ea96SMatthew G. Knepley     for (i = 0; i < maxlocal; i++) outones[i] = 1;
12559566063dSJacob Faibussowitsch     PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
12569566063dSJacob Faibussowitsch     PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
125795fce210SBarry Smith     for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
125876bd3646SJed Brown     if (PetscDefined(USE_DEBUG)) {                               /* Check that the expected number of increments occurred */
1259ad540459SPierre Jolivet       for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp");
126076bd3646SJed Brown     }
12619566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(sf->nleaves, &remote));
126295fce210SBarry Smith     for (i = 0; i < sf->nleaves; i++) {
126395fce210SBarry Smith       remote[i].rank  = sf->remote[i].rank;
126438e7336fSToby Isaac       remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
126595fce210SBarry Smith     }
12669566063dSJacob Faibussowitsch     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
1267013b3241SStefano Zampini     sf->multi->multi = sf->multi;
12689566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
126995fce210SBarry Smith     if (sf->rankorder) { /* Sort the ranks */
127095fce210SBarry Smith       PetscMPIInt  rank;
127195fce210SBarry Smith       PetscInt    *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree;
127295fce210SBarry Smith       PetscSFNode *newremote;
12739566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
127495fce210SBarry Smith       for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]);
12759566063dSJacob Faibussowitsch       PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset));
12769837ea96SMatthew G. Knepley       for (i = 0; i < maxlocal; i++) outranks[i] = rank;
12779566063dSJacob Faibussowitsch       PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
12789566063dSJacob Faibussowitsch       PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
127995fce210SBarry Smith       /* Sort the incoming ranks at each vertex, build the inverse map */
128095fce210SBarry Smith       for (i = 0; i < sf->nroots; i++) {
128195fce210SBarry Smith         PetscInt j;
128295fce210SBarry Smith         for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j;
12838e3a54c0SPierre Jolivet         PetscCall(PetscSortIntWithArray(indegree[i], PetscSafePointerPlusOffset(inranks, inoffset[i]), tmpoffset));
128495fce210SBarry Smith         for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
128595fce210SBarry Smith       }
12869566063dSJacob Faibussowitsch       PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
12879566063dSJacob Faibussowitsch       PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
12889566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(sf->nleaves, &newremote));
128995fce210SBarry Smith       for (i = 0; i < sf->nleaves; i++) {
129095fce210SBarry Smith         newremote[i].rank  = sf->remote[i].rank;
129101365b40SToby Isaac         newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
129295fce210SBarry Smith       }
12939566063dSJacob Faibussowitsch       PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER));
12949566063dSJacob Faibussowitsch       PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset));
129595fce210SBarry Smith     }
12969566063dSJacob Faibussowitsch     PetscCall(PetscFree3(inoffset, outones, outoffset));
129795fce210SBarry Smith   }
129895fce210SBarry Smith   *multi = sf->multi;
12993ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
130095fce210SBarry Smith }
130195fce210SBarry Smith 
130295fce210SBarry Smith /*@C
130320662ed9SBarry Smith   PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots of a `PetscSF`, does not remap indices
130495fce210SBarry Smith 
130595fce210SBarry Smith   Collective
130695fce210SBarry Smith 
13074165533cSJose E. Roman   Input Parameters:
130895fce210SBarry Smith + sf        - original star forest
1309ba2a7774SJunchao Zhang . nselected - number of selected roots on this process
1310ba2a7774SJunchao Zhang - selected  - indices of the selected roots on this process
131195fce210SBarry Smith 
13124165533cSJose E. Roman   Output Parameter:
1313cd620004SJunchao Zhang . esf - new star forest
131495fce210SBarry Smith 
131595fce210SBarry Smith   Level: advanced
131695fce210SBarry Smith 
131795fce210SBarry Smith   Note:
1318cab54364SBarry Smith   To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can
131995fce210SBarry Smith   be done by calling PetscSFGetGraph().
132095fce210SBarry Smith 
1321cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
132295fce210SBarry Smith @*/
1323d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf)
1324d71ae5a4SJacob Faibussowitsch {
1325cd620004SJunchao Zhang   PetscInt           i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
1326cd620004SJunchao Zhang   const PetscInt    *ilocal;
1327cd620004SJunchao Zhang   signed char       *rootdata, *leafdata, *leafmem;
1328ba2a7774SJunchao Zhang   const PetscSFNode *iremote;
1329f659e5c7SJunchao Zhang   PetscSFNode       *new_iremote;
1330f659e5c7SJunchao Zhang   MPI_Comm           comm;
133195fce210SBarry Smith 
133295fce210SBarry Smith   PetscFunctionBegin;
133395fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
133429046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
13354f572ea9SToby Isaac   if (nselected) PetscAssertPointer(selected, 3);
13364f572ea9SToby Isaac   PetscAssertPointer(esf, 4);
13370511a646SMatthew G. Knepley 
13389566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
13399566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0));
13409566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
13419566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
1342cd620004SJunchao Zhang 
134376bd3646SJed Brown   if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
1344cd620004SJunchao Zhang     PetscBool dups;
13459566063dSJacob Faibussowitsch     PetscCall(PetscCheckDupsInt(nselected, selected, &dups));
134628b400f6SJacob Faibussowitsch     PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups");
1347511e6246SStefano Zampini     for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root index %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots);
1348cd620004SJunchao Zhang   }
1349f659e5c7SJunchao Zhang 
1350dbbe0bcdSBarry Smith   if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf);
1351dbbe0bcdSBarry Smith   else {
1352cd620004SJunchao Zhang     /* A generic version of creating embedded sf */
13539566063dSJacob Faibussowitsch     PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
1354cd620004SJunchao Zhang     maxlocal = maxleaf - minleaf + 1;
13559566063dSJacob Faibussowitsch     PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem));
13568e3a54c0SPierre Jolivet     leafdata = PetscSafePointerPlusOffset(leafmem, -minleaf);
1357cd620004SJunchao Zhang     /* Tag selected roots and bcast to leaves */
1358cd620004SJunchao Zhang     for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1;
13599566063dSJacob Faibussowitsch     PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
13609566063dSJacob Faibussowitsch     PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
1361ba2a7774SJunchao Zhang 
1362cd620004SJunchao Zhang     /* Build esf with leaves that are still connected */
1363cd620004SJunchao Zhang     esf_nleaves = 0;
1364cd620004SJunchao Zhang     for (i = 0; i < nleaves; i++) {
1365cd620004SJunchao Zhang       j = ilocal ? ilocal[i] : i;
1366cd620004SJunchao Zhang       /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1367cd620004SJunchao Zhang          with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1368cd620004SJunchao Zhang       */
1369cd620004SJunchao Zhang       esf_nleaves += (leafdata[j] ? 1 : 0);
1370cd620004SJunchao Zhang     }
13719566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal));
13729566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(esf_nleaves, &new_iremote));
1373cd620004SJunchao Zhang     for (i = n = 0; i < nleaves; i++) {
1374cd620004SJunchao Zhang       j = ilocal ? ilocal[i] : i;
1375cd620004SJunchao Zhang       if (leafdata[j]) {
1376cd620004SJunchao Zhang         new_ilocal[n]        = j;
1377cd620004SJunchao Zhang         new_iremote[n].rank  = iremote[i].rank;
1378cd620004SJunchao Zhang         new_iremote[n].index = iremote[i].index;
1379fc1ede2bSMatthew G. Knepley         ++n;
138095fce210SBarry Smith       }
138195fce210SBarry Smith     }
13829566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(comm, esf));
13839566063dSJacob Faibussowitsch     PetscCall(PetscSFSetFromOptions(*esf));
13849566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
13859566063dSJacob Faibussowitsch     PetscCall(PetscFree2(rootdata, leafmem));
1386f659e5c7SJunchao Zhang   }
13879566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0));
13883ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
138995fce210SBarry Smith }
139095fce210SBarry Smith 
13912f5fb4c2SMatthew G. Knepley /*@C
139220662ed9SBarry Smith   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves of a `PetscSF`, does not remap indices
13932f5fb4c2SMatthew G. Knepley 
13942f5fb4c2SMatthew G. Knepley   Collective
13952f5fb4c2SMatthew G. Knepley 
13964165533cSJose E. Roman   Input Parameters:
13972f5fb4c2SMatthew G. Knepley + sf        - original star forest
1398f659e5c7SJunchao Zhang . nselected - number of selected leaves on this process
1399f659e5c7SJunchao Zhang - selected  - indices of the selected leaves on this process
14002f5fb4c2SMatthew G. Knepley 
14014165533cSJose E. Roman   Output Parameter:
14022f5fb4c2SMatthew G. Knepley . newsf - new star forest
14032f5fb4c2SMatthew G. Knepley 
14042f5fb4c2SMatthew G. Knepley   Level: advanced
14052f5fb4c2SMatthew G. Knepley 
1406cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
14072f5fb4c2SMatthew G. Knepley @*/
1408d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf)
1409d71ae5a4SJacob Faibussowitsch {
1410f659e5c7SJunchao Zhang   const PetscSFNode *iremote;
1411f659e5c7SJunchao Zhang   PetscSFNode       *new_iremote;
1412f659e5c7SJunchao Zhang   const PetscInt    *ilocal;
1413f659e5c7SJunchao Zhang   PetscInt           i, nroots, *leaves, *new_ilocal;
1414f659e5c7SJunchao Zhang   MPI_Comm           comm;
14152f5fb4c2SMatthew G. Knepley 
14162f5fb4c2SMatthew G. Knepley   PetscFunctionBegin;
14172f5fb4c2SMatthew G. Knepley   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
141829046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf, 1);
14194f572ea9SToby Isaac   if (nselected) PetscAssertPointer(selected, 3);
14204f572ea9SToby Isaac   PetscAssertPointer(newsf, 4);
14212f5fb4c2SMatthew G. Knepley 
1422f659e5c7SJunchao Zhang   /* Uniq selected[] and put results in leaves[] */
14239566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
14249566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nselected, &leaves));
14259566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(leaves, selected, nselected));
14269566063dSJacob Faibussowitsch   PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves));
142708401ef6SPierre Jolivet   PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves);
1428f659e5c7SJunchao Zhang 
1429f659e5c7SJunchao Zhang   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1430dbbe0bcdSBarry Smith   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf);
1431dbbe0bcdSBarry Smith   else {
14329566063dSJacob Faibussowitsch     PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote));
14339566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nselected, &new_ilocal));
14349566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nselected, &new_iremote));
1435f659e5c7SJunchao Zhang     for (i = 0; i < nselected; ++i) {
1436f659e5c7SJunchao Zhang       const PetscInt l     = leaves[i];
1437f659e5c7SJunchao Zhang       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1438f659e5c7SJunchao Zhang       new_iremote[i].rank  = iremote[l].rank;
1439f659e5c7SJunchao Zhang       new_iremote[i].index = iremote[l].index;
14402f5fb4c2SMatthew G. Knepley     }
14419566063dSJacob Faibussowitsch     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf));
14429566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1443f659e5c7SJunchao Zhang   }
14449566063dSJacob Faibussowitsch   PetscCall(PetscFree(leaves));
14453ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
14462f5fb4c2SMatthew G. Knepley }
14472f5fb4c2SMatthew G. Knepley 
144895fce210SBarry Smith /*@C
1449cab54364SBarry Smith   PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()`
14503482bfa8SJunchao Zhang 
1451c3339decSBarry Smith   Collective
14523482bfa8SJunchao Zhang 
14534165533cSJose E. Roman   Input Parameters:
14543482bfa8SJunchao Zhang + sf       - star forest on which to communicate
14553482bfa8SJunchao Zhang . unit     - data type associated with each node
14563482bfa8SJunchao Zhang . rootdata - buffer to broadcast
14573482bfa8SJunchao Zhang - op       - operation to use for reduction
14583482bfa8SJunchao Zhang 
14594165533cSJose E. Roman   Output Parameter:
14603482bfa8SJunchao Zhang . leafdata - buffer to be reduced with values from each leaf's respective root
14613482bfa8SJunchao Zhang 
14623482bfa8SJunchao Zhang   Level: intermediate
14633482bfa8SJunchao Zhang 
146420662ed9SBarry Smith   Note:
146520662ed9SBarry Smith   When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1466da81f932SPierre Jolivet   are host pointers or device pointers, which may incur a noticeable cost. If you already knew the info, you should
1467cab54364SBarry Smith   use `PetscSFBcastWithMemTypeBegin()` instead.
1468cab54364SBarry Smith 
1469cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()`
14703482bfa8SJunchao Zhang @*/
1471d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1472d71ae5a4SJacob Faibussowitsch {
1473eb02082bSJunchao Zhang   PetscMemType rootmtype, leafmtype;
14743482bfa8SJunchao Zhang 
14753482bfa8SJunchao Zhang   PetscFunctionBegin;
14763482bfa8SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
14779566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
14789566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
14799566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(rootdata, &rootmtype));
14809566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1481dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
14829566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
14833ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
14843482bfa8SJunchao Zhang }
14853482bfa8SJunchao Zhang 
14863482bfa8SJunchao Zhang /*@C
148720662ed9SBarry Smith   PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call
148820662ed9SBarry Smith   to `PetscSFBcastEnd()`
1489d0295fc0SJunchao Zhang 
1490c3339decSBarry Smith   Collective
1491d0295fc0SJunchao Zhang 
14924165533cSJose E. Roman   Input Parameters:
1493d0295fc0SJunchao Zhang + sf        - star forest on which to communicate
1494d0295fc0SJunchao Zhang . unit      - data type associated with each node
1495d0295fc0SJunchao Zhang . rootmtype - memory type of rootdata
1496d0295fc0SJunchao Zhang . rootdata  - buffer to broadcast
1497d0295fc0SJunchao Zhang . leafmtype - memory type of leafdata
1498d0295fc0SJunchao Zhang - op        - operation to use for reduction
1499d0295fc0SJunchao Zhang 
15004165533cSJose E. Roman   Output Parameter:
1501d0295fc0SJunchao Zhang . leafdata - buffer to be reduced with values from each leaf's respective root
1502d0295fc0SJunchao Zhang 
1503d0295fc0SJunchao Zhang   Level: intermediate
1504d0295fc0SJunchao Zhang 
1505cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()`
1506d0295fc0SJunchao Zhang @*/
1507d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
1508d71ae5a4SJacob Faibussowitsch {
1509d0295fc0SJunchao Zhang   PetscFunctionBegin;
1510d0295fc0SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
15119566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
15129566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
1513dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
15149566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
15153ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1516d0295fc0SJunchao Zhang }
1517d0295fc0SJunchao Zhang 
1518d0295fc0SJunchao Zhang /*@C
151920662ed9SBarry Smith   PetscSFBcastEnd - end a broadcast and reduce operation started with `PetscSFBcastBegin()` or `PetscSFBcastWithMemTypeBegin()`
15203482bfa8SJunchao Zhang 
15213482bfa8SJunchao Zhang   Collective
15223482bfa8SJunchao Zhang 
15234165533cSJose E. Roman   Input Parameters:
15243482bfa8SJunchao Zhang + sf       - star forest
15253482bfa8SJunchao Zhang . unit     - data type
15263482bfa8SJunchao Zhang . rootdata - buffer to broadcast
15273482bfa8SJunchao Zhang - op       - operation to use for reduction
15283482bfa8SJunchao Zhang 
15294165533cSJose E. Roman   Output Parameter:
15303482bfa8SJunchao Zhang . leafdata - buffer to be reduced with values from each leaf's respective root
15313482bfa8SJunchao Zhang 
15323482bfa8SJunchao Zhang   Level: intermediate
15333482bfa8SJunchao Zhang 
1534cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()`
15353482bfa8SJunchao Zhang @*/
1536d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1537d71ae5a4SJacob Faibussowitsch {
15383482bfa8SJunchao Zhang   PetscFunctionBegin;
15393482bfa8SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
15409566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0));
1541dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op);
15429566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0));
15433ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
15443482bfa8SJunchao Zhang }
15453482bfa8SJunchao Zhang 
15463482bfa8SJunchao Zhang /*@C
1547cab54364SBarry Smith   PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to `PetscSFReduceEnd()`
154895fce210SBarry Smith 
154995fce210SBarry Smith   Collective
155095fce210SBarry Smith 
15514165533cSJose E. Roman   Input Parameters:
155295fce210SBarry Smith + sf       - star forest
155395fce210SBarry Smith . unit     - data type
155495fce210SBarry Smith . leafdata - values to reduce
155595fce210SBarry Smith - op       - reduction operation
155695fce210SBarry Smith 
15574165533cSJose E. Roman   Output Parameter:
155895fce210SBarry Smith . rootdata - result of reduction of values from all leaves of each root
155995fce210SBarry Smith 
156095fce210SBarry Smith   Level: intermediate
156195fce210SBarry Smith 
156220662ed9SBarry Smith   Note:
156320662ed9SBarry Smith   When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1564da81f932SPierre Jolivet   are host pointers or device pointers, which may incur a noticeable cost. If you already knew the info, you should
1565cab54364SBarry Smith   use `PetscSFReduceWithMemTypeBegin()` instead.
1566d0295fc0SJunchao Zhang 
156720662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`, `PetscSFReduceEnd()`
156895fce210SBarry Smith @*/
1569d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1570d71ae5a4SJacob Faibussowitsch {
1571eb02082bSJunchao Zhang   PetscMemType rootmtype, leafmtype;
157295fce210SBarry Smith 
157395fce210SBarry Smith   PetscFunctionBegin;
157495fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
15759566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
15769566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
15779566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(rootdata, &rootmtype));
15789566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1579f4f49eeaSPierre Jolivet   PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
15809566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
15813ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
158295fce210SBarry Smith }
158395fce210SBarry Smith 
158495fce210SBarry Smith /*@C
1585cab54364SBarry Smith   PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to `PetscSFReduceEnd()`
1586d0295fc0SJunchao Zhang 
1587d0295fc0SJunchao Zhang   Collective
1588d0295fc0SJunchao Zhang 
15894165533cSJose E. Roman   Input Parameters:
1590d0295fc0SJunchao Zhang + sf        - star forest
1591d0295fc0SJunchao Zhang . unit      - data type
1592d0295fc0SJunchao Zhang . leafmtype - memory type of leafdata
1593d0295fc0SJunchao Zhang . leafdata  - values to reduce
1594d0295fc0SJunchao Zhang . rootmtype - memory type of rootdata
1595d0295fc0SJunchao Zhang - op        - reduction operation
1596d0295fc0SJunchao Zhang 
15974165533cSJose E. Roman   Output Parameter:
1598d0295fc0SJunchao Zhang . rootdata - result of reduction of values from all leaves of each root
1599d0295fc0SJunchao Zhang 
1600d0295fc0SJunchao Zhang   Level: intermediate
1601d0295fc0SJunchao Zhang 
160220662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`, `PetscSFReduceEnd()`
1603d0295fc0SJunchao Zhang @*/
1604d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
1605d71ae5a4SJacob Faibussowitsch {
1606d0295fc0SJunchao Zhang   PetscFunctionBegin;
1607d0295fc0SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
16089566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
16099566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1610f4f49eeaSPierre Jolivet   PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
16119566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
16123ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1613d0295fc0SJunchao Zhang }
1614d0295fc0SJunchao Zhang 
1615d0295fc0SJunchao Zhang /*@C
161620662ed9SBarry Smith   PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()` or `PetscSFReduceWithMemTypeBegin()`
161795fce210SBarry Smith 
161895fce210SBarry Smith   Collective
161995fce210SBarry Smith 
16204165533cSJose E. Roman   Input Parameters:
162195fce210SBarry Smith + sf       - star forest
162295fce210SBarry Smith . unit     - data type
162395fce210SBarry Smith . leafdata - values to reduce
162495fce210SBarry Smith - op       - reduction operation
162595fce210SBarry Smith 
16264165533cSJose E. Roman   Output Parameter:
162795fce210SBarry Smith . rootdata - result of reduction of values from all leaves of each root
162895fce210SBarry Smith 
162995fce210SBarry Smith   Level: intermediate
163095fce210SBarry Smith 
163120662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`, `PetscSFReduceBegin()`, `PetscSFReduceWithMemTypeBegin()`
163295fce210SBarry Smith @*/
1633d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1634d71ae5a4SJacob Faibussowitsch {
163595fce210SBarry Smith   PetscFunctionBegin;
163695fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
16379566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0));
1638dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op);
16399566063dSJacob Faibussowitsch   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0));
16403ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
164195fce210SBarry Smith }
164295fce210SBarry Smith 
164395fce210SBarry Smith /*@C
1644cab54364SBarry Smith   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value,
1645cab54364SBarry Smith   to be completed with `PetscSFFetchAndOpEnd()`
1646a1729e3fSJunchao Zhang 
1647a1729e3fSJunchao Zhang   Collective
1648a1729e3fSJunchao Zhang 
16494165533cSJose E. Roman   Input Parameters:
1650a1729e3fSJunchao Zhang + sf       - star forest
1651a1729e3fSJunchao Zhang . unit     - data type
1652a1729e3fSJunchao Zhang . leafdata - leaf values to use in reduction
1653a1729e3fSJunchao Zhang - op       - operation to use for reduction
1654a1729e3fSJunchao Zhang 
16554165533cSJose E. Roman   Output Parameters:
1656a1729e3fSJunchao Zhang + rootdata   - root values to be updated, input state is seen by first process to perform an update
1657a1729e3fSJunchao Zhang - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1658a1729e3fSJunchao Zhang 
1659a1729e3fSJunchao Zhang   Level: advanced
1660a1729e3fSJunchao Zhang 
1661a1729e3fSJunchao Zhang   Note:
1662a1729e3fSJunchao Zhang   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1663a1729e3fSJunchao Zhang   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1664a1729e3fSJunchao Zhang   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1665a1729e3fSJunchao Zhang   integers.
1666a1729e3fSJunchao Zhang 
1667cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
1668a1729e3fSJunchao Zhang @*/
1669d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1670d71ae5a4SJacob Faibussowitsch {
1671eb02082bSJunchao Zhang   PetscMemType rootmtype, leafmtype, leafupdatemtype;
1672a1729e3fSJunchao Zhang 
1673a1729e3fSJunchao Zhang   PetscFunctionBegin;
1674a1729e3fSJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
16759566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
16769566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
16779566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(rootdata, &rootmtype));
16789566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(leafdata, &leafmtype));
16799566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype));
168008401ef6SPierre Jolivet   PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1681dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
16829566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
16833ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1684a1729e3fSJunchao Zhang }
1685a1729e3fSJunchao Zhang 
1686a1729e3fSJunchao Zhang /*@C
1687cab54364SBarry Smith   PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by
1688cab54364SBarry Smith   applying operation using my leaf value, to be completed with `PetscSFFetchAndOpEnd()`
1689d3b3e55cSJunchao Zhang 
1690d3b3e55cSJunchao Zhang   Collective
1691d3b3e55cSJunchao Zhang 
1692d3b3e55cSJunchao Zhang   Input Parameters:
1693d3b3e55cSJunchao Zhang + sf              - star forest
1694d3b3e55cSJunchao Zhang . unit            - data type
1695d3b3e55cSJunchao Zhang . rootmtype       - memory type of rootdata
1696d3b3e55cSJunchao Zhang . leafmtype       - memory type of leafdata
1697d3b3e55cSJunchao Zhang . leafdata        - leaf values to use in reduction
1698d3b3e55cSJunchao Zhang . leafupdatemtype - memory type of leafupdate
1699d3b3e55cSJunchao Zhang - op              - operation to use for reduction
1700d3b3e55cSJunchao Zhang 
1701d3b3e55cSJunchao Zhang   Output Parameters:
1702d3b3e55cSJunchao Zhang + rootdata   - root values to be updated, input state is seen by first process to perform an update
1703d3b3e55cSJunchao Zhang - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1704d3b3e55cSJunchao Zhang 
1705d3b3e55cSJunchao Zhang   Level: advanced
1706d3b3e55cSJunchao Zhang 
1707cab54364SBarry Smith   Note:
1708cab54364SBarry Smith   See `PetscSFFetchAndOpBegin()` for more details.
1709d3b3e55cSJunchao Zhang 
171020662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpEnd()`
1711d3b3e55cSJunchao Zhang @*/
1712d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op)
1713d71ae5a4SJacob Faibussowitsch {
1714d3b3e55cSJunchao Zhang   PetscFunctionBegin;
1715d3b3e55cSJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
17169566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
17179566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
171808401ef6SPierre Jolivet   PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1719dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
17209566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
17213ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1722d3b3e55cSJunchao Zhang }
1723d3b3e55cSJunchao Zhang 
1724d3b3e55cSJunchao Zhang /*@C
172520662ed9SBarry Smith   PetscSFFetchAndOpEnd - end operation started in matching call to `PetscSFFetchAndOpBegin()` or `PetscSFFetchAndOpWithMemTypeBegin()`
172620662ed9SBarry Smith   to fetch values from roots and update atomically by applying operation using my leaf value
1727a1729e3fSJunchao Zhang 
1728a1729e3fSJunchao Zhang   Collective
1729a1729e3fSJunchao Zhang 
17304165533cSJose E. Roman   Input Parameters:
1731a1729e3fSJunchao Zhang + sf       - star forest
1732a1729e3fSJunchao Zhang . unit     - data type
1733a1729e3fSJunchao Zhang . leafdata - leaf values to use in reduction
1734a1729e3fSJunchao Zhang - op       - operation to use for reduction
1735a1729e3fSJunchao Zhang 
17364165533cSJose E. Roman   Output Parameters:
1737a1729e3fSJunchao Zhang + rootdata   - root values to be updated, input state is seen by first process to perform an update
1738a1729e3fSJunchao Zhang - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1739a1729e3fSJunchao Zhang 
1740a1729e3fSJunchao Zhang   Level: advanced
1741a1729e3fSJunchao Zhang 
174220662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpBegin()`, `PetscSFFetchAndOpWithMemTypeBegin()`
1743a1729e3fSJunchao Zhang @*/
1744d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1745d71ae5a4SJacob Faibussowitsch {
1746a1729e3fSJunchao Zhang   PetscFunctionBegin;
1747a1729e3fSJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
17489566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
1749dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op);
17509566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
17513ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1752a1729e3fSJunchao Zhang }
1753a1729e3fSJunchao Zhang 
1754a1729e3fSJunchao Zhang /*@C
1755cab54364SBarry Smith   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with `PetscSFComputeDegreeEnd()`
175695fce210SBarry Smith 
175795fce210SBarry Smith   Collective
175895fce210SBarry Smith 
17594165533cSJose E. Roman   Input Parameter:
176095fce210SBarry Smith . sf - star forest
176195fce210SBarry Smith 
17624165533cSJose E. Roman   Output Parameter:
176395fce210SBarry Smith . degree - degree of each root vertex
176495fce210SBarry Smith 
176595fce210SBarry Smith   Level: advanced
176695fce210SBarry Smith 
1767cab54364SBarry Smith   Note:
176820662ed9SBarry Smith   The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
1769ffe67aa5SVáclav Hapla 
1770cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()`
177195fce210SBarry Smith @*/
1772d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt **degree)
1773d71ae5a4SJacob Faibussowitsch {
177495fce210SBarry Smith   PetscFunctionBegin;
177595fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
177695fce210SBarry Smith   PetscSFCheckGraphSet(sf, 1);
17774f572ea9SToby Isaac   PetscAssertPointer(degree, 2);
1778803bd9e8SMatthew G. Knepley   if (!sf->degreeknown) {
17795b0d146aSStefano Zampini     PetscInt i, nroots = sf->nroots, maxlocal;
178028b400f6SJacob Faibussowitsch     PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
17815b0d146aSStefano Zampini     maxlocal = sf->maxleaf - sf->minleaf + 1;
17829566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nroots, &sf->degree));
17839566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
178429046d53SLisandro Dalcin     for (i = 0; i < nroots; i++) sf->degree[i] = 0;
17859837ea96SMatthew G. Knepley     for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1;
17869566063dSJacob Faibussowitsch     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
178795fce210SBarry Smith   }
178895fce210SBarry Smith   *degree = NULL;
17893ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
179095fce210SBarry Smith }
179195fce210SBarry Smith 
179295fce210SBarry Smith /*@C
1793cab54364SBarry Smith   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()`
179495fce210SBarry Smith 
179595fce210SBarry Smith   Collective
179695fce210SBarry Smith 
17974165533cSJose E. Roman   Input Parameter:
179895fce210SBarry Smith . sf - star forest
179995fce210SBarry Smith 
18004165533cSJose E. Roman   Output Parameter:
180195fce210SBarry Smith . degree - degree of each root vertex
180295fce210SBarry Smith 
180395fce210SBarry Smith   Level: developer
180495fce210SBarry Smith 
1805cab54364SBarry Smith   Note:
180620662ed9SBarry Smith   The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
1807ffe67aa5SVáclav Hapla 
1808cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()`
180995fce210SBarry Smith @*/
1810d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt **degree)
1811d71ae5a4SJacob Faibussowitsch {
181295fce210SBarry Smith   PetscFunctionBegin;
181395fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
181495fce210SBarry Smith   PetscSFCheckGraphSet(sf, 1);
18154f572ea9SToby Isaac   PetscAssertPointer(degree, 2);
181695fce210SBarry Smith   if (!sf->degreeknown) {
181728b400f6SJacob Faibussowitsch     PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
18189566063dSJacob Faibussowitsch     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
18199566063dSJacob Faibussowitsch     PetscCall(PetscFree(sf->degreetmp));
182095fce210SBarry Smith     sf->degreeknown = PETSC_TRUE;
182195fce210SBarry Smith   }
182295fce210SBarry Smith   *degree = sf->degree;
18233ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
182495fce210SBarry Smith }
182595fce210SBarry Smith 
1826673100f5SVaclav Hapla /*@C
182720662ed9SBarry Smith   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-`PetscSF` returned by `PetscSFGetMultiSF()`).
182866dfcd1aSVaclav Hapla   Each multi-root is assigned index of the corresponding original root.
1829673100f5SVaclav Hapla 
1830673100f5SVaclav Hapla   Collective
1831673100f5SVaclav Hapla 
18324165533cSJose E. Roman   Input Parameters:
1833673100f5SVaclav Hapla + sf     - star forest
1834cab54364SBarry Smith - degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()`/`PetscSFComputeDegreeEnd()`
1835673100f5SVaclav Hapla 
18364165533cSJose E. Roman   Output Parameters:
183720662ed9SBarry Smith + nMultiRoots             - (optional) number of multi-roots (roots of multi-`PetscSF`)
183820662ed9SBarry Smith - multiRootsOrigNumbering - original indices of multi-roots; length of this array is `nMultiRoots`
1839673100f5SVaclav Hapla 
1840673100f5SVaclav Hapla   Level: developer
1841673100f5SVaclav Hapla 
1842cab54364SBarry Smith   Note:
184320662ed9SBarry Smith   The returned array `multiRootsOrigNumbering` is newly allocated and should be destroyed with `PetscFree()` when no longer needed.
1844ffe67aa5SVáclav Hapla 
1845cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()`
1846673100f5SVaclav Hapla @*/
1847d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1848d71ae5a4SJacob Faibussowitsch {
1849673100f5SVaclav Hapla   PetscSF  msf;
1850673100f5SVaclav Hapla   PetscInt i, j, k, nroots, nmroots;
1851673100f5SVaclav Hapla 
1852673100f5SVaclav Hapla   PetscFunctionBegin;
1853673100f5SVaclav Hapla   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
18549566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
18554f572ea9SToby Isaac   if (nroots) PetscAssertPointer(degree, 2);
18564f572ea9SToby Isaac   if (nMultiRoots) PetscAssertPointer(nMultiRoots, 3);
18574f572ea9SToby Isaac   PetscAssertPointer(multiRootsOrigNumbering, 4);
18589566063dSJacob Faibussowitsch   PetscCall(PetscSFGetMultiSF(sf, &msf));
18599566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL));
18609566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering));
1861673100f5SVaclav Hapla   for (i = 0, j = 0, k = 0; i < nroots; i++) {
1862673100f5SVaclav Hapla     if (!degree[i]) continue;
1863ad540459SPierre Jolivet     for (j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i;
1864673100f5SVaclav Hapla   }
186508401ef6SPierre Jolivet   PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail");
186666dfcd1aSVaclav Hapla   if (nMultiRoots) *nMultiRoots = nmroots;
18673ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1868673100f5SVaclav Hapla }
1869673100f5SVaclav Hapla 
187095fce210SBarry Smith /*@C
1871cab54364SBarry Smith   PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()`
187295fce210SBarry Smith 
187395fce210SBarry Smith   Collective
187495fce210SBarry Smith 
18754165533cSJose E. Roman   Input Parameters:
187695fce210SBarry Smith + sf       - star forest
187795fce210SBarry Smith . unit     - data type
187895fce210SBarry Smith - leafdata - leaf data to gather to roots
187995fce210SBarry Smith 
18804165533cSJose E. Roman   Output Parameter:
188195fce210SBarry Smith . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
188295fce210SBarry Smith 
188395fce210SBarry Smith   Level: intermediate
188495fce210SBarry Smith 
1885cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
188695fce210SBarry Smith @*/
1887d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1888d71ae5a4SJacob Faibussowitsch {
1889a5526d50SJunchao Zhang   PetscSF multi = NULL;
189095fce210SBarry Smith 
189195fce210SBarry Smith   PetscFunctionBegin;
189295fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
18939566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
18949566063dSJacob Faibussowitsch   PetscCall(PetscSFGetMultiSF(sf, &multi));
18959566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE));
18963ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
189795fce210SBarry Smith }
189895fce210SBarry Smith 
189995fce210SBarry Smith /*@C
1900cab54364SBarry Smith   PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()`
190195fce210SBarry Smith 
190295fce210SBarry Smith   Collective
190395fce210SBarry Smith 
19044165533cSJose E. Roman   Input Parameters:
190595fce210SBarry Smith + sf       - star forest
190695fce210SBarry Smith . unit     - data type
190795fce210SBarry Smith - leafdata - leaf data to gather to roots
190895fce210SBarry Smith 
19094165533cSJose E. Roman   Output Parameter:
191095fce210SBarry Smith . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
191195fce210SBarry Smith 
191295fce210SBarry Smith   Level: intermediate
191395fce210SBarry Smith 
1914cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
191595fce210SBarry Smith @*/
1916d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1917d71ae5a4SJacob Faibussowitsch {
1918a5526d50SJunchao Zhang   PetscSF multi = NULL;
191995fce210SBarry Smith 
192095fce210SBarry Smith   PetscFunctionBegin;
192195fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
19229566063dSJacob Faibussowitsch   PetscCall(PetscSFGetMultiSF(sf, &multi));
19239566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE));
19243ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
192595fce210SBarry Smith }
192695fce210SBarry Smith 
192795fce210SBarry Smith /*@C
1928cab54364SBarry Smith   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()`
192995fce210SBarry Smith 
193095fce210SBarry Smith   Collective
193195fce210SBarry Smith 
19324165533cSJose E. Roman   Input Parameters:
193395fce210SBarry Smith + sf            - star forest
193495fce210SBarry Smith . unit          - data type
193595fce210SBarry Smith - multirootdata - root buffer to send to each leaf, one unit of data per leaf
193695fce210SBarry Smith 
19374165533cSJose E. Roman   Output Parameter:
193895fce210SBarry Smith . leafdata - leaf data to be update with personal data from each respective root
193995fce210SBarry Smith 
194095fce210SBarry Smith   Level: intermediate
194195fce210SBarry Smith 
194220662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterEnd()`
194395fce210SBarry Smith @*/
1944d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1945d71ae5a4SJacob Faibussowitsch {
1946a5526d50SJunchao Zhang   PetscSF multi = NULL;
194795fce210SBarry Smith 
194895fce210SBarry Smith   PetscFunctionBegin;
194995fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
19509566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
19519566063dSJacob Faibussowitsch   PetscCall(PetscSFGetMultiSF(sf, &multi));
19529566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE));
19533ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
195495fce210SBarry Smith }
195595fce210SBarry Smith 
195695fce210SBarry Smith /*@C
1957cab54364SBarry Smith   PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()`
195895fce210SBarry Smith 
195995fce210SBarry Smith   Collective
196095fce210SBarry Smith 
19614165533cSJose E. Roman   Input Parameters:
196295fce210SBarry Smith + sf            - star forest
196395fce210SBarry Smith . unit          - data type
196495fce210SBarry Smith - multirootdata - root buffer to send to each leaf, one unit of data per leaf
196595fce210SBarry Smith 
19664165533cSJose E. Roman   Output Parameter:
196795fce210SBarry Smith . leafdata - leaf data to be update with personal data from each respective root
196895fce210SBarry Smith 
196995fce210SBarry Smith   Level: intermediate
197095fce210SBarry Smith 
197120662ed9SBarry Smith .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()`
197295fce210SBarry Smith @*/
1973d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1974d71ae5a4SJacob Faibussowitsch {
1975a5526d50SJunchao Zhang   PetscSF multi = NULL;
197695fce210SBarry Smith 
197795fce210SBarry Smith   PetscFunctionBegin;
197895fce210SBarry Smith   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
19799566063dSJacob Faibussowitsch   PetscCall(PetscSFGetMultiSF(sf, &multi));
19809566063dSJacob Faibussowitsch   PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE));
19813ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
198295fce210SBarry Smith }
1983a7b3aa13SAta Mesgarnejad 
1984d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1985d71ae5a4SJacob Faibussowitsch {
1986a072220fSLawrence Mitchell   PetscInt        i, n, nleaves;
1987a072220fSLawrence Mitchell   const PetscInt *ilocal = NULL;
1988a072220fSLawrence Mitchell   PetscHSetI      seen;
1989a072220fSLawrence Mitchell 
1990a072220fSLawrence Mitchell   PetscFunctionBegin;
1991b458e8f1SJose E. Roman   if (PetscDefined(USE_DEBUG)) {
19929566063dSJacob Faibussowitsch     PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL));
19939566063dSJacob Faibussowitsch     PetscCall(PetscHSetICreate(&seen));
1994a072220fSLawrence Mitchell     for (i = 0; i < nleaves; i++) {
1995a072220fSLawrence Mitchell       const PetscInt leaf = ilocal ? ilocal[i] : i;
19969566063dSJacob Faibussowitsch       PetscCall(PetscHSetIAdd(seen, leaf));
1997a072220fSLawrence Mitchell     }
19989566063dSJacob Faibussowitsch     PetscCall(PetscHSetIGetSize(seen, &n));
199908401ef6SPierre Jolivet     PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique");
20009566063dSJacob Faibussowitsch     PetscCall(PetscHSetIDestroy(&seen));
2001b458e8f1SJose E. Roman   }
20023ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2003a072220fSLawrence Mitchell }
200454729392SStefano Zampini 
2005a7b3aa13SAta Mesgarnejad /*@
2006cab54364SBarry Smith   PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view
2007a7b3aa13SAta Mesgarnejad 
2008a7b3aa13SAta Mesgarnejad   Input Parameters:
2009cab54364SBarry Smith + sfA - The first `PetscSF`
2010cab54364SBarry Smith - sfB - The second `PetscSF`
2011a7b3aa13SAta Mesgarnejad 
20122fe279fdSBarry Smith   Output Parameter:
2013cab54364SBarry Smith . sfBA - The composite `PetscSF`
2014a7b3aa13SAta Mesgarnejad 
2015a7b3aa13SAta Mesgarnejad   Level: developer
2016a7b3aa13SAta Mesgarnejad 
2017a072220fSLawrence Mitchell   Notes:
2018cab54364SBarry Smith   Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
201954729392SStefano Zampini   forests, i.e. the same leaf is not connected with different roots.
202054729392SStefano Zampini 
202120662ed9SBarry Smith   `sfA`'s leaf space and `sfB`'s root space might be partially overlapped. The composition builds
202220662ed9SBarry Smith   a graph with `sfA`'s roots and `sfB`'s leaves only when there is a path between them. Unconnected
202320662ed9SBarry Smith   nodes (roots or leaves) are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a
202420662ed9SBarry Smith   `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfA`, then a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfB`, on connected nodes.
2025a072220fSLawrence Mitchell 
2026db781477SPatrick Sanan .seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
2027a7b3aa13SAta Mesgarnejad @*/
2028d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2029d71ae5a4SJacob Faibussowitsch {
2030a7b3aa13SAta Mesgarnejad   const PetscSFNode *remotePointsA, *remotePointsB;
2031d41018fbSJunchao Zhang   PetscSFNode       *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB;
203254729392SStefano Zampini   const PetscInt    *localPointsA, *localPointsB;
203354729392SStefano Zampini   PetscInt          *localPointsBA;
203454729392SStefano Zampini   PetscInt           i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA;
203554729392SStefano Zampini   PetscBool          denseB;
2036a7b3aa13SAta Mesgarnejad 
2037a7b3aa13SAta Mesgarnejad   PetscFunctionBegin;
2038a7b3aa13SAta Mesgarnejad   PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
203929046d53SLisandro Dalcin   PetscSFCheckGraphSet(sfA, 1);
204029046d53SLisandro Dalcin   PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
204129046d53SLisandro Dalcin   PetscSFCheckGraphSet(sfB, 2);
204254729392SStefano Zampini   PetscCheckSameComm(sfA, 1, sfB, 2);
20434f572ea9SToby Isaac   PetscAssertPointer(sfBA, 3);
20449566063dSJacob Faibussowitsch   PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
20459566063dSJacob Faibussowitsch   PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
204654729392SStefano Zampini 
20479566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
20489566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
204920662ed9SBarry Smith   /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size
205020662ed9SBarry Smith      numRootsB; otherwise, garbage will be broadcasted.
205120662ed9SBarry Smith      Example (comm size = 1):
205220662ed9SBarry Smith      sfA: 0 <- (0, 0)
205320662ed9SBarry Smith      sfB: 100 <- (0, 0)
205420662ed9SBarry Smith           101 <- (0, 1)
205520662ed9SBarry Smith      Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid tartget
205620662ed9SBarry Smith      of sfB, it has to be recasted as [(0, 0), (-1, -1)] so that points 100 and 101 would
205720662ed9SBarry Smith      receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on
205820662ed9SBarry Smith      remotePointsA; if not recasted, point 101 would receive a garbage value.             */
20599566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA));
206054729392SStefano Zampini   for (i = 0; i < numRootsB; i++) {
206154729392SStefano Zampini     reorderedRemotePointsA[i].rank  = -1;
206254729392SStefano Zampini     reorderedRemotePointsA[i].index = -1;
206354729392SStefano Zampini   }
206454729392SStefano Zampini   for (i = 0; i < numLeavesA; i++) {
20650ea77edaSksagiyam     PetscInt localp = localPointsA ? localPointsA[i] : i;
20660ea77edaSksagiyam 
20670ea77edaSksagiyam     if (localp >= numRootsB) continue;
20680ea77edaSksagiyam     reorderedRemotePointsA[localp] = remotePointsA[i];
206954729392SStefano Zampini   }
2070d41018fbSJunchao Zhang   remotePointsA = reorderedRemotePointsA;
20719566063dSJacob Faibussowitsch   PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
20729566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB));
20730ea77edaSksagiyam   for (i = 0; i < maxleaf - minleaf + 1; i++) {
20740ea77edaSksagiyam     leafdataB[i].rank  = -1;
20750ea77edaSksagiyam     leafdataB[i].index = -1;
20760ea77edaSksagiyam   }
20778e3a54c0SPierre Jolivet   PetscCall(PetscSFBcastBegin(sfB, MPIU_2INT, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
20788e3a54c0SPierre Jolivet   PetscCall(PetscSFBcastEnd(sfB, MPIU_2INT, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
20799566063dSJacob Faibussowitsch   PetscCall(PetscFree(reorderedRemotePointsA));
2080d41018fbSJunchao Zhang 
208154729392SStefano Zampini   denseB = (PetscBool)!localPointsB;
208254729392SStefano Zampini   for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
208354729392SStefano Zampini     if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE;
208454729392SStefano Zampini     else numLeavesBA++;
208554729392SStefano Zampini   }
208654729392SStefano Zampini   if (denseB) {
2087d41018fbSJunchao Zhang     localPointsBA  = NULL;
2088d41018fbSJunchao Zhang     remotePointsBA = leafdataB;
2089d41018fbSJunchao Zhang   } else {
20909566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA));
20919566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA));
209254729392SStefano Zampini     for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
209354729392SStefano Zampini       const PetscInt l = localPointsB ? localPointsB[i] : i;
209454729392SStefano Zampini 
209554729392SStefano Zampini       if (leafdataB[l - minleaf].rank == -1) continue;
209654729392SStefano Zampini       remotePointsBA[numLeavesBA] = leafdataB[l - minleaf];
209754729392SStefano Zampini       localPointsBA[numLeavesBA]  = l;
209854729392SStefano Zampini       numLeavesBA++;
209954729392SStefano Zampini     }
21009566063dSJacob Faibussowitsch     PetscCall(PetscFree(leafdataB));
2101d41018fbSJunchao Zhang   }
21029566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
21039566063dSJacob Faibussowitsch   PetscCall(PetscSFSetFromOptions(*sfBA));
21049566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
21053ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2106a7b3aa13SAta Mesgarnejad }
21071c6ba672SJunchao Zhang 
210804c0ada0SJunchao Zhang /*@
2109cab54364SBarry Smith   PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one
211004c0ada0SJunchao Zhang 
211104c0ada0SJunchao Zhang   Input Parameters:
2112cab54364SBarry Smith + sfA - The first `PetscSF`
2113cab54364SBarry Smith - sfB - The second `PetscSF`
211404c0ada0SJunchao Zhang 
21152fe279fdSBarry Smith   Output Parameter:
2116cab54364SBarry Smith . sfBA - The composite `PetscSF`.
211704c0ada0SJunchao Zhang 
211804c0ada0SJunchao Zhang   Level: developer
211904c0ada0SJunchao Zhang 
212054729392SStefano Zampini   Notes:
212120662ed9SBarry Smith   Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
212254729392SStefano Zampini   forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
212320662ed9SBarry Smith   second `PetscSF` must have a degree of 1, i.e., no roots have more than one leaf connected.
212454729392SStefano Zampini 
212520662ed9SBarry Smith   `sfA`'s leaf space and `sfB`'s leaf space might be partially overlapped. The composition builds
212620662ed9SBarry Smith   a graph with `sfA`'s roots and `sfB`'s roots only when there is a path between them. Unconnected
212720662ed9SBarry Smith   roots are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()`
212820662ed9SBarry Smith   on `sfA`, then
212920662ed9SBarry Smith   a `PetscSFReduceBegin()`/`PetscSFReduceEnd()` on `sfB`, on connected roots.
213054729392SStefano Zampini 
2131db781477SPatrick Sanan .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
213204c0ada0SJunchao Zhang @*/
2133d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2134d71ae5a4SJacob Faibussowitsch {
213504c0ada0SJunchao Zhang   const PetscSFNode *remotePointsA, *remotePointsB;
213604c0ada0SJunchao Zhang   PetscSFNode       *remotePointsBA;
213704c0ada0SJunchao Zhang   const PetscInt    *localPointsA, *localPointsB;
213854729392SStefano Zampini   PetscSFNode       *reorderedRemotePointsA = NULL;
213954729392SStefano Zampini   PetscInt           i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA;
21405b0d146aSStefano Zampini   MPI_Op             op;
21415b0d146aSStefano Zampini #if defined(PETSC_USE_64BIT_INDICES)
21425b0d146aSStefano Zampini   PetscBool iswin;
21435b0d146aSStefano Zampini #endif
214404c0ada0SJunchao Zhang 
214504c0ada0SJunchao Zhang   PetscFunctionBegin;
214604c0ada0SJunchao Zhang   PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
214704c0ada0SJunchao Zhang   PetscSFCheckGraphSet(sfA, 1);
214804c0ada0SJunchao Zhang   PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
214904c0ada0SJunchao Zhang   PetscSFCheckGraphSet(sfB, 2);
215054729392SStefano Zampini   PetscCheckSameComm(sfA, 1, sfB, 2);
21514f572ea9SToby Isaac   PetscAssertPointer(sfBA, 3);
21529566063dSJacob Faibussowitsch   PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
21539566063dSJacob Faibussowitsch   PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
215454729392SStefano Zampini 
21559566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
21569566063dSJacob Faibussowitsch   PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
21575b0d146aSStefano Zampini 
21585b0d146aSStefano Zampini   /* TODO: Check roots of sfB have degree of 1 */
21595b0d146aSStefano Zampini   /* Once we implement it, we can replace the MPI_MAXLOC
216083df288dSJunchao Zhang      with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
21615b0d146aSStefano Zampini      We use MPI_MAXLOC only to have a deterministic output from this routine if
21625b0d146aSStefano Zampini      the root condition is not meet.
21635b0d146aSStefano Zampini    */
21645b0d146aSStefano Zampini   op = MPI_MAXLOC;
21655b0d146aSStefano Zampini #if defined(PETSC_USE_64BIT_INDICES)
21665b0d146aSStefano Zampini   /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
21679566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin));
216883df288dSJunchao Zhang   if (iswin) op = MPI_REPLACE;
21695b0d146aSStefano Zampini #endif
21705b0d146aSStefano Zampini 
21719566063dSJacob Faibussowitsch   PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
21729566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA));
217354729392SStefano Zampini   for (i = 0; i < maxleaf - minleaf + 1; i++) {
217454729392SStefano Zampini     reorderedRemotePointsA[i].rank  = -1;
217554729392SStefano Zampini     reorderedRemotePointsA[i].index = -1;
217654729392SStefano Zampini   }
217754729392SStefano Zampini   if (localPointsA) {
217854729392SStefano Zampini     for (i = 0; i < numLeavesA; i++) {
217954729392SStefano Zampini       if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
218054729392SStefano Zampini       reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
218154729392SStefano Zampini     }
218254729392SStefano Zampini   } else {
218354729392SStefano Zampini     for (i = 0; i < numLeavesA; i++) {
218454729392SStefano Zampini       if (i > maxleaf || i < minleaf) continue;
218554729392SStefano Zampini       reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
218654729392SStefano Zampini     }
218754729392SStefano Zampini   }
218854729392SStefano Zampini 
21899566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(numRootsB, &localPointsBA));
21909566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(numRootsB, &remotePointsBA));
219154729392SStefano Zampini   for (i = 0; i < numRootsB; i++) {
219254729392SStefano Zampini     remotePointsBA[i].rank  = -1;
219354729392SStefano Zampini     remotePointsBA[i].index = -1;
219454729392SStefano Zampini   }
219554729392SStefano Zampini 
21968e3a54c0SPierre Jolivet   PetscCall(PetscSFReduceBegin(sfB, MPIU_2INT, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
21978e3a54c0SPierre Jolivet   PetscCall(PetscSFReduceEnd(sfB, MPIU_2INT, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
21989566063dSJacob Faibussowitsch   PetscCall(PetscFree(reorderedRemotePointsA));
219954729392SStefano Zampini   for (i = 0, numLeavesBA = 0; i < numRootsB; i++) {
220054729392SStefano Zampini     if (remotePointsBA[i].rank == -1) continue;
220154729392SStefano Zampini     remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
220254729392SStefano Zampini     remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
220354729392SStefano Zampini     localPointsBA[numLeavesBA]        = i;
220454729392SStefano Zampini     numLeavesBA++;
220554729392SStefano Zampini   }
22069566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
22079566063dSJacob Faibussowitsch   PetscCall(PetscSFSetFromOptions(*sfBA));
22089566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
22093ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
221004c0ada0SJunchao Zhang }
221104c0ada0SJunchao Zhang 
22121c6ba672SJunchao Zhang /*
2213cab54364SBarry Smith   PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF`
22141c6ba672SJunchao Zhang 
22152fe279fdSBarry Smith   Input Parameter:
2216cab54364SBarry Smith . sf - The global `PetscSF`
22171c6ba672SJunchao Zhang 
22182fe279fdSBarry Smith   Output Parameter:
2219cab54364SBarry Smith . out - The local `PetscSF`
2220cab54364SBarry Smith 
2221cab54364SBarry Smith .seealso: `PetscSF`, `PetscSFCreate()`
22221c6ba672SJunchao Zhang  */
2223d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out)
2224d71ae5a4SJacob Faibussowitsch {
22251c6ba672SJunchao Zhang   MPI_Comm           comm;
22261c6ba672SJunchao Zhang   PetscMPIInt        myrank;
22271c6ba672SJunchao Zhang   const PetscInt    *ilocal;
22281c6ba672SJunchao Zhang   const PetscSFNode *iremote;
22291c6ba672SJunchao Zhang   PetscInt           i, j, nroots, nleaves, lnleaves, *lilocal;
22301c6ba672SJunchao Zhang   PetscSFNode       *liremote;
22311c6ba672SJunchao Zhang   PetscSF            lsf;
22321c6ba672SJunchao Zhang 
22331c6ba672SJunchao Zhang   PetscFunctionBegin;
22341c6ba672SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2235dbbe0bcdSBarry Smith   if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out);
2236dbbe0bcdSBarry Smith   else {
22371c6ba672SJunchao Zhang     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
22389566063dSJacob Faibussowitsch     PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
22399566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_rank(comm, &myrank));
22401c6ba672SJunchao Zhang 
22411c6ba672SJunchao Zhang     /* Find out local edges and build a local SF */
22429566063dSJacob Faibussowitsch     PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
22439371c9d4SSatish Balay     for (i = lnleaves = 0; i < nleaves; i++) {
22449371c9d4SSatish Balay       if (iremote[i].rank == (PetscInt)myrank) lnleaves++;
22459371c9d4SSatish Balay     }
22469566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(lnleaves, &lilocal));
22479566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(lnleaves, &liremote));
22481c6ba672SJunchao Zhang 
22491c6ba672SJunchao Zhang     for (i = j = 0; i < nleaves; i++) {
22501c6ba672SJunchao Zhang       if (iremote[i].rank == (PetscInt)myrank) {
22511c6ba672SJunchao Zhang         lilocal[j]        = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
22521c6ba672SJunchao Zhang         liremote[j].rank  = 0;                      /* rank in PETSC_COMM_SELF */
22531c6ba672SJunchao Zhang         liremote[j].index = iremote[i].index;
22541c6ba672SJunchao Zhang         j++;
22551c6ba672SJunchao Zhang       }
22561c6ba672SJunchao Zhang     }
22579566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf));
22589566063dSJacob Faibussowitsch     PetscCall(PetscSFSetFromOptions(lsf));
22599566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER));
22609566063dSJacob Faibussowitsch     PetscCall(PetscSFSetUp(lsf));
22611c6ba672SJunchao Zhang     *out = lsf;
22621c6ba672SJunchao Zhang   }
22633ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
22641c6ba672SJunchao Zhang }
2265dd5b3ca6SJunchao Zhang 
2266dd5b3ca6SJunchao Zhang /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2267d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata)
2268d71ae5a4SJacob Faibussowitsch {
2269eb02082bSJunchao Zhang   PetscMemType rootmtype, leafmtype;
2270dd5b3ca6SJunchao Zhang 
2271dd5b3ca6SJunchao Zhang   PetscFunctionBegin;
2272dd5b3ca6SJunchao Zhang   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
22739566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(sf));
22749566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
22759566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(rootdata, &rootmtype));
22769566063dSJacob Faibussowitsch   PetscCall(PetscGetMemType(leafdata, &leafmtype));
2277dbbe0bcdSBarry Smith   PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata);
22789566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
22793ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2280dd5b3ca6SJunchao Zhang }
2281dd5b3ca6SJunchao Zhang 
2282157edd7aSVaclav Hapla /*@
2283cab54364SBarry Smith   PetscSFConcatenate - concatenate multiple `PetscSF` into one
2284157edd7aSVaclav Hapla 
2285157edd7aSVaclav Hapla   Input Parameters:
2286157edd7aSVaclav Hapla + comm        - the communicator
2287cab54364SBarry Smith . nsfs        - the number of input `PetscSF`
2288cab54364SBarry Smith . sfs         - the array of input `PetscSF`
22891f40158dSVaclav Hapla . rootMode    - the root mode specifying how roots are handled
229020662ed9SBarry Smith - leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or `NULL` for contiguous storage
2291157edd7aSVaclav Hapla 
22922fe279fdSBarry Smith   Output Parameter:
2293cab54364SBarry Smith . newsf - The resulting `PetscSF`
2294157edd7aSVaclav Hapla 
22951f40158dSVaclav Hapla   Level: advanced
2296157edd7aSVaclav Hapla 
2297157edd7aSVaclav Hapla   Notes:
229820662ed9SBarry Smith   The communicator of all `PetscSF`s in `sfs` must be `comm`.
2299157edd7aSVaclav Hapla 
230020662ed9SBarry Smith   Leaves are always concatenated locally, keeping them ordered by the input `PetscSF` index and original local order.
230120662ed9SBarry Smith 
230220662ed9SBarry Smith   The offsets in `leafOffsets` are added to the original leaf indices.
230320662ed9SBarry Smith 
230420662ed9SBarry Smith   If all input SFs use contiguous leaf storage (`ilocal` = `NULL`), `leafOffsets` can be passed as `NULL` as well.
230520662ed9SBarry Smith   In this case, `NULL` is also passed as `ilocal` to the resulting `PetscSF`.
230620662ed9SBarry Smith 
230720662ed9SBarry Smith   If any input `PetscSF` has non-null `ilocal`, `leafOffsets` is needed to distinguish leaves from different input `PetscSF`s.
2308157edd7aSVaclav Hapla   In this case, the user is responsible for providing correct offsets so that the resulting leaves are unique (otherwise an error occurs).
2309157edd7aSVaclav Hapla 
231020662ed9SBarry Smith   All root modes retain the essential connectivity condition.
231120662ed9SBarry Smith   If two leaves of the same input `PetscSF` are connected (sharing the same root), they are also connected in the output `PetscSF`.
231220662ed9SBarry Smith   Parameter `rootMode` controls how the input root spaces are combined.
231320662ed9SBarry Smith   For `PETSCSF_CONCATENATE_ROOTMODE_SHARED`, the root space is considered the same for each input `PetscSF` (checked in debug mode)
231420662ed9SBarry Smith   and is also the same in the output `PetscSF`.
23151f40158dSVaclav Hapla   For `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, the input root spaces are taken as separate and joined.
23161f40158dSVaclav Hapla   `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` joins the root spaces locally;
231720662ed9SBarry Smith   roots of sfs[0], sfs[1], sfs[2], ... are joined on each rank separately, ordered by input `PetscSF` and original local index, and renumbered contiguously.
23181f40158dSVaclav Hapla   `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL` joins the root spaces globally;
23191593df67SStefano Zampini   roots of sfs[0], sfs[1], sfs[2], ... are joined globally, ordered by input `PetscSF` index and original global index, and renumbered contiguously;
23201f40158dSVaclav Hapla   the original root ranks are ignored.
23211f40158dSVaclav Hapla   For both `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`,
232220662ed9SBarry Smith   the output `PetscSF`'s root layout is such that the local number of roots is a sum of the input `PetscSF`'s local numbers of roots on each rank
232320662ed9SBarry Smith   to keep the load balancing.
232420662ed9SBarry Smith   However, for `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, roots can move to different ranks.
23251f40158dSVaclav Hapla 
23261f40158dSVaclav Hapla   Example:
23271f40158dSVaclav Hapla   We can use src/vec/is/sf/tests/ex18.c to compare the root modes. By running
232820662ed9SBarry Smith .vb
232920662ed9SBarry Smith   make -C $PETSC_DIR/src/vec/is/sf/tests ex18
233020662ed9SBarry Smith   for m in {local,global,shared}; do
233120662ed9SBarry Smith     mpirun -n 2 $PETSC_DIR/src/vec/is/sf/tests/ex18 -nsfs 2 -n 2 -root_mode $m -sf_view
233220662ed9SBarry Smith   done
233320662ed9SBarry Smith .ve
233420662ed9SBarry Smith   we generate two identical `PetscSF`s sf_0 and sf_1,
233520662ed9SBarry Smith .vb
233620662ed9SBarry Smith   PetscSF Object: sf_0 2 MPI processes
233720662ed9SBarry Smith     type: basic
233820662ed9SBarry Smith     rank #leaves #roots
233920662ed9SBarry Smith     [ 0]       4      2
234020662ed9SBarry Smith     [ 1]       4      2
234120662ed9SBarry Smith     leaves      roots       roots in global numbering
234220662ed9SBarry Smith     ( 0,  0) <- ( 0,  0)  =   0
234320662ed9SBarry Smith     ( 0,  1) <- ( 0,  1)  =   1
234420662ed9SBarry Smith     ( 0,  2) <- ( 1,  0)  =   2
234520662ed9SBarry Smith     ( 0,  3) <- ( 1,  1)  =   3
234620662ed9SBarry Smith     ( 1,  0) <- ( 0,  0)  =   0
234720662ed9SBarry Smith     ( 1,  1) <- ( 0,  1)  =   1
234820662ed9SBarry Smith     ( 1,  2) <- ( 1,  0)  =   2
234920662ed9SBarry Smith     ( 1,  3) <- ( 1,  1)  =   3
235020662ed9SBarry Smith .ve
2351e33f79d8SJacob Faibussowitsch   and pass them to `PetscSFConcatenate()` along with different choices of `rootMode`, yielding different result_sf\:
235220662ed9SBarry Smith .vb
235320662ed9SBarry Smith   rootMode = local:
235420662ed9SBarry Smith   PetscSF Object: result_sf 2 MPI processes
235520662ed9SBarry Smith     type: basic
235620662ed9SBarry Smith     rank #leaves #roots
235720662ed9SBarry Smith     [ 0]       8      4
235820662ed9SBarry Smith     [ 1]       8      4
235920662ed9SBarry Smith     leaves      roots       roots in global numbering
236020662ed9SBarry Smith     ( 0,  0) <- ( 0,  0)  =   0
236120662ed9SBarry Smith     ( 0,  1) <- ( 0,  1)  =   1
236220662ed9SBarry Smith     ( 0,  2) <- ( 1,  0)  =   4
236320662ed9SBarry Smith     ( 0,  3) <- ( 1,  1)  =   5
236420662ed9SBarry Smith     ( 0,  4) <- ( 0,  2)  =   2
236520662ed9SBarry Smith     ( 0,  5) <- ( 0,  3)  =   3
236620662ed9SBarry Smith     ( 0,  6) <- ( 1,  2)  =   6
236720662ed9SBarry Smith     ( 0,  7) <- ( 1,  3)  =   7
236820662ed9SBarry Smith     ( 1,  0) <- ( 0,  0)  =   0
236920662ed9SBarry Smith     ( 1,  1) <- ( 0,  1)  =   1
237020662ed9SBarry Smith     ( 1,  2) <- ( 1,  0)  =   4
237120662ed9SBarry Smith     ( 1,  3) <- ( 1,  1)  =   5
237220662ed9SBarry Smith     ( 1,  4) <- ( 0,  2)  =   2
237320662ed9SBarry Smith     ( 1,  5) <- ( 0,  3)  =   3
237420662ed9SBarry Smith     ( 1,  6) <- ( 1,  2)  =   6
237520662ed9SBarry Smith     ( 1,  7) <- ( 1,  3)  =   7
237620662ed9SBarry Smith 
237720662ed9SBarry Smith   rootMode = global:
237820662ed9SBarry Smith   PetscSF Object: result_sf 2 MPI processes
237920662ed9SBarry Smith     type: basic
238020662ed9SBarry Smith     rank #leaves #roots
238120662ed9SBarry Smith     [ 0]       8      4
238220662ed9SBarry Smith     [ 1]       8      4
238320662ed9SBarry Smith     leaves      roots       roots in global numbering
238420662ed9SBarry Smith     ( 0,  0) <- ( 0,  0)  =   0
238520662ed9SBarry Smith     ( 0,  1) <- ( 0,  1)  =   1
238620662ed9SBarry Smith     ( 0,  2) <- ( 0,  2)  =   2
238720662ed9SBarry Smith     ( 0,  3) <- ( 0,  3)  =   3
238820662ed9SBarry Smith     ( 0,  4) <- ( 1,  0)  =   4
238920662ed9SBarry Smith     ( 0,  5) <- ( 1,  1)  =   5
239020662ed9SBarry Smith     ( 0,  6) <- ( 1,  2)  =   6
239120662ed9SBarry Smith     ( 0,  7) <- ( 1,  3)  =   7
239220662ed9SBarry Smith     ( 1,  0) <- ( 0,  0)  =   0
239320662ed9SBarry Smith     ( 1,  1) <- ( 0,  1)  =   1
239420662ed9SBarry Smith     ( 1,  2) <- ( 0,  2)  =   2
239520662ed9SBarry Smith     ( 1,  3) <- ( 0,  3)  =   3
239620662ed9SBarry Smith     ( 1,  4) <- ( 1,  0)  =   4
239720662ed9SBarry Smith     ( 1,  5) <- ( 1,  1)  =   5
239820662ed9SBarry Smith     ( 1,  6) <- ( 1,  2)  =   6
239920662ed9SBarry Smith     ( 1,  7) <- ( 1,  3)  =   7
240020662ed9SBarry Smith 
240120662ed9SBarry Smith   rootMode = shared:
240220662ed9SBarry Smith   PetscSF Object: result_sf 2 MPI processes
240320662ed9SBarry Smith     type: basic
240420662ed9SBarry Smith     rank #leaves #roots
240520662ed9SBarry Smith     [ 0]       8      2
240620662ed9SBarry Smith     [ 1]       8      2
240720662ed9SBarry Smith     leaves      roots       roots in global numbering
240820662ed9SBarry Smith     ( 0,  0) <- ( 0,  0)  =   0
240920662ed9SBarry Smith     ( 0,  1) <- ( 0,  1)  =   1
241020662ed9SBarry Smith     ( 0,  2) <- ( 1,  0)  =   2
241120662ed9SBarry Smith     ( 0,  3) <- ( 1,  1)  =   3
241220662ed9SBarry Smith     ( 0,  4) <- ( 0,  0)  =   0
241320662ed9SBarry Smith     ( 0,  5) <- ( 0,  1)  =   1
241420662ed9SBarry Smith     ( 0,  6) <- ( 1,  0)  =   2
241520662ed9SBarry Smith     ( 0,  7) <- ( 1,  1)  =   3
241620662ed9SBarry Smith     ( 1,  0) <- ( 0,  0)  =   0
241720662ed9SBarry Smith     ( 1,  1) <- ( 0,  1)  =   1
241820662ed9SBarry Smith     ( 1,  2) <- ( 1,  0)  =   2
241920662ed9SBarry Smith     ( 1,  3) <- ( 1,  1)  =   3
242020662ed9SBarry Smith     ( 1,  4) <- ( 0,  0)  =   0
242120662ed9SBarry Smith     ( 1,  5) <- ( 0,  1)  =   1
242220662ed9SBarry Smith     ( 1,  6) <- ( 1,  0)  =   2
242320662ed9SBarry Smith     ( 1,  7) <- ( 1,  1)  =   3
242420662ed9SBarry Smith .ve
24251f40158dSVaclav Hapla 
24261f40158dSVaclav Hapla .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFConcatenateRootMode`
2427157edd7aSVaclav Hapla @*/
24281f40158dSVaclav Hapla PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscSFConcatenateRootMode rootMode, PetscInt leafOffsets[], PetscSF *newsf)
2429d71ae5a4SJacob Faibussowitsch {
2430157edd7aSVaclav Hapla   PetscInt     i, s, nLeaves, nRoots;
2431157edd7aSVaclav Hapla   PetscInt    *leafArrayOffsets;
2432157edd7aSVaclav Hapla   PetscInt    *ilocal_new;
2433157edd7aSVaclav Hapla   PetscSFNode *iremote_new;
2434157edd7aSVaclav Hapla   PetscBool    all_ilocal_null = PETSC_FALSE;
24351f40158dSVaclav Hapla   PetscLayout  glayout         = NULL;
24361f40158dSVaclav Hapla   PetscInt    *gremote         = NULL;
24371f40158dSVaclav Hapla   PetscMPIInt  rank, size;
2438157edd7aSVaclav Hapla 
2439157edd7aSVaclav Hapla   PetscFunctionBegin;
244012f479c1SVaclav Hapla   if (PetscDefined(USE_DEBUG)) {
2441157edd7aSVaclav Hapla     PetscSF dummy; /* just to have a PetscObject on comm for input validation */
2442157edd7aSVaclav Hapla 
24439566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(comm, &dummy));
2444157edd7aSVaclav Hapla     PetscValidLogicalCollectiveInt(dummy, nsfs, 2);
24454f572ea9SToby Isaac     PetscAssertPointer(sfs, 3);
2446157edd7aSVaclav Hapla     for (i = 0; i < nsfs; i++) {
2447157edd7aSVaclav Hapla       PetscValidHeaderSpecific(sfs[i], PETSCSF_CLASSID, 3);
2448157edd7aSVaclav Hapla       PetscCheckSameComm(dummy, 1, sfs[i], 3);
2449157edd7aSVaclav Hapla     }
24501f40158dSVaclav Hapla     PetscValidLogicalCollectiveEnum(dummy, rootMode, 4);
24514f572ea9SToby Isaac     if (leafOffsets) PetscAssertPointer(leafOffsets, 5);
24524f572ea9SToby Isaac     PetscAssertPointer(newsf, 6);
24539566063dSJacob Faibussowitsch     PetscCall(PetscSFDestroy(&dummy));
2454157edd7aSVaclav Hapla   }
2455157edd7aSVaclav Hapla   if (!nsfs) {
24569566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(comm, newsf));
24579566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
24583ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
2459157edd7aSVaclav Hapla   }
24609566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
24611f40158dSVaclav Hapla   PetscCallMPI(MPI_Comm_size(comm, &size));
2462157edd7aSVaclav Hapla 
24631f40158dSVaclav Hapla   /* Calculate leaf array offsets */
24649566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets));
2465157edd7aSVaclav Hapla   leafArrayOffsets[0] = 0;
2466157edd7aSVaclav Hapla   for (s = 0; s < nsfs; s++) {
2467157edd7aSVaclav Hapla     PetscInt nl;
2468157edd7aSVaclav Hapla 
24699566063dSJacob Faibussowitsch     PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
2470157edd7aSVaclav Hapla     leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl;
2471157edd7aSVaclav Hapla   }
2472157edd7aSVaclav Hapla   nLeaves = leafArrayOffsets[nsfs];
2473157edd7aSVaclav Hapla 
24741f40158dSVaclav Hapla   /* Calculate number of roots */
24751f40158dSVaclav Hapla   switch (rootMode) {
24761f40158dSVaclav Hapla   case PETSCSF_CONCATENATE_ROOTMODE_SHARED: {
24771f40158dSVaclav Hapla     PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
24781f40158dSVaclav Hapla     if (PetscDefined(USE_DEBUG)) {
24791f40158dSVaclav Hapla       for (s = 1; s < nsfs; s++) {
24801f40158dSVaclav Hapla         PetscInt nr;
24811f40158dSVaclav Hapla 
24821f40158dSVaclav Hapla         PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
24831f40158dSVaclav Hapla         PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "rootMode = %s but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", PetscSFConcatenateRootModes[rootMode], s, nr, nRoots);
24841f40158dSVaclav Hapla       }
24851f40158dSVaclav Hapla     }
24861f40158dSVaclav Hapla   } break;
24871f40158dSVaclav Hapla   case PETSCSF_CONCATENATE_ROOTMODE_GLOBAL: {
24881f40158dSVaclav Hapla     /* Calculate also global layout in this case */
24891f40158dSVaclav Hapla     PetscInt    *nls;
24901f40158dSVaclav Hapla     PetscLayout *lts;
24911f40158dSVaclav Hapla     PetscInt   **inds;
24921f40158dSVaclav Hapla     PetscInt     j;
24931f40158dSVaclav Hapla     PetscInt     rootOffset = 0;
24941f40158dSVaclav Hapla 
24951f40158dSVaclav Hapla     PetscCall(PetscCalloc3(nsfs, &lts, nsfs, &nls, nsfs, &inds));
24961f40158dSVaclav Hapla     PetscCall(PetscLayoutCreate(comm, &glayout));
24971f40158dSVaclav Hapla     glayout->bs = 1;
24981f40158dSVaclav Hapla     glayout->n  = 0;
24991f40158dSVaclav Hapla     glayout->N  = 0;
25001f40158dSVaclav Hapla     for (s = 0; s < nsfs; s++) {
25011f40158dSVaclav Hapla       PetscCall(PetscSFGetGraphLayout(sfs[s], &lts[s], &nls[s], NULL, &inds[s]));
25021f40158dSVaclav Hapla       glayout->n += lts[s]->n;
25031f40158dSVaclav Hapla       glayout->N += lts[s]->N;
25041f40158dSVaclav Hapla     }
25051f40158dSVaclav Hapla     PetscCall(PetscLayoutSetUp(glayout));
25061f40158dSVaclav Hapla     PetscCall(PetscMalloc1(nLeaves, &gremote));
25071f40158dSVaclav Hapla     for (s = 0, j = 0; s < nsfs; s++) {
25081f40158dSVaclav Hapla       for (i = 0; i < nls[s]; i++, j++) gremote[j] = inds[s][i] + rootOffset;
25091f40158dSVaclav Hapla       rootOffset += lts[s]->N;
25101f40158dSVaclav Hapla       PetscCall(PetscLayoutDestroy(&lts[s]));
25111f40158dSVaclav Hapla       PetscCall(PetscFree(inds[s]));
25121f40158dSVaclav Hapla     }
25131f40158dSVaclav Hapla     PetscCall(PetscFree3(lts, nls, inds));
25141f40158dSVaclav Hapla     nRoots = glayout->N;
25151f40158dSVaclav Hapla   } break;
25161f40158dSVaclav Hapla   case PETSCSF_CONCATENATE_ROOTMODE_LOCAL:
25171f40158dSVaclav Hapla     /* nRoots calculated later in this case */
25181f40158dSVaclav Hapla     break;
25191f40158dSVaclav Hapla   default:
25201f40158dSVaclav Hapla     SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Invalid PetscSFConcatenateRootMode %d", rootMode);
25211f40158dSVaclav Hapla   }
25221f40158dSVaclav Hapla 
2523157edd7aSVaclav Hapla   if (!leafOffsets) {
2524157edd7aSVaclav Hapla     all_ilocal_null = PETSC_TRUE;
2525157edd7aSVaclav Hapla     for (s = 0; s < nsfs; s++) {
2526157edd7aSVaclav Hapla       const PetscInt *ilocal;
2527157edd7aSVaclav Hapla 
25289566063dSJacob Faibussowitsch       PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
2529157edd7aSVaclav Hapla       if (ilocal) {
2530157edd7aSVaclav Hapla         all_ilocal_null = PETSC_FALSE;
2531157edd7aSVaclav Hapla         break;
2532157edd7aSVaclav Hapla       }
2533157edd7aSVaclav Hapla     }
2534157edd7aSVaclav Hapla     PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
2535157edd7aSVaclav Hapla   }
2536157edd7aSVaclav Hapla 
2537157edd7aSVaclav Hapla   /* Renumber and concatenate local leaves */
2538157edd7aSVaclav Hapla   ilocal_new = NULL;
2539157edd7aSVaclav Hapla   if (!all_ilocal_null) {
25409566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nLeaves, &ilocal_new));
2541157edd7aSVaclav Hapla     for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1;
2542157edd7aSVaclav Hapla     for (s = 0; s < nsfs; s++) {
2543157edd7aSVaclav Hapla       const PetscInt *ilocal;
25448e3a54c0SPierre Jolivet       PetscInt       *ilocal_l = PetscSafePointerPlusOffset(ilocal_new, leafArrayOffsets[s]);
2545157edd7aSVaclav Hapla       PetscInt        i, nleaves_l;
2546157edd7aSVaclav Hapla 
25479566063dSJacob Faibussowitsch       PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL));
2548157edd7aSVaclav Hapla       for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
2549157edd7aSVaclav Hapla     }
2550157edd7aSVaclav Hapla   }
2551157edd7aSVaclav Hapla 
2552157edd7aSVaclav Hapla   /* Renumber and concatenate remote roots */
25531f40158dSVaclav Hapla   if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL || rootMode == PETSCSF_CONCATENATE_ROOTMODE_SHARED) {
25541f40158dSVaclav Hapla     PetscInt rootOffset = 0;
25551f40158dSVaclav Hapla 
25569566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nLeaves, &iremote_new));
2557157edd7aSVaclav Hapla     for (i = 0; i < nLeaves; i++) {
2558157edd7aSVaclav Hapla       iremote_new[i].rank  = -1;
2559157edd7aSVaclav Hapla       iremote_new[i].index = -1;
2560157edd7aSVaclav Hapla     }
2561157edd7aSVaclav Hapla     for (s = 0; s < nsfs; s++) {
2562157edd7aSVaclav Hapla       PetscInt           i, nl, nr;
2563157edd7aSVaclav Hapla       PetscSF            tmp_sf;
2564157edd7aSVaclav Hapla       const PetscSFNode *iremote;
2565157edd7aSVaclav Hapla       PetscSFNode       *tmp_rootdata;
25668e3a54c0SPierre Jolivet       PetscSFNode       *tmp_leafdata = PetscSafePointerPlusOffset(iremote_new, leafArrayOffsets[s]);
2567157edd7aSVaclav Hapla 
25689566063dSJacob Faibussowitsch       PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote));
25699566063dSJacob Faibussowitsch       PetscCall(PetscSFCreate(comm, &tmp_sf));
2570157edd7aSVaclav Hapla       /* create helper SF with contiguous leaves */
25719566063dSJacob Faibussowitsch       PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
25729566063dSJacob Faibussowitsch       PetscCall(PetscSFSetUp(tmp_sf));
25739566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(nr, &tmp_rootdata));
25741f40158dSVaclav Hapla       if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) {
2575157edd7aSVaclav Hapla         for (i = 0; i < nr; i++) {
25761f40158dSVaclav Hapla           tmp_rootdata[i].index = i + rootOffset;
2577157edd7aSVaclav Hapla           tmp_rootdata[i].rank  = (PetscInt)rank;
2578157edd7aSVaclav Hapla         }
25791f40158dSVaclav Hapla         rootOffset += nr;
25801f40158dSVaclav Hapla       } else {
25811f40158dSVaclav Hapla         for (i = 0; i < nr; i++) {
25821f40158dSVaclav Hapla           tmp_rootdata[i].index = i;
25831f40158dSVaclav Hapla           tmp_rootdata[i].rank  = (PetscInt)rank;
25841f40158dSVaclav Hapla         }
25851f40158dSVaclav Hapla       }
25869566063dSJacob Faibussowitsch       PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
25879566063dSJacob Faibussowitsch       PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
25889566063dSJacob Faibussowitsch       PetscCall(PetscSFDestroy(&tmp_sf));
25899566063dSJacob Faibussowitsch       PetscCall(PetscFree(tmp_rootdata));
2590157edd7aSVaclav Hapla     }
2591aa624791SPierre Jolivet     if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) nRoots = rootOffset; // else nRoots already calculated above
2592157edd7aSVaclav Hapla 
2593157edd7aSVaclav Hapla     /* Build the new SF */
25949566063dSJacob Faibussowitsch     PetscCall(PetscSFCreate(comm, newsf));
25959566063dSJacob Faibussowitsch     PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER));
25961f40158dSVaclav Hapla   } else {
25971f40158dSVaclav Hapla     /* Build the new SF */
25981f40158dSVaclav Hapla     PetscCall(PetscSFCreate(comm, newsf));
25991f40158dSVaclav Hapla     PetscCall(PetscSFSetGraphLayout(*newsf, glayout, nLeaves, ilocal_new, PETSC_OWN_POINTER, gremote));
26001f40158dSVaclav Hapla   }
26019566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(*newsf));
26021f40158dSVaclav Hapla   PetscCall(PetscSFViewFromOptions(*newsf, NULL, "-sf_concat_view"));
26031f40158dSVaclav Hapla   PetscCall(PetscLayoutDestroy(&glayout));
26041f40158dSVaclav Hapla   PetscCall(PetscFree(gremote));
26059566063dSJacob Faibussowitsch   PetscCall(PetscFree(leafArrayOffsets));
26063ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2607157edd7aSVaclav Hapla }
2608