1af0996ceSBarry Smith #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2c4e6a40aSLawrence Mitchell #include <petsc/private/hashseti.h>
353dd6d7dSJunchao Zhang #include <petsc/private/viewerimpl.h>
4eec179cfSJacob Faibussowitsch #include <petsc/private/hashmapi.h>
595fce210SBarry Smith
67fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_CUDA)
7715b587bSJunchao Zhang #include <petscdevice_cuda.h>
87fd2d3dbSJunchao Zhang #endif
97fd2d3dbSJunchao Zhang
107fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_HIP)
117fd2d3dbSJunchao Zhang #include <hip/hip_runtime.h>
127fd2d3dbSJunchao Zhang #endif
137fd2d3dbSJunchao Zhang
142abc8c78SJacob Faibussowitsch #if defined(PETSC_CLANG_STATIC_ANALYZER)
154bf303faSJacob Faibussowitsch extern void PetscSFCheckGraphSet(PetscSF, int);
162abc8c78SJacob Faibussowitsch #else
1795fce210SBarry Smith #if defined(PETSC_USE_DEBUG)
18a8f51744SPierre Jolivet #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME)
1995fce210SBarry Smith #else
209371c9d4SSatish Balay #define PetscSFCheckGraphSet(sf, arg) \
219371c9d4SSatish Balay do { \
229371c9d4SSatish Balay } while (0)
2395fce210SBarry Smith #endif
242abc8c78SJacob Faibussowitsch #endif
2595fce210SBarry Smith
264c8fdceaSLisandro Dalcin const char *const PetscSFDuplicateOptions[] = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL};
271f40158dSVaclav Hapla const char *const PetscSFConcatenateRootModes[] = {"local", "shared", "global", "PetscSFConcatenateRootMode", "PETSCSF_CONCATENATE_ROOTMODE_", NULL};
2895fce210SBarry Smith
298af6ec1cSBarry Smith /*@
3095fce210SBarry Smith PetscSFCreate - create a star forest communication context
3195fce210SBarry Smith
32d083f849SBarry Smith Collective
3395fce210SBarry Smith
344165533cSJose E. Roman Input Parameter:
3595fce210SBarry Smith . comm - communicator on which the star forest will operate
3695fce210SBarry Smith
374165533cSJose E. Roman Output Parameter:
3895fce210SBarry Smith . sf - new star forest context
3995fce210SBarry Smith
4020662ed9SBarry Smith Options Database Key:
416677b1c1SJunchao Zhang + -sf_type basic - Use MPI persistent Isend/Irecv for communication (Default)
426677b1c1SJunchao Zhang . -sf_type window - Use MPI-3 one-sided window for communication
436677b1c1SJunchao Zhang . -sf_type neighbor - Use MPI-3 neighborhood collectives for communication
446677b1c1SJunchao Zhang - -sf_neighbor_persistent <bool> - If true, use MPI-4 persistent neighborhood collectives for communication (used along with -sf_type neighbor)
45dd5b3ca6SJunchao Zhang
4695fce210SBarry Smith Level: intermediate
4795fce210SBarry Smith
48cab54364SBarry Smith Note:
49cab54364SBarry Smith When one knows the communication graph is one of the predefined graph, such as `MPI_Alltoall()`, `MPI_Allgatherv()`,
50cab54364SBarry Smith `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special
512f04c522SBarry Smith `PetscSF`s are optimized and have better performance than the general `PetscSF`s.
52dd5b3ca6SJunchao Zhang
532f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetType`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()`
5495fce210SBarry Smith @*/
PetscSFCreate(MPI_Comm comm,PetscSF * sf)55d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf)
56d71ae5a4SJacob Faibussowitsch {
5795fce210SBarry Smith PetscSF b;
5895fce210SBarry Smith
5995fce210SBarry Smith PetscFunctionBegin;
604f572ea9SToby Isaac PetscAssertPointer(sf, 2);
619566063dSJacob Faibussowitsch PetscCall(PetscSFInitializePackage());
6295fce210SBarry Smith
639566063dSJacob Faibussowitsch PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView));
6495fce210SBarry Smith b->nroots = -1;
6595fce210SBarry Smith b->nleaves = -1;
661690c2aeSBarry Smith b->minleaf = PETSC_INT_MAX;
671690c2aeSBarry Smith b->maxleaf = PETSC_INT_MIN;
6895fce210SBarry Smith b->nranks = -1;
6995fce210SBarry Smith b->rankorder = PETSC_TRUE;
7095fce210SBarry Smith b->ingroup = MPI_GROUP_NULL;
7195fce210SBarry Smith b->outgroup = MPI_GROUP_NULL;
7295fce210SBarry Smith b->graphset = PETSC_FALSE;
7320c24465SJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
7420c24465SJunchao Zhang b->use_gpu_aware_mpi = use_gpu_aware_mpi;
7520c24465SJunchao Zhang b->use_stream_aware_mpi = PETSC_FALSE;
7671438e86SJunchao Zhang b->unknown_input_stream = PETSC_FALSE;
7727f636e8SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
7820c24465SJunchao Zhang b->backend = PETSCSF_BACKEND_KOKKOS;
7927f636e8SJunchao Zhang #elif defined(PETSC_HAVE_CUDA)
8027f636e8SJunchao Zhang b->backend = PETSCSF_BACKEND_CUDA;
8159af0bd3SScott Kruger #elif defined(PETSC_HAVE_HIP)
8259af0bd3SScott Kruger b->backend = PETSCSF_BACKEND_HIP;
8320c24465SJunchao Zhang #endif
8471438e86SJunchao Zhang
8571438e86SJunchao Zhang #if defined(PETSC_HAVE_NVSHMEM)
8671438e86SJunchao Zhang b->use_nvshmem = PETSC_FALSE; /* Default is not to try NVSHMEM */
8771438e86SJunchao Zhang b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
889566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL));
899566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL));
9071438e86SJunchao Zhang #endif
9120c24465SJunchao Zhang #endif
9260c22052SBarry Smith b->vscat.from_n = -1;
9360c22052SBarry Smith b->vscat.to_n = -1;
9460c22052SBarry Smith b->vscat.unit = MPIU_SCALAR;
9595fce210SBarry Smith *sf = b;
963ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
9795fce210SBarry Smith }
9895fce210SBarry Smith
9929046d53SLisandro Dalcin /*@
10095fce210SBarry Smith PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
10195fce210SBarry Smith
10295fce210SBarry Smith Collective
10395fce210SBarry Smith
1044165533cSJose E. Roman Input Parameter:
10595fce210SBarry Smith . sf - star forest
10695fce210SBarry Smith
10795fce210SBarry Smith Level: advanced
10895fce210SBarry Smith
1092f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()`
11095fce210SBarry Smith @*/
PetscSFReset(PetscSF sf)111d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFReset(PetscSF sf)
112d71ae5a4SJacob Faibussowitsch {
11395fce210SBarry Smith PetscFunctionBegin;
11495fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
115dbbe0bcdSBarry Smith PetscTryTypeMethod(sf, Reset);
1160dd791a8SStefano Zampini PetscCall(PetscSFDestroy(&sf->rankssf));
1170dd791a8SStefano Zampini
11829046d53SLisandro Dalcin sf->nroots = -1;
11929046d53SLisandro Dalcin sf->nleaves = -1;
1201690c2aeSBarry Smith sf->minleaf = PETSC_INT_MAX;
1211690c2aeSBarry Smith sf->maxleaf = PETSC_INT_MIN;
12295fce210SBarry Smith sf->mine = NULL;
12395fce210SBarry Smith sf->remote = NULL;
12429046d53SLisandro Dalcin sf->graphset = PETSC_FALSE;
1259566063dSJacob Faibussowitsch PetscCall(PetscFree(sf->mine_alloc));
1269566063dSJacob Faibussowitsch PetscCall(PetscFree(sf->remote_alloc));
12721c688dcSJed Brown sf->nranks = -1;
1289566063dSJacob Faibussowitsch PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote));
12929046d53SLisandro Dalcin sf->degreeknown = PETSC_FALSE;
1309566063dSJacob Faibussowitsch PetscCall(PetscFree(sf->degree));
1319566063dSJacob Faibussowitsch if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
1329566063dSJacob Faibussowitsch if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));
1330dd791a8SStefano Zampini
134013b3241SStefano Zampini if (sf->multi) sf->multi->multi = NULL;
1359566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&sf->multi));
1360dd791a8SStefano Zampini
1379566063dSJacob Faibussowitsch PetscCall(PetscLayoutDestroy(&sf->map));
13871438e86SJunchao Zhang
13971438e86SJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
1409566063dSJacob Faibussowitsch for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i]));
14171438e86SJunchao Zhang #endif
14271438e86SJunchao Zhang
14395fce210SBarry Smith sf->setupcalled = PETSC_FALSE;
1443ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
14595fce210SBarry Smith }
14695fce210SBarry Smith
147cc4c1da9SBarry Smith /*@
148cab54364SBarry Smith PetscSFSetType - Set the `PetscSF` communication implementation
14995fce210SBarry Smith
150c3339decSBarry Smith Collective
15195fce210SBarry Smith
15295fce210SBarry Smith Input Parameters:
153cab54364SBarry Smith + sf - the `PetscSF` context
15495fce210SBarry Smith - type - a known method
155cab54364SBarry Smith .vb
156cab54364SBarry Smith PETSCSFWINDOW - MPI-2/3 one-sided
157cab54364SBarry Smith PETSCSFBASIC - basic implementation using MPI-1 two-sided
158cab54364SBarry Smith .ve
15995fce210SBarry Smith
16095fce210SBarry Smith Options Database Key:
16120662ed9SBarry Smith . -sf_type <type> - Sets the method; for example `basic` or `window` use -help for a list of available methods
162cab54364SBarry Smith
163cab54364SBarry Smith Level: intermediate
16495fce210SBarry Smith
16595fce210SBarry Smith Notes:
16620662ed9SBarry Smith See `PetscSFType` for possible values
16795fce210SBarry Smith
1682f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`
16995fce210SBarry Smith @*/
PetscSFSetType(PetscSF sf,PetscSFType type)170d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type)
171d71ae5a4SJacob Faibussowitsch {
17295fce210SBarry Smith PetscBool match;
1735f80ce2aSJacob Faibussowitsch PetscErrorCode (*r)(PetscSF);
17495fce210SBarry Smith
17595fce210SBarry Smith PetscFunctionBegin;
17695fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1774f572ea9SToby Isaac PetscAssertPointer(type, 2);
17895fce210SBarry Smith
1799566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match));
1803ba16761SJacob Faibussowitsch if (match) PetscFunctionReturn(PETSC_SUCCESS);
18195fce210SBarry Smith
1829566063dSJacob Faibussowitsch PetscCall(PetscFunctionListFind(PetscSFList, type, &r));
1836adde796SStefano Zampini PetscCheck(r, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type);
18429046d53SLisandro Dalcin /* Destroy the previous PetscSF implementation context */
185dbbe0bcdSBarry Smith PetscTryTypeMethod(sf, Destroy);
1869566063dSJacob Faibussowitsch PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops)));
1879566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type));
1889566063dSJacob Faibussowitsch PetscCall((*r)(sf));
1893ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
19095fce210SBarry Smith }
19195fce210SBarry Smith
192cc4c1da9SBarry Smith /*@
193cab54364SBarry Smith PetscSFGetType - Get the `PetscSF` communication implementation
19429046d53SLisandro Dalcin
19529046d53SLisandro Dalcin Not Collective
19629046d53SLisandro Dalcin
19729046d53SLisandro Dalcin Input Parameter:
198cab54364SBarry Smith . sf - the `PetscSF` context
19929046d53SLisandro Dalcin
20029046d53SLisandro Dalcin Output Parameter:
201cab54364SBarry Smith . type - the `PetscSF` type name
20229046d53SLisandro Dalcin
20329046d53SLisandro Dalcin Level: intermediate
20429046d53SLisandro Dalcin
2052f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()`
20629046d53SLisandro Dalcin @*/
PetscSFGetType(PetscSF sf,PetscSFType * type)207d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
208d71ae5a4SJacob Faibussowitsch {
20929046d53SLisandro Dalcin PetscFunctionBegin;
21029046d53SLisandro Dalcin PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2114f572ea9SToby Isaac PetscAssertPointer(type, 2);
21229046d53SLisandro Dalcin *type = ((PetscObject)sf)->type_name;
2133ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
21429046d53SLisandro Dalcin }
21529046d53SLisandro Dalcin
2160764c050SBarry Smith /*@
21720662ed9SBarry Smith PetscSFDestroy - destroy a star forest
21895fce210SBarry Smith
21995fce210SBarry Smith Collective
22095fce210SBarry Smith
2214165533cSJose E. Roman Input Parameter:
22295fce210SBarry Smith . sf - address of star forest
22395fce210SBarry Smith
22495fce210SBarry Smith Level: intermediate
22595fce210SBarry Smith
2262f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()`
22795fce210SBarry Smith @*/
PetscSFDestroy(PetscSF * sf)228d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFDestroy(PetscSF *sf)
229d71ae5a4SJacob Faibussowitsch {
23095fce210SBarry Smith PetscFunctionBegin;
2313ba16761SJacob Faibussowitsch if (!*sf) PetscFunctionReturn(PETSC_SUCCESS);
232f4f49eeaSPierre Jolivet PetscValidHeaderSpecific(*sf, PETSCSF_CLASSID, 1);
233f4f49eeaSPierre Jolivet if (--((PetscObject)*sf)->refct > 0) {
2349371c9d4SSatish Balay *sf = NULL;
2353ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
2369371c9d4SSatish Balay }
2379566063dSJacob Faibussowitsch PetscCall(PetscSFReset(*sf));
238f4f49eeaSPierre Jolivet PetscTryTypeMethod(*sf, Destroy);
2399566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
2409566063dSJacob Faibussowitsch if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
241c02794c0SJunchao Zhang #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
242715b587bSJunchao Zhang if ((*sf)->use_stream_aware_mpi) {
243715b587bSJunchao Zhang PetscCallMPI(MPIX_Stream_free(&(*sf)->mpi_stream));
244715b587bSJunchao Zhang PetscCallMPI(MPI_Comm_free(&(*sf)->stream_comm));
245715b587bSJunchao Zhang }
246715b587bSJunchao Zhang #endif
2479566063dSJacob Faibussowitsch PetscCall(PetscHeaderDestroy(sf));
2483ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
24995fce210SBarry Smith }
25095fce210SBarry Smith
PetscSFCheckGraphValid_Private(PetscSF sf)251d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
252d71ae5a4SJacob Faibussowitsch {
253c4e6a40aSLawrence Mitchell PetscInt i, nleaves;
254c4e6a40aSLawrence Mitchell PetscMPIInt size;
255c4e6a40aSLawrence Mitchell const PetscInt *ilocal;
256c4e6a40aSLawrence Mitchell const PetscSFNode *iremote;
257c4e6a40aSLawrence Mitchell
258c4e6a40aSLawrence Mitchell PetscFunctionBegin;
2593ba16761SJacob Faibussowitsch if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(PETSC_SUCCESS);
2609566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote));
2619566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
262c4e6a40aSLawrence Mitchell for (i = 0; i < nleaves; i++) {
263c4e6a40aSLawrence Mitchell const PetscInt rank = iremote[i].rank;
264c4e6a40aSLawrence Mitchell const PetscInt remote = iremote[i].index;
265c4e6a40aSLawrence Mitchell const PetscInt leaf = ilocal ? ilocal[i] : i;
266c9cc58a2SBarry Smith PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size);
26708401ef6SPierre Jolivet PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i);
26808401ef6SPierre Jolivet PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i);
269c4e6a40aSLawrence Mitchell }
2703ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
271c4e6a40aSLawrence Mitchell }
272c4e6a40aSLawrence Mitchell
27395fce210SBarry Smith /*@
27420662ed9SBarry Smith PetscSFSetUp - set up communication structures for a `PetscSF`, after this is done it may be used to perform communication
27595fce210SBarry Smith
27695fce210SBarry Smith Collective
27795fce210SBarry Smith
2784165533cSJose E. Roman Input Parameter:
27995fce210SBarry Smith . sf - star forest communication object
28095fce210SBarry Smith
28195fce210SBarry Smith Level: beginner
28295fce210SBarry Smith
2832f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()`
28495fce210SBarry Smith @*/
PetscSFSetUp(PetscSF sf)285d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetUp(PetscSF sf)
286d71ae5a4SJacob Faibussowitsch {
28795fce210SBarry Smith PetscFunctionBegin;
28829046d53SLisandro Dalcin PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
28929046d53SLisandro Dalcin PetscSFCheckGraphSet(sf, 1);
2903ba16761SJacob Faibussowitsch if (sf->setupcalled) PetscFunctionReturn(PETSC_SUCCESS);
2919566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0));
2929566063dSJacob Faibussowitsch PetscCall(PetscSFCheckGraphValid_Private(sf));
2939566063dSJacob Faibussowitsch if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */
294dbbe0bcdSBarry Smith PetscTryTypeMethod(sf, SetUp);
29520c24465SJunchao Zhang #if defined(PETSC_HAVE_CUDA)
29620c24465SJunchao Zhang if (sf->backend == PETSCSF_BACKEND_CUDA) {
29771438e86SJunchao Zhang sf->ops->Malloc = PetscSFMalloc_CUDA;
29871438e86SJunchao Zhang sf->ops->Free = PetscSFFree_CUDA;
29920c24465SJunchao Zhang }
30020c24465SJunchao Zhang #endif
30159af0bd3SScott Kruger #if defined(PETSC_HAVE_HIP)
30259af0bd3SScott Kruger if (sf->backend == PETSCSF_BACKEND_HIP) {
30359af0bd3SScott Kruger sf->ops->Malloc = PetscSFMalloc_HIP;
30459af0bd3SScott Kruger sf->ops->Free = PetscSFFree_HIP;
30559af0bd3SScott Kruger }
30659af0bd3SScott Kruger #endif
30720c24465SJunchao Zhang
30820c24465SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS)
30920c24465SJunchao Zhang if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
31020c24465SJunchao Zhang sf->ops->Malloc = PetscSFMalloc_Kokkos;
31120c24465SJunchao Zhang sf->ops->Free = PetscSFFree_Kokkos;
31220c24465SJunchao Zhang }
31320c24465SJunchao Zhang #endif
3149566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0));
31595fce210SBarry Smith sf->setupcalled = PETSC_TRUE;
3163ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
31795fce210SBarry Smith }
31895fce210SBarry Smith
3198af6ec1cSBarry Smith /*@
320cab54364SBarry Smith PetscSFSetFromOptions - set `PetscSF` options using the options database
32195fce210SBarry Smith
32295fce210SBarry Smith Logically Collective
32395fce210SBarry Smith
3244165533cSJose E. Roman Input Parameter:
32595fce210SBarry Smith . sf - star forest
32695fce210SBarry Smith
32795fce210SBarry Smith Options Database Keys:
32820662ed9SBarry Smith + -sf_type - implementation type, see `PetscSFSetType()`
3292f04c522SBarry Smith . -sf_rank_order - sort composite points for gathers and scatters in MPI rank order, gathers are non-deterministic otherwise
33020662ed9SBarry Smith . -sf_use_default_stream - Assume callers of `PetscSF` computed the input root/leafdata with the default CUDA stream. `PetscSF` will also
33120662ed9SBarry Smith use the default stream to process data. Therefore, no stream synchronization is needed between `PetscSF` and its caller (default: true).
33220662ed9SBarry Smith If true, this option only works with `-use_gpu_aware_mpi 1`.
33320662ed9SBarry Smith . -sf_use_stream_aware_mpi - Assume the underlying MPI is CUDA-stream aware and `PetscSF` won't sync streams for send/recv buffers passed to MPI (default: false).
33420662ed9SBarry Smith If true, this option only works with `-use_gpu_aware_mpi 1`.
33595fce210SBarry Smith
3366497c311SBarry Smith - -sf_backend <cuda,hip,kokkos> - Select the device backend`PetscSF` uses. Currently `PetscSF` has these backends: cuda - hip and Kokkos.
33759af0bd3SScott Kruger On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices,
33820c24465SJunchao Zhang the only available is kokkos.
33920c24465SJunchao Zhang
34095fce210SBarry Smith Level: intermediate
341cab54364SBarry Smith
3422f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()`
34395fce210SBarry Smith @*/
PetscSFSetFromOptions(PetscSF sf)344d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
345d71ae5a4SJacob Faibussowitsch {
34695fce210SBarry Smith PetscSFType deft;
34795fce210SBarry Smith char type[256];
34895fce210SBarry Smith PetscBool flg;
34995fce210SBarry Smith
35095fce210SBarry Smith PetscFunctionBegin;
35195fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
352d0609cedSBarry Smith PetscObjectOptionsBegin((PetscObject)sf);
35395fce210SBarry Smith deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
3549566063dSJacob Faibussowitsch PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg));
3559566063dSJacob Faibussowitsch PetscCall(PetscSFSetType(sf, flg ? type : deft));
3569566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL));
357f9334340SJunchao Zhang PetscCall(PetscOptionsBool("-sf_monitor", "monitor the MPI communication in sf", NULL, sf->monitor, &sf->monitor, NULL));
3587fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
35920c24465SJunchao Zhang {
36020c24465SJunchao Zhang char backendstr[32] = {0};
36159af0bd3SScott Kruger PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set;
36220c24465SJunchao Zhang /* Change the defaults set in PetscSFCreate() with command line options */
363d5b43468SJose E. Roman PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL));
3649566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL));
3659566063dSJacob Faibussowitsch PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set));
3669566063dSJacob Faibussowitsch PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda));
3679566063dSJacob Faibussowitsch PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos));
3689566063dSJacob Faibussowitsch PetscCall(PetscStrcasecmp("hip", backendstr, &isHip));
36959af0bd3SScott Kruger #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
37020c24465SJunchao Zhang if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
37120c24465SJunchao Zhang else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
37259af0bd3SScott Kruger else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
37328b400f6SJacob Faibussowitsch else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
37420c24465SJunchao Zhang #elif defined(PETSC_HAVE_KOKKOS)
37508401ef6SPierre Jolivet PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr);
37620c24465SJunchao Zhang #endif
377715b587bSJunchao Zhang
378715b587bSJunchao Zhang #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
379715b587bSJunchao Zhang if (sf->use_stream_aware_mpi) {
380715b587bSJunchao Zhang MPI_Info info;
381715b587bSJunchao Zhang
382715b587bSJunchao Zhang PetscCallMPI(MPI_Info_create(&info));
383715b587bSJunchao Zhang PetscCallMPI(MPI_Info_set(info, "type", "cudaStream_t"));
384715b587bSJunchao Zhang PetscCallMPI(MPIX_Info_set_hex(info, "value", &PetscDefaultCudaStream, sizeof(PetscDefaultCudaStream)));
385715b587bSJunchao Zhang PetscCallMPI(MPIX_Stream_create(info, &sf->mpi_stream));
386715b587bSJunchao Zhang PetscCallMPI(MPI_Info_free(&info));
387715b587bSJunchao Zhang PetscCallMPI(MPIX_Stream_comm_create(PetscObjectComm((PetscObject)sf), sf->mpi_stream, &sf->stream_comm));
388715b587bSJunchao Zhang }
389715b587bSJunchao Zhang #endif
39020c24465SJunchao Zhang }
391c2a741eeSJunchao Zhang #endif
392dbbe0bcdSBarry Smith PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject);
393d0609cedSBarry Smith PetscOptionsEnd();
3943ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
39595fce210SBarry Smith }
39695fce210SBarry Smith
39729046d53SLisandro Dalcin /*@
3982f04c522SBarry Smith PetscSFSetRankOrder - sort multi-points for gathers and scatters by MPI rank order
39995fce210SBarry Smith
40095fce210SBarry Smith Logically Collective
40195fce210SBarry Smith
4024165533cSJose E. Roman Input Parameters:
40395fce210SBarry Smith + sf - star forest
4042f04c522SBarry Smith - flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (false has a lower setup cost, but is non-deterministic)
40595fce210SBarry Smith
40695fce210SBarry Smith Level: advanced
40795fce210SBarry Smith
4082f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
40995fce210SBarry Smith @*/
PetscSFSetRankOrder(PetscSF sf,PetscBool flg)410d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg)
411d71ae5a4SJacob Faibussowitsch {
41295fce210SBarry Smith PetscFunctionBegin;
41395fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
41495fce210SBarry Smith PetscValidLogicalCollectiveBool(sf, flg, 2);
41528b400f6SJacob Faibussowitsch PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
41695fce210SBarry Smith sf->rankorder = flg;
4173ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
41895fce210SBarry Smith }
41995fce210SBarry Smith
4205d83a8b1SBarry Smith /*@
42195fce210SBarry Smith PetscSFSetGraph - Set a parallel star forest
42295fce210SBarry Smith
42395fce210SBarry Smith Collective
42495fce210SBarry Smith
4254165533cSJose E. Roman Input Parameters:
42695fce210SBarry Smith + sf - star forest
4272f04c522SBarry Smith . nroots - number of root vertices on the current MPI process (these are possible targets for other process to attach leaves)
4282f04c522SBarry Smith . nleaves - number of leaf vertices on the current MPI process, each of these references a root on any process
4292f04c522SBarry Smith . ilocal - locations of leaves in leafdata buffers (locations must be >= 0, enforced during setup in debug mode), pass `NULL` for contiguous storage (same as passing (0, 1, 2, ..., nleaves-1))
43020662ed9SBarry Smith . localmode - copy mode for `ilocal`
4312f04c522SBarry Smith . iremote - remote locations of root vertices for each leaf on the current process, length is `nleaves' (locations must be >= 0, enforced during setup in debug mode)
43220662ed9SBarry Smith - remotemode - copy mode for `iremote`
43395fce210SBarry Smith
43495fce210SBarry Smith Level: intermediate
43595fce210SBarry Smith
43695452b02SPatrick Sanan Notes:
43720662ed9SBarry Smith Leaf indices in `ilocal` must be unique, otherwise an error occurs.
43838ab3f8aSBarry Smith
43920662ed9SBarry Smith Input arrays `ilocal` and `iremote` follow the `PetscCopyMode` semantics.
44020662ed9SBarry Smith In particular, if `localmode` or `remotemode` is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`,
441db2b9530SVaclav Hapla PETSc might modify the respective array;
44220662ed9SBarry Smith if `PETSC_USE_POINTER`, the user must delete the array after `PetscSFDestroy()`.
4432f04c522SBarry Smith If `PETSC_COPY_VALUES` is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed).
4442f04c522SBarry Smith
4452f04c522SBarry Smith `PetscSFSetGraphWithPattern()` is an alternative approach to provide certain communication patterns that have extra optimizations.
446db2b9530SVaclav Hapla
44738b5cf2dSJacob Faibussowitsch Fortran Notes:
44820662ed9SBarry Smith In Fortran you must use `PETSC_COPY_VALUES` for `localmode` and `remotemode`.
449c4e6a40aSLawrence Mitchell
45038b5cf2dSJacob Faibussowitsch Developer Notes:
451db2b9530SVaclav Hapla We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf.
45220662ed9SBarry Smith This also allows to compare leaf sets of two `PetscSF`s easily.
45372bf8598SVaclav Hapla
4542f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`, `PetscSFSetGraphWithPattern()`
45595fce210SBarry Smith @*/
PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,PetscInt ilocal[],PetscCopyMode localmode,PetscSFNode iremote[],PetscCopyMode remotemode)4569c9354e5SBarry Smith PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt ilocal[], PetscCopyMode localmode, PetscSFNode iremote[], PetscCopyMode remotemode)
457d71ae5a4SJacob Faibussowitsch {
458db2b9530SVaclav Hapla PetscBool unique, contiguous;
45995fce210SBarry Smith
46095fce210SBarry Smith PetscFunctionBegin;
46195fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
4624f572ea9SToby Isaac if (nleaves > 0 && ilocal) PetscAssertPointer(ilocal, 4);
4634f572ea9SToby Isaac if (nleaves > 0) PetscAssertPointer(iremote, 6);
46408401ef6SPierre Jolivet PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots);
46508401ef6SPierre Jolivet PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves);
4668da24d32SBarry Smith /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
4678da24d32SBarry Smith * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */
4688da24d32SBarry Smith PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode);
4698da24d32SBarry Smith PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode);
47029046d53SLisandro Dalcin
4712a67d2daSStefano Zampini if (sf->nroots >= 0) { /* Reset only if graph already set */
4729566063dSJacob Faibussowitsch PetscCall(PetscSFReset(sf));
4732a67d2daSStefano Zampini }
4742a67d2daSStefano Zampini
4759566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0));
4766497c311SBarry Smith if (PetscDefined(USE_DEBUG)) {
4776497c311SBarry Smith PetscMPIInt size;
4786497c311SBarry Smith
4796497c311SBarry Smith PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
480ac530a7eSPierre Jolivet for (PetscInt i = 0; i < nleaves; i++) PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values");
4816497c311SBarry Smith }
48229046d53SLisandro Dalcin
48395fce210SBarry Smith sf->nroots = nroots;
48495fce210SBarry Smith sf->nleaves = nleaves;
48529046d53SLisandro Dalcin
486db2b9530SVaclav Hapla if (localmode == PETSC_COPY_VALUES && ilocal) {
487db2b9530SVaclav Hapla PetscInt *tlocal = NULL;
488db2b9530SVaclav Hapla
4899566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nleaves, &tlocal));
4909566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(tlocal, ilocal, nleaves));
491db2b9530SVaclav Hapla ilocal = tlocal;
492db2b9530SVaclav Hapla }
493db2b9530SVaclav Hapla if (remotemode == PETSC_COPY_VALUES) {
494db2b9530SVaclav Hapla PetscSFNode *tremote = NULL;
495db2b9530SVaclav Hapla
4969566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nleaves, &tremote));
4979566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(tremote, iremote, nleaves));
498db2b9530SVaclav Hapla iremote = tremote;
499db2b9530SVaclav Hapla }
500db2b9530SVaclav Hapla
50129046d53SLisandro Dalcin if (nleaves && ilocal) {
502db2b9530SVaclav Hapla PetscSFNode work;
503db2b9530SVaclav Hapla
5049566063dSJacob Faibussowitsch PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
5059566063dSJacob Faibussowitsch PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
506db2b9530SVaclav Hapla unique = PetscNot(unique);
507db2b9530SVaclav Hapla PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF");
508db2b9530SVaclav Hapla sf->minleaf = ilocal[0];
509db2b9530SVaclav Hapla sf->maxleaf = ilocal[nleaves - 1];
510db2b9530SVaclav Hapla contiguous = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1);
51129046d53SLisandro Dalcin } else {
51229046d53SLisandro Dalcin sf->minleaf = 0;
51329046d53SLisandro Dalcin sf->maxleaf = nleaves - 1;
514db2b9530SVaclav Hapla unique = PETSC_TRUE;
515db2b9530SVaclav Hapla contiguous = PETSC_TRUE;
51629046d53SLisandro Dalcin }
51729046d53SLisandro Dalcin
518db2b9530SVaclav Hapla if (contiguous) {
519db2b9530SVaclav Hapla if (localmode == PETSC_USE_POINTER) {
520db2b9530SVaclav Hapla ilocal = NULL;
521db2b9530SVaclav Hapla } else {
5229566063dSJacob Faibussowitsch PetscCall(PetscFree(ilocal));
523db2b9530SVaclav Hapla }
524db2b9530SVaclav Hapla }
525db2b9530SVaclav Hapla sf->mine = ilocal;
526db2b9530SVaclav Hapla if (localmode == PETSC_USE_POINTER) {
52729046d53SLisandro Dalcin sf->mine_alloc = NULL;
528db2b9530SVaclav Hapla } else {
529db2b9530SVaclav Hapla sf->mine_alloc = ilocal;
53095fce210SBarry Smith }
5316497c311SBarry Smith if (PetscDefined(USE_DEBUG)) {
5326497c311SBarry Smith PetscMPIInt size;
5336497c311SBarry Smith
5346497c311SBarry Smith PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
535ac530a7eSPierre Jolivet for (PetscInt i = 0; i < nleaves; i++) PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values");
5366497c311SBarry Smith }
537db2b9530SVaclav Hapla sf->remote = iremote;
538db2b9530SVaclav Hapla if (remotemode == PETSC_USE_POINTER) {
53929046d53SLisandro Dalcin sf->remote_alloc = NULL;
540db2b9530SVaclav Hapla } else {
541db2b9530SVaclav Hapla sf->remote_alloc = iremote;
54295fce210SBarry Smith }
5439566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0));
54429046d53SLisandro Dalcin sf->graphset = PETSC_TRUE;
5453ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
54695fce210SBarry Smith }
54795fce210SBarry Smith
54829046d53SLisandro Dalcin /*@
549cab54364SBarry Smith PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern
550dd5b3ca6SJunchao Zhang
551dd5b3ca6SJunchao Zhang Collective
552dd5b3ca6SJunchao Zhang
553dd5b3ca6SJunchao Zhang Input Parameters:
554cab54364SBarry Smith + sf - The `PetscSF`
5552f04c522SBarry Smith . map - Layout of roots over all processes (not used when pattern is `PETSCSF_PATTERN_ALLTOALL`)
556cab54364SBarry Smith - pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL`
557cab54364SBarry Smith
558cab54364SBarry Smith Level: intermediate
559dd5b3ca6SJunchao Zhang
560dd5b3ca6SJunchao Zhang Notes:
5612f04c522SBarry Smith It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector `root` and its `PetscLayout` is `map`.
5622f04c522SBarry Smith `n` and `N` are the local and global sizes of `root` respectively.
563dd5b3ca6SJunchao Zhang
5642f04c522SBarry Smith With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on it, it will copy `root` to
5652f04c522SBarry Smith sequential vectors `leaves` on all MPI processes.
566dd5b3ca6SJunchao Zhang
5672f04c522SBarry Smith With `PETSCSF_PATTERN_GATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on it, it will copy `root` to a
5682f04c522SBarry Smith sequential vector `leaves` on MPI rank 0.
569dd5b3ca6SJunchao Zhang
5702f04c522SBarry Smith With `PETSCSF_PATTERN_ALLTOALL`, map is not used. Suppose NP is the size of `sf`'s communicator. The routine
5712f04c522SBarry Smith creates a graph where every MPI process has NP leaves and NP roots. On MPI rank i, its leaf j is connected to root i
572cab54364SBarry Smith of rank j. Here 0 <=i,j<NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount being 1. Note that it does
5732f04c522SBarry Smith not mean one can not send multiple items. One needs to create a new MPI datatype for the multiple data
5742f04c522SBarry Smith items with `MPI_Type_contiguous` and use that as the <unit> argument in the `PetscSF` routines. In this case, roots and leaves are symmetric.
575dd5b3ca6SJunchao Zhang
5762f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
577dd5b3ca6SJunchao Zhang @*/
PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)578d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern)
579d71ae5a4SJacob Faibussowitsch {
580dd5b3ca6SJunchao Zhang MPI_Comm comm;
581dd5b3ca6SJunchao Zhang PetscInt n, N, res[2];
582dd5b3ca6SJunchao Zhang PetscMPIInt rank, size;
583dd5b3ca6SJunchao Zhang PetscSFType type;
584dd5b3ca6SJunchao Zhang
585dd5b3ca6SJunchao Zhang PetscFunctionBegin;
5862abc8c78SJacob Faibussowitsch PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
58718aa8208SJames Wright PetscValidLogicalCollectiveEnum(sf, pattern, 3);
5884f572ea9SToby Isaac if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscAssertPointer(map, 2);
5899566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
5902c71b3e2SJacob Faibussowitsch PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern);
5919566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank));
5929566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size));
593dd5b3ca6SJunchao Zhang
594dd5b3ca6SJunchao Zhang if (pattern == PETSCSF_PATTERN_ALLTOALL) {
595835f2295SStefano Zampini PetscInt sizei = size;
596835f2295SStefano Zampini
597dd5b3ca6SJunchao Zhang type = PETSCSFALLTOALL;
5989566063dSJacob Faibussowitsch PetscCall(PetscLayoutCreate(comm, &sf->map));
5999566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetLocalSize(sf->map, size));
600835f2295SStefano Zampini PetscCall(PetscLayoutSetSize(sf->map, PetscSqr(sizei)));
6019566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(sf->map));
602dd5b3ca6SJunchao Zhang } else {
6039566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetLocalSize(map, &n));
6049566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetSize(map, &N));
605dd5b3ca6SJunchao Zhang res[0] = n;
606dd5b3ca6SJunchao Zhang res[1] = -n;
607dd5b3ca6SJunchao Zhang /* Check if n are same over all ranks so that we can optimize it */
608462c564dSBarry Smith PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm));
609dd5b3ca6SJunchao Zhang if (res[0] == -res[1]) { /* same n */
610dd5b3ca6SJunchao Zhang type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
611dd5b3ca6SJunchao Zhang } else {
612dd5b3ca6SJunchao Zhang type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
613dd5b3ca6SJunchao Zhang }
6149566063dSJacob Faibussowitsch PetscCall(PetscLayoutReference(map, &sf->map));
615dd5b3ca6SJunchao Zhang }
6169566063dSJacob Faibussowitsch PetscCall(PetscSFSetType(sf, type));
617dd5b3ca6SJunchao Zhang
618dd5b3ca6SJunchao Zhang sf->pattern = pattern;
619dd5b3ca6SJunchao Zhang sf->mine = NULL; /* Contiguous */
620dd5b3ca6SJunchao Zhang
621dd5b3ca6SJunchao Zhang /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
622dd5b3ca6SJunchao Zhang Also set other easy stuff.
623dd5b3ca6SJunchao Zhang */
624dd5b3ca6SJunchao Zhang if (pattern == PETSCSF_PATTERN_ALLGATHER) {
625dd5b3ca6SJunchao Zhang sf->nleaves = N;
626dd5b3ca6SJunchao Zhang sf->nroots = n;
627dd5b3ca6SJunchao Zhang sf->nranks = size;
628dd5b3ca6SJunchao Zhang sf->minleaf = 0;
629dd5b3ca6SJunchao Zhang sf->maxleaf = N - 1;
630dd5b3ca6SJunchao Zhang } else if (pattern == PETSCSF_PATTERN_GATHER) {
631dd5b3ca6SJunchao Zhang sf->nleaves = rank ? 0 : N;
632dd5b3ca6SJunchao Zhang sf->nroots = n;
633dd5b3ca6SJunchao Zhang sf->nranks = rank ? 0 : size;
634dd5b3ca6SJunchao Zhang sf->minleaf = 0;
635dd5b3ca6SJunchao Zhang sf->maxleaf = rank ? -1 : N - 1;
636dd5b3ca6SJunchao Zhang } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
637dd5b3ca6SJunchao Zhang sf->nleaves = size;
638dd5b3ca6SJunchao Zhang sf->nroots = size;
639dd5b3ca6SJunchao Zhang sf->nranks = size;
640dd5b3ca6SJunchao Zhang sf->minleaf = 0;
641dd5b3ca6SJunchao Zhang sf->maxleaf = size - 1;
642dd5b3ca6SJunchao Zhang }
643dd5b3ca6SJunchao Zhang sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
644dd5b3ca6SJunchao Zhang sf->graphset = PETSC_TRUE;
6453ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
646dd5b3ca6SJunchao Zhang }
647dd5b3ca6SJunchao Zhang
648dd5b3ca6SJunchao Zhang /*@
6492f04c522SBarry Smith PetscSFCreateInverseSF - given a `PetscSF` in which all roots have degree 1 (exactly one leaf), creates the inverse map
65095fce210SBarry Smith
65195fce210SBarry Smith Collective
65295fce210SBarry Smith
6534165533cSJose E. Roman Input Parameter:
65495fce210SBarry Smith . sf - star forest to invert
65595fce210SBarry Smith
6564165533cSJose E. Roman Output Parameter:
65720662ed9SBarry Smith . isf - inverse of `sf`
6584165533cSJose E. Roman
65995fce210SBarry Smith Level: advanced
66095fce210SBarry Smith
66195fce210SBarry Smith Notes:
66295fce210SBarry Smith All roots must have degree 1.
66395fce210SBarry Smith
66495fce210SBarry Smith The local space may be a permutation, but cannot be sparse.
66595fce210SBarry Smith
6662f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFSetGraph()`
66795fce210SBarry Smith @*/
PetscSFCreateInverseSF(PetscSF sf,PetscSF * isf)668d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf)
669d71ae5a4SJacob Faibussowitsch {
67095fce210SBarry Smith PetscMPIInt rank;
67195fce210SBarry Smith PetscInt i, nroots, nleaves, maxlocal, count, *newilocal;
67295fce210SBarry Smith const PetscInt *ilocal;
67395fce210SBarry Smith PetscSFNode *roots, *leaves;
67495fce210SBarry Smith
67595fce210SBarry Smith PetscFunctionBegin;
67629046d53SLisandro Dalcin PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
67729046d53SLisandro Dalcin PetscSFCheckGraphSet(sf, 1);
6784f572ea9SToby Isaac PetscAssertPointer(isf, 2);
67929046d53SLisandro Dalcin
6809566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL));
68129046d53SLisandro Dalcin maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
68229046d53SLisandro Dalcin
6839566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
6849566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves));
685ae9aee6dSMatthew G. Knepley for (i = 0; i < maxlocal; i++) {
68695fce210SBarry Smith leaves[i].rank = rank;
68795fce210SBarry Smith leaves[i].index = i;
68895fce210SBarry Smith }
68995fce210SBarry Smith for (i = 0; i < nroots; i++) {
69095fce210SBarry Smith roots[i].rank = -1;
69195fce210SBarry Smith roots[i].index = -1;
69295fce210SBarry Smith }
6936497c311SBarry Smith PetscCall(PetscSFReduceBegin(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE));
6946497c311SBarry Smith PetscCall(PetscSFReduceEnd(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE));
69595fce210SBarry Smith
69695fce210SBarry Smith /* Check whether our leaves are sparse */
6979371c9d4SSatish Balay for (i = 0, count = 0; i < nroots; i++)
6989371c9d4SSatish Balay if (roots[i].rank >= 0) count++;
69995fce210SBarry Smith if (count == nroots) newilocal = NULL;
7002b45a7c7SJames Wright else { // Index for sparse leaves and compact "roots" array (which is to become our leaves).
7012b45a7c7SJames Wright PetscCall(PetscMalloc1(count, &newilocal));
70295fce210SBarry Smith for (i = 0, count = 0; i < nroots; i++) {
70395fce210SBarry Smith if (roots[i].rank >= 0) {
70495fce210SBarry Smith newilocal[count] = i;
70595fce210SBarry Smith roots[count].rank = roots[i].rank;
70695fce210SBarry Smith roots[count].index = roots[i].index;
70795fce210SBarry Smith count++;
70895fce210SBarry Smith }
70995fce210SBarry Smith }
71095fce210SBarry Smith }
71195fce210SBarry Smith
7129566063dSJacob Faibussowitsch PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf));
7139566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES));
7149566063dSJacob Faibussowitsch PetscCall(PetscFree2(roots, leaves));
7153ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
71695fce210SBarry Smith }
71795fce210SBarry Smith
71895fce210SBarry Smith /*@
719cab54364SBarry Smith PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph
72095fce210SBarry Smith
72195fce210SBarry Smith Collective
72295fce210SBarry Smith
7234165533cSJose E. Roman Input Parameters:
72495fce210SBarry Smith + sf - communication object to duplicate
725cab54364SBarry Smith - opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`)
72695fce210SBarry Smith
7274165533cSJose E. Roman Output Parameter:
72895fce210SBarry Smith . newsf - new communication object
72995fce210SBarry Smith
73095fce210SBarry Smith Level: beginner
73195fce210SBarry Smith
7322f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
73395fce210SBarry Smith @*/
PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF * newsf)734d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf)
735d71ae5a4SJacob Faibussowitsch {
73629046d53SLisandro Dalcin PetscSFType type;
73797929ea7SJunchao Zhang MPI_Datatype dtype = MPIU_SCALAR;
73895fce210SBarry Smith
73995fce210SBarry Smith PetscFunctionBegin;
74029046d53SLisandro Dalcin PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
74129046d53SLisandro Dalcin PetscValidLogicalCollectiveEnum(sf, opt, 2);
7424f572ea9SToby Isaac PetscAssertPointer(newsf, 3);
7439566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf));
7449566063dSJacob Faibussowitsch PetscCall(PetscSFGetType(sf, &type));
7459566063dSJacob Faibussowitsch if (type) PetscCall(PetscSFSetType(*newsf, type));
74635cb6cd3SPierre Jolivet (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */
74795fce210SBarry Smith if (opt == PETSCSF_DUPLICATE_GRAPH) {
748dd5b3ca6SJunchao Zhang PetscSFCheckGraphSet(sf, 1);
749dd5b3ca6SJunchao Zhang if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
75095fce210SBarry Smith PetscInt nroots, nleaves;
75195fce210SBarry Smith const PetscInt *ilocal;
75295fce210SBarry Smith const PetscSFNode *iremote;
7539566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
7549566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
755dd5b3ca6SJunchao Zhang } else {
7569566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern));
757dd5b3ca6SJunchao Zhang }
75895fce210SBarry Smith }
75997929ea7SJunchao Zhang /* Since oldtype is committed, so is newtype, according to MPI */
7609566063dSJacob Faibussowitsch if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype));
76197929ea7SJunchao Zhang (*newsf)->vscat.bs = sf->vscat.bs;
76297929ea7SJunchao Zhang (*newsf)->vscat.unit = dtype;
76397929ea7SJunchao Zhang (*newsf)->vscat.to_n = sf->vscat.to_n;
76497929ea7SJunchao Zhang (*newsf)->vscat.from_n = sf->vscat.from_n;
76597929ea7SJunchao Zhang /* Do not copy lsf. Build it on demand since it is rarely used */
76697929ea7SJunchao Zhang
76720c24465SJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
76820c24465SJunchao Zhang (*newsf)->backend = sf->backend;
76971438e86SJunchao Zhang (*newsf)->unknown_input_stream = sf->unknown_input_stream;
77020c24465SJunchao Zhang (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi;
77120c24465SJunchao Zhang (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
77220c24465SJunchao Zhang #endif
773dbbe0bcdSBarry Smith PetscTryTypeMethod(sf, Duplicate, opt, *newsf);
77420c24465SJunchao Zhang /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
7753ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
77695fce210SBarry Smith }
77795fce210SBarry Smith
77895fce210SBarry Smith /*@C
77995fce210SBarry Smith PetscSFGetGraph - Get the graph specifying a parallel star forest
78095fce210SBarry Smith
78195fce210SBarry Smith Not Collective
78295fce210SBarry Smith
7834165533cSJose E. Roman Input Parameter:
78495fce210SBarry Smith . sf - star forest
78595fce210SBarry Smith
7864165533cSJose E. Roman Output Parameters:
7872f04c522SBarry Smith + nroots - number of root vertices on the current process (these are possible targets for other MPI process to attach leaves)
7882f04c522SBarry Smith . nleaves - number of leaf vertices on the current process, each of these references a root on any MPI process
78920662ed9SBarry Smith . ilocal - locations of leaves in leafdata buffers (if returned value is `NULL`, it means leaves are in contiguous storage)
79095fce210SBarry Smith - iremote - remote locations of root vertices for each leaf on the current process
79195fce210SBarry Smith
792cab54364SBarry Smith Level: intermediate
793cab54364SBarry Smith
794373e0d91SLisandro Dalcin Notes:
79520662ed9SBarry Smith We are not currently requiring that the graph is set, thus returning `nroots` = -1 if it has not been set yet
796373e0d91SLisandro Dalcin
79720662ed9SBarry Smith The returned `ilocal` and `iremote` might contain values in different order than the input ones in `PetscSFSetGraph()`
798db2b9530SVaclav Hapla
799ce78bad3SBarry Smith Fortran Note:
800ce78bad3SBarry Smith Use `PetscSFRestoreGraph()` when access to the arrays is no longer needed
801ca797d7aSLawrence Mitchell
8022f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`
80395fce210SBarry Smith @*/
PetscSFGetGraph(PetscSF sf,PetscInt * nroots,PetscInt * nleaves,const PetscInt * ilocal[],const PetscSFNode * iremote[])804ce78bad3SBarry Smith PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt *ilocal[], const PetscSFNode *iremote[])
805d71ae5a4SJacob Faibussowitsch {
80695fce210SBarry Smith PetscFunctionBegin;
80795fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
808b8dee149SJunchao Zhang if (sf->ops->GetGraph) {
809f4f49eeaSPierre Jolivet PetscCall(sf->ops->GetGraph(sf, nroots, nleaves, ilocal, iremote));
810b8dee149SJunchao Zhang } else {
81195fce210SBarry Smith if (nroots) *nroots = sf->nroots;
81295fce210SBarry Smith if (nleaves) *nleaves = sf->nleaves;
81395fce210SBarry Smith if (ilocal) *ilocal = sf->mine;
81495fce210SBarry Smith if (iremote) *iremote = sf->remote;
815b8dee149SJunchao Zhang }
8163ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
81795fce210SBarry Smith }
81895fce210SBarry Smith
81929046d53SLisandro Dalcin /*@
82095fce210SBarry Smith PetscSFGetLeafRange - Get the active leaf ranges
82195fce210SBarry Smith
82295fce210SBarry Smith Not Collective
82395fce210SBarry Smith
8244165533cSJose E. Roman Input Parameter:
82595fce210SBarry Smith . sf - star forest
82695fce210SBarry Smith
8274165533cSJose E. Roman Output Parameters:
8282f04c522SBarry Smith + minleaf - minimum active leaf on this MPI process. Returns 0 if there are no leaves.
8292f04c522SBarry Smith - maxleaf - maximum active leaf on this MPI process. Returns -1 if there are no leaves.
83095fce210SBarry Smith
83195fce210SBarry Smith Level: developer
83295fce210SBarry Smith
8332f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
83495fce210SBarry Smith @*/
PetscSFGetLeafRange(PetscSF sf,PetscInt * minleaf,PetscInt * maxleaf)835d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf)
836d71ae5a4SJacob Faibussowitsch {
83795fce210SBarry Smith PetscFunctionBegin;
83895fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
83929046d53SLisandro Dalcin PetscSFCheckGraphSet(sf, 1);
84095fce210SBarry Smith if (minleaf) *minleaf = sf->minleaf;
84195fce210SBarry Smith if (maxleaf) *maxleaf = sf->maxleaf;
8423ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
84395fce210SBarry Smith }
84495fce210SBarry Smith
845ffeef943SBarry Smith /*@
846cab54364SBarry Smith PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database
847fe2efc57SMark
84820f4b53cSBarry Smith Collective
849fe2efc57SMark
850fe2efc57SMark Input Parameters:
851fe2efc57SMark + A - the star forest
852cab54364SBarry Smith . obj - Optional object that provides the prefix for the option names
853736c3998SJose E. Roman - name - command line option
854fe2efc57SMark
855fe2efc57SMark Level: intermediate
856cab54364SBarry Smith
85720662ed9SBarry Smith Note:
85820662ed9SBarry Smith See `PetscObjectViewFromOptions()` for possible `PetscViewer` and `PetscViewerFormat`
85920662ed9SBarry Smith
8602f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
861fe2efc57SMark @*/
PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])862d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[])
863d71ae5a4SJacob Faibussowitsch {
864fe2efc57SMark PetscFunctionBegin;
865fe2efc57SMark PetscValidHeaderSpecific(A, PETSCSF_CLASSID, 1);
8669566063dSJacob Faibussowitsch PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name));
8673ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
868fe2efc57SMark }
869fe2efc57SMark
870ffeef943SBarry Smith /*@
87195fce210SBarry Smith PetscSFView - view a star forest
87295fce210SBarry Smith
87395fce210SBarry Smith Collective
87495fce210SBarry Smith
8754165533cSJose E. Roman Input Parameters:
87695fce210SBarry Smith + sf - star forest
877cab54364SBarry Smith - viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD`
87895fce210SBarry Smith
87995fce210SBarry Smith Level: beginner
88095fce210SBarry Smith
8812f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()`
88295fce210SBarry Smith @*/
PetscSFView(PetscSF sf,PetscViewer viewer)883d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer)
884d71ae5a4SJacob Faibussowitsch {
8859f196a02SMartin Diehl PetscBool isascii;
88695fce210SBarry Smith PetscViewerFormat format;
88795fce210SBarry Smith
88895fce210SBarry Smith PetscFunctionBegin;
88995fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
8909566063dSJacob Faibussowitsch if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer));
89195fce210SBarry Smith PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
89295fce210SBarry Smith PetscCheckSameComm(sf, 1, viewer, 2);
8939566063dSJacob Faibussowitsch if (sf->graphset) PetscCall(PetscSFSetUp(sf));
8949f196a02SMartin Diehl PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
8959f196a02SMartin Diehl if (isascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
89695fce210SBarry Smith PetscMPIInt rank;
8976497c311SBarry Smith PetscInt j;
89895fce210SBarry Smith
8999566063dSJacob Faibussowitsch PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer));
9009566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPushTab(viewer));
901dd5b3ca6SJunchao Zhang if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
90280153354SVaclav Hapla if (!sf->graphset) {
9039566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n"));
9049566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPopTab(viewer));
9053ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
90680153354SVaclav Hapla }
9079566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
9089566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPushSynchronized(viewer));
9096497c311SBarry Smith PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%d\n", rank, sf->nroots, sf->nleaves, sf->nranks));
910835f2295SStefano Zampini for (PetscInt i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index));
9119566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer));
9129566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format));
91395fce210SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
91481bfa7aaSJed Brown PetscMPIInt *tmpranks, *perm;
9156497c311SBarry Smith
9169566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm));
9179566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks));
9186497c311SBarry Smith for (PetscMPIInt i = 0; i < sf->nranks; i++) perm[i] = i;
9199566063dSJacob Faibussowitsch PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm));
9209566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank));
9216497c311SBarry Smith for (PetscMPIInt ii = 0; ii < sf->nranks; ii++) {
9226497c311SBarry Smith PetscMPIInt i = perm[ii];
9236497c311SBarry Smith
9249566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i]));
92548a46eb9SPierre Jolivet for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j]));
92695fce210SBarry Smith }
9279566063dSJacob Faibussowitsch PetscCall(PetscFree2(tmpranks, perm));
92895fce210SBarry Smith }
9299566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer));
9309566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPopSynchronized(viewer));
931dd5b3ca6SJunchao Zhang }
9329566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPopTab(viewer));
93395fce210SBarry Smith }
934dbbe0bcdSBarry Smith PetscTryTypeMethod(sf, View, viewer);
9353ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
93695fce210SBarry Smith }
93795fce210SBarry Smith
93895fce210SBarry Smith /*@C
9392f04c522SBarry Smith PetscSFGetRootRanks - Get the root MPI ranks and number of vertices referenced by leaves on this process
94095fce210SBarry Smith
94195fce210SBarry Smith Not Collective
94295fce210SBarry Smith
9434165533cSJose E. Roman Input Parameter:
94495fce210SBarry Smith . sf - star forest
94595fce210SBarry Smith
9464165533cSJose E. Roman Output Parameters:
9472f04c522SBarry Smith + nranks - number of MPI processes referenced by local part
9482f04c522SBarry Smith . ranks - [`nranks`] array of MPI ranks
9492f04c522SBarry Smith . roffset - [`nranks`+1] offset in `rmine` and `rremote` for each MPI process
9502f04c522SBarry Smith . rmine - [`roffset`[`nranks`]] concatenated array holding local indices referencing each remote MPI process, or `NULL`
9512f04c522SBarry Smith - rremote - [`roffset`[`nranks`]] concatenated array holding remote indices referenced for each remote MPI process, or `NULL`
95295fce210SBarry Smith
95395fce210SBarry Smith Level: developer
95495fce210SBarry Smith
9552f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGetLeafRanks()`
95695fce210SBarry Smith @*/
PetscSFGetRootRanks(PetscSF sf,PetscMPIInt * nranks,const PetscMPIInt * ranks[],const PetscInt * roffset[],const PetscInt * rmine[],const PetscInt * rremote[])9579c9354e5SBarry Smith PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscMPIInt *nranks, const PetscMPIInt *ranks[], const PetscInt *roffset[], const PetscInt *rmine[], const PetscInt *rremote[])
958d71ae5a4SJacob Faibussowitsch {
95995fce210SBarry Smith PetscFunctionBegin;
96095fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
96128b400f6SJacob Faibussowitsch PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
962dec1416fSJunchao Zhang if (sf->ops->GetRootRanks) {
9639927e4dfSBarry Smith PetscUseTypeMethod(sf, GetRootRanks, nranks, ranks, roffset, rmine, rremote);
964dec1416fSJunchao Zhang } else {
965dec1416fSJunchao Zhang /* The generic implementation */
96695fce210SBarry Smith if (nranks) *nranks = sf->nranks;
96795fce210SBarry Smith if (ranks) *ranks = sf->ranks;
96895fce210SBarry Smith if (roffset) *roffset = sf->roffset;
96995fce210SBarry Smith if (rmine) *rmine = sf->rmine;
97095fce210SBarry Smith if (rremote) *rremote = sf->rremote;
971dec1416fSJunchao Zhang }
9723ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
97395fce210SBarry Smith }
97495fce210SBarry Smith
9758750ddebSJunchao Zhang /*@C
9762f04c522SBarry Smith PetscSFGetLeafRanks - Get leaf MPI ranks referencing roots on this process
9778750ddebSJunchao Zhang
9788750ddebSJunchao Zhang Not Collective
9798750ddebSJunchao Zhang
9804165533cSJose E. Roman Input Parameter:
9818750ddebSJunchao Zhang . sf - star forest
9828750ddebSJunchao Zhang
9834165533cSJose E. Roman Output Parameters:
9842f04c522SBarry Smith + niranks - number of leaf MPI processes referencing roots on this process
9852f04c522SBarry Smith . iranks - [`niranks`] array of MPI ranks
9862f04c522SBarry Smith . ioffset - [`niranks`+1] offset in `irootloc` for each MPI process
9872f04c522SBarry Smith - irootloc - [`ioffset`[`niranks`]] concatenated array holding local indices of roots referenced by each leaf MPI process
9888750ddebSJunchao Zhang
9898750ddebSJunchao Zhang Level: developer
9908750ddebSJunchao Zhang
9912f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGetRootRanks()`
9928750ddebSJunchao Zhang @*/
PetscSFGetLeafRanks(PetscSF sf,PetscMPIInt * niranks,const PetscMPIInt * iranks[],const PetscInt * ioffset[],const PetscInt * irootloc[])9939c9354e5SBarry Smith PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscMPIInt *niranks, const PetscMPIInt *iranks[], const PetscInt *ioffset[], const PetscInt *irootloc[])
994d71ae5a4SJacob Faibussowitsch {
9958750ddebSJunchao Zhang PetscFunctionBegin;
9968750ddebSJunchao Zhang PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
99728b400f6SJacob Faibussowitsch PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
9988750ddebSJunchao Zhang if (sf->ops->GetLeafRanks) {
9999927e4dfSBarry Smith PetscUseTypeMethod(sf, GetLeafRanks, niranks, iranks, ioffset, irootloc);
10008750ddebSJunchao Zhang } else {
10018750ddebSJunchao Zhang PetscSFType type;
10029566063dSJacob Faibussowitsch PetscCall(PetscSFGetType(sf, &type));
100398921bdaSJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
10048750ddebSJunchao Zhang }
10053ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
10068750ddebSJunchao Zhang }
10078750ddebSJunchao Zhang
InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt * list)1008d71ae5a4SJacob Faibussowitsch static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list)
1009d71ae5a4SJacob Faibussowitsch {
1010b5a8e515SJed Brown PetscInt i;
1011b5a8e515SJed Brown for (i = 0; i < n; i++) {
1012b5a8e515SJed Brown if (needle == list[i]) return PETSC_TRUE;
1013b5a8e515SJed Brown }
1014b5a8e515SJed Brown return PETSC_FALSE;
1015b5a8e515SJed Brown }
1016b5a8e515SJed Brown
101795fce210SBarry Smith /*@C
10182f04c522SBarry Smith PetscSFSetUpRanks - Set up data structures associated with MPI ranks; this is for internal use by `PetscSF` implementations.
101921c688dcSJed Brown
102021c688dcSJed Brown Collective
102121c688dcSJed Brown
10224165533cSJose E. Roman Input Parameters:
1023cab54364SBarry Smith + sf - `PetscSF` to set up; `PetscSFSetGraph()` must have been called
1024cab54364SBarry Smith - dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange)
102521c688dcSJed Brown
102621c688dcSJed Brown Level: developer
102721c688dcSJed Brown
10282f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGetRootRanks()`
102921c688dcSJed Brown @*/
PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)1030d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup)
1031d71ae5a4SJacob Faibussowitsch {
1032eec179cfSJacob Faibussowitsch PetscHMapI table;
1033eec179cfSJacob Faibussowitsch PetscHashIter pos;
10346497c311SBarry Smith PetscMPIInt size, groupsize, *groupranks, *ranks;
10356497c311SBarry Smith PetscInt *rcount;
10366497c311SBarry Smith PetscInt irank, sfnrank, ranksi;
10376497c311SBarry Smith PetscMPIInt i, orank = -1;
103821c688dcSJed Brown
103921c688dcSJed Brown PetscFunctionBegin;
104021c688dcSJed Brown PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
104129046d53SLisandro Dalcin PetscSFCheckGraphSet(sf, 1);
10429566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
1043eec179cfSJacob Faibussowitsch PetscCall(PetscHMapICreateWithSize(10, &table));
104421c688dcSJed Brown for (i = 0; i < sf->nleaves; i++) {
104521c688dcSJed Brown /* Log 1-based rank */
1046eec179cfSJacob Faibussowitsch PetscCall(PetscHMapISetWithMode(table, sf->remote[i].rank + 1, 1, ADD_VALUES));
104721c688dcSJed Brown }
10486497c311SBarry Smith PetscCall(PetscHMapIGetSize(table, &sfnrank));
10496497c311SBarry Smith PetscCall(PetscMPIIntCast(sfnrank, &sf->nranks));
10509566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote));
10519566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks));
1052eec179cfSJacob Faibussowitsch PetscHashIterBegin(table, pos);
105321c688dcSJed Brown for (i = 0; i < sf->nranks; i++) {
10546497c311SBarry Smith PetscHashIterGetKey(table, pos, ranksi);
10556497c311SBarry Smith PetscCall(PetscMPIIntCast(ranksi, &ranks[i]));
1056eec179cfSJacob Faibussowitsch PetscHashIterGetVal(table, pos, rcount[i]);
1057eec179cfSJacob Faibussowitsch PetscHashIterNext(table, pos);
105821c688dcSJed Brown ranks[i]--; /* Convert back to 0-based */
105921c688dcSJed Brown }
1060eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIDestroy(&table));
1061b5a8e515SJed Brown
1062b5a8e515SJed Brown /* We expect that dgroup is reliably "small" while nranks could be large */
1063b5a8e515SJed Brown {
10647fb8a5e4SKarl Rupp MPI_Group group = MPI_GROUP_NULL;
1065b5a8e515SJed Brown PetscMPIInt *dgroupranks;
10666497c311SBarry Smith
10679566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
10689566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_size(dgroup, &groupsize));
10699566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(groupsize, &dgroupranks));
10709566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(groupsize, &groupranks));
1071b5a8e515SJed Brown for (i = 0; i < groupsize; i++) dgroupranks[i] = i;
10729566063dSJacob Faibussowitsch if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks));
10739566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_free(&group));
10749566063dSJacob Faibussowitsch PetscCall(PetscFree(dgroupranks));
1075b5a8e515SJed Brown }
1076b5a8e515SJed Brown
1077b5a8e515SJed Brown /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
1078b5a8e515SJed Brown for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) {
1079b5a8e515SJed Brown for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */
1080b5a8e515SJed Brown if (InList(ranks[i], groupsize, groupranks)) break;
1081b5a8e515SJed Brown }
1082b5a8e515SJed Brown for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
1083b5a8e515SJed Brown if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break;
1084b5a8e515SJed Brown }
1085b5a8e515SJed Brown if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
10866497c311SBarry Smith PetscMPIInt tmprank;
10876497c311SBarry Smith PetscInt tmpcount;
1088247e8311SStefano Zampini
1089b5a8e515SJed Brown tmprank = ranks[i];
1090b5a8e515SJed Brown tmpcount = rcount[i];
1091b5a8e515SJed Brown ranks[i] = ranks[sf->ndranks];
1092b5a8e515SJed Brown rcount[i] = rcount[sf->ndranks];
1093b5a8e515SJed Brown ranks[sf->ndranks] = tmprank;
1094b5a8e515SJed Brown rcount[sf->ndranks] = tmpcount;
1095b5a8e515SJed Brown sf->ndranks++;
1096b5a8e515SJed Brown }
1097b5a8e515SJed Brown }
10989566063dSJacob Faibussowitsch PetscCall(PetscFree(groupranks));
10996497c311SBarry Smith PetscCall(PetscSortMPIIntWithIntArray(sf->ndranks, ranks, rcount));
11006497c311SBarry Smith if (rcount) PetscCall(PetscSortMPIIntWithIntArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks));
110121c688dcSJed Brown sf->roffset[0] = 0;
110221c688dcSJed Brown for (i = 0; i < sf->nranks; i++) {
11039566063dSJacob Faibussowitsch PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i));
110421c688dcSJed Brown sf->roffset[i + 1] = sf->roffset[i] + rcount[i];
110521c688dcSJed Brown rcount[i] = 0;
110621c688dcSJed Brown }
1107247e8311SStefano Zampini for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) {
1108247e8311SStefano Zampini /* short circuit */
1109247e8311SStefano Zampini if (orank != sf->remote[i].rank) {
111021c688dcSJed Brown /* Search for index of iremote[i].rank in sf->ranks */
1111835f2295SStefano Zampini PetscCall(PetscMPIIntCast(sf->remote[i].rank, &orank));
1112835f2295SStefano Zampini PetscCall(PetscFindMPIInt(orank, sf->ndranks, sf->ranks, &irank));
1113b5a8e515SJed Brown if (irank < 0) {
1114835f2295SStefano Zampini PetscCall(PetscFindMPIInt(orank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank));
1115b5a8e515SJed Brown if (irank >= 0) irank += sf->ndranks;
111621c688dcSJed Brown }
1117247e8311SStefano Zampini }
1118835f2295SStefano Zampini PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %d in array", orank);
111921c688dcSJed Brown sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i;
112021c688dcSJed Brown sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
112121c688dcSJed Brown rcount[irank]++;
112221c688dcSJed Brown }
11239566063dSJacob Faibussowitsch PetscCall(PetscFree2(rcount, ranks));
11243ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
112521c688dcSJed Brown }
112621c688dcSJed Brown
112721c688dcSJed Brown /*@C
112895fce210SBarry Smith PetscSFGetGroups - gets incoming and outgoing process groups
112995fce210SBarry Smith
113095fce210SBarry Smith Collective
113195fce210SBarry Smith
11324165533cSJose E. Roman Input Parameter:
113395fce210SBarry Smith . sf - star forest
113495fce210SBarry Smith
11354165533cSJose E. Roman Output Parameters:
113695fce210SBarry Smith + incoming - group of origin processes for incoming edges (leaves that reference my roots)
113795fce210SBarry Smith - outgoing - group of destination processes for outgoing edges (roots that I reference)
113895fce210SBarry Smith
113995fce210SBarry Smith Level: developer
114095fce210SBarry Smith
11412f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
114295fce210SBarry Smith @*/
PetscSFGetGroups(PetscSF sf,MPI_Group * incoming,MPI_Group * outgoing)1143d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing)
1144d71ae5a4SJacob Faibussowitsch {
11457fb8a5e4SKarl Rupp MPI_Group group = MPI_GROUP_NULL;
114695fce210SBarry Smith
114795fce210SBarry Smith PetscFunctionBegin;
114818aa8208SJames Wright PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
114908401ef6SPierre Jolivet PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups");
115095fce210SBarry Smith if (sf->ingroup == MPI_GROUP_NULL) {
115195fce210SBarry Smith PetscInt i;
115295fce210SBarry Smith const PetscInt *indegree;
11536497c311SBarry Smith PetscMPIInt rank, *outranks, *inranks, indegree0;
115495fce210SBarry Smith PetscSFNode *remote;
115595fce210SBarry Smith PetscSF bgcount;
115695fce210SBarry Smith
115795fce210SBarry Smith /* Compute the number of incoming ranks */
11589566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sf->nranks, &remote));
115995fce210SBarry Smith for (i = 0; i < sf->nranks; i++) {
116095fce210SBarry Smith remote[i].rank = sf->ranks[i];
116195fce210SBarry Smith remote[i].index = 0;
116295fce210SBarry Smith }
11639566063dSJacob Faibussowitsch PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount));
11649566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
11659566063dSJacob Faibussowitsch PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree));
11669566063dSJacob Faibussowitsch PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree));
116795fce210SBarry Smith /* Enumerate the incoming ranks */
11689566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks));
11699566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
117095fce210SBarry Smith for (i = 0; i < sf->nranks; i++) outranks[i] = rank;
11719566063dSJacob Faibussowitsch PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks));
11729566063dSJacob Faibussowitsch PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks));
11739566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
11746497c311SBarry Smith PetscCall(PetscMPIIntCast(indegree[0], &indegree0));
11756497c311SBarry Smith PetscCallMPI(MPI_Group_incl(group, indegree0, inranks, &sf->ingroup));
11769566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_free(&group));
11779566063dSJacob Faibussowitsch PetscCall(PetscFree2(inranks, outranks));
11789566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&bgcount));
117995fce210SBarry Smith }
118095fce210SBarry Smith *incoming = sf->ingroup;
118195fce210SBarry Smith
118295fce210SBarry Smith if (sf->outgroup == MPI_GROUP_NULL) {
11839566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
11849566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup));
11859566063dSJacob Faibussowitsch PetscCallMPI(MPI_Group_free(&group));
118695fce210SBarry Smith }
118795fce210SBarry Smith *outgoing = sf->outgroup;
11883ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
118995fce210SBarry Smith }
119095fce210SBarry Smith
119129046d53SLisandro Dalcin /*@
11920dd791a8SStefano Zampini PetscSFGetRanksSF - gets the `PetscSF` to perform communications with root ranks
11930dd791a8SStefano Zampini
11940dd791a8SStefano Zampini Collective
11950dd791a8SStefano Zampini
11960dd791a8SStefano Zampini Input Parameter:
11970dd791a8SStefano Zampini . sf - star forest
11980dd791a8SStefano Zampini
11990dd791a8SStefano Zampini Output Parameter:
12002f04c522SBarry Smith . rsf - the star forest with a single root per MPI process to perform communications
12010dd791a8SStefano Zampini
12020dd791a8SStefano Zampini Level: developer
12030dd791a8SStefano Zampini
12042f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetRootRanks()`
12050dd791a8SStefano Zampini @*/
PetscSFGetRanksSF(PetscSF sf,PetscSF * rsf)12060dd791a8SStefano Zampini PetscErrorCode PetscSFGetRanksSF(PetscSF sf, PetscSF *rsf)
12070dd791a8SStefano Zampini {
12080dd791a8SStefano Zampini PetscFunctionBegin;
12090dd791a8SStefano Zampini PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
12100dd791a8SStefano Zampini PetscAssertPointer(rsf, 2);
12110dd791a8SStefano Zampini if (!sf->rankssf) {
12120dd791a8SStefano Zampini PetscSFNode *rremotes;
12130dd791a8SStefano Zampini const PetscMPIInt *ranks;
12146497c311SBarry Smith PetscMPIInt nranks;
12150dd791a8SStefano Zampini
12160dd791a8SStefano Zampini PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
12170dd791a8SStefano Zampini PetscCall(PetscMalloc1(nranks, &rremotes));
12180dd791a8SStefano Zampini for (PetscInt i = 0; i < nranks; i++) {
12190dd791a8SStefano Zampini rremotes[i].rank = ranks[i];
12200dd791a8SStefano Zampini rremotes[i].index = 0;
12210dd791a8SStefano Zampini }
12220dd791a8SStefano Zampini PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &sf->rankssf));
12230dd791a8SStefano Zampini PetscCall(PetscSFSetGraph(sf->rankssf, 1, nranks, NULL, PETSC_OWN_POINTER, rremotes, PETSC_OWN_POINTER));
12240dd791a8SStefano Zampini }
12250dd791a8SStefano Zampini *rsf = sf->rankssf;
12260dd791a8SStefano Zampini PetscFunctionReturn(PETSC_SUCCESS);
12270dd791a8SStefano Zampini }
12280dd791a8SStefano Zampini
12290dd791a8SStefano Zampini /*@
1230cab54364SBarry Smith PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters
123195fce210SBarry Smith
123295fce210SBarry Smith Collective
123395fce210SBarry Smith
12344165533cSJose E. Roman Input Parameter:
123595fce210SBarry Smith . sf - star forest that may contain roots with 0 or with more than 1 vertex
123695fce210SBarry Smith
12374165533cSJose E. Roman Output Parameter:
12382f04c522SBarry Smith . multi - star forest with split roots, such that each root has degree exactly 1 (has one leaf)
123995fce210SBarry Smith
124095fce210SBarry Smith Level: developer
124195fce210SBarry Smith
1242cab54364SBarry Smith Note:
1243cab54364SBarry Smith In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi
124495fce210SBarry Smith directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
124595fce210SBarry Smith edge, it is a candidate for future optimization that might involve its removal.
124695fce210SBarry Smith
12472f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
124895fce210SBarry Smith @*/
PetscSFGetMultiSF(PetscSF sf,PetscSF * multi)1249d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi)
1250d71ae5a4SJacob Faibussowitsch {
125195fce210SBarry Smith PetscFunctionBegin;
125295fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
12534f572ea9SToby Isaac PetscAssertPointer(multi, 2);
125495fce210SBarry Smith if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
12559566063dSJacob Faibussowitsch PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
125695fce210SBarry Smith *multi = sf->multi;
1257013b3241SStefano Zampini sf->multi->multi = sf->multi;
12583ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
125995fce210SBarry Smith }
126095fce210SBarry Smith if (!sf->multi) {
126195fce210SBarry Smith const PetscInt *indegree;
12629837ea96SMatthew G. Knepley PetscInt i, *inoffset, *outones, *outoffset, maxlocal;
126395fce210SBarry Smith PetscSFNode *remote;
126429046d53SLisandro Dalcin maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
12659566063dSJacob Faibussowitsch PetscCall(PetscSFComputeDegreeBegin(sf, &indegree));
12669566063dSJacob Faibussowitsch PetscCall(PetscSFComputeDegreeEnd(sf, &indegree));
12679566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset));
126895fce210SBarry Smith inoffset[0] = 0;
126995fce210SBarry Smith for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i];
12709837ea96SMatthew G. Knepley for (i = 0; i < maxlocal; i++) outones[i] = 1;
12719566063dSJacob Faibussowitsch PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
12729566063dSJacob Faibussowitsch PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
127395fce210SBarry Smith for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
127476bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
1275ad540459SPierre Jolivet for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp");
127676bd3646SJed Brown }
12779566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sf->nleaves, &remote));
127895fce210SBarry Smith for (i = 0; i < sf->nleaves; i++) {
127995fce210SBarry Smith remote[i].rank = sf->remote[i].rank;
128038e7336fSToby Isaac remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
128195fce210SBarry Smith }
12829566063dSJacob Faibussowitsch PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
1283013b3241SStefano Zampini sf->multi->multi = sf->multi;
12849566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
128595fce210SBarry Smith if (sf->rankorder) { /* Sort the ranks */
128695fce210SBarry Smith PetscMPIInt rank;
128795fce210SBarry Smith PetscInt *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree;
128895fce210SBarry Smith PetscSFNode *newremote;
12899566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
129095fce210SBarry Smith for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]);
12919566063dSJacob Faibussowitsch PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset));
12929837ea96SMatthew G. Knepley for (i = 0; i < maxlocal; i++) outranks[i] = rank;
12939566063dSJacob Faibussowitsch PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
12949566063dSJacob Faibussowitsch PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
129595fce210SBarry Smith /* Sort the incoming ranks at each vertex, build the inverse map */
129695fce210SBarry Smith for (i = 0; i < sf->nroots; i++) {
129795fce210SBarry Smith PetscInt j;
129895fce210SBarry Smith for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j;
12998e3a54c0SPierre Jolivet PetscCall(PetscSortIntWithArray(indegree[i], PetscSafePointerPlusOffset(inranks, inoffset[i]), tmpoffset));
130095fce210SBarry Smith for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
130195fce210SBarry Smith }
13029566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
13039566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
13049566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(sf->nleaves, &newremote));
130595fce210SBarry Smith for (i = 0; i < sf->nleaves; i++) {
130695fce210SBarry Smith newremote[i].rank = sf->remote[i].rank;
130701365b40SToby Isaac newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
130895fce210SBarry Smith }
13099566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER));
13109566063dSJacob Faibussowitsch PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset));
131195fce210SBarry Smith }
13129566063dSJacob Faibussowitsch PetscCall(PetscFree3(inoffset, outones, outoffset));
131395fce210SBarry Smith }
131495fce210SBarry Smith *multi = sf->multi;
13153ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
131695fce210SBarry Smith }
131795fce210SBarry Smith
131895fce210SBarry Smith /*@C
131920662ed9SBarry Smith PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots of a `PetscSF`, does not remap indices
132095fce210SBarry Smith
132195fce210SBarry Smith Collective
132295fce210SBarry Smith
13234165533cSJose E. Roman Input Parameters:
132495fce210SBarry Smith + sf - original star forest
13252f04c522SBarry Smith . nselected - number of selected roots on this MPI process
13262f04c522SBarry Smith - selected - indices of the selected roots on this MPI process
132795fce210SBarry Smith
13284165533cSJose E. Roman Output Parameter:
1329cd620004SJunchao Zhang . esf - new star forest
133095fce210SBarry Smith
133195fce210SBarry Smith Level: advanced
133295fce210SBarry Smith
133395fce210SBarry Smith Note:
1334cab54364SBarry Smith To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can
133595fce210SBarry Smith be done by calling PetscSFGetGraph().
133695fce210SBarry Smith
13372f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
133895fce210SBarry Smith @*/
PetscSFCreateEmbeddedRootSF(PetscSF sf,PetscInt nselected,const PetscInt selected[],PetscSF * esf)1339cf84bf9eSBarry Smith PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt selected[], PetscSF *esf)
1340d71ae5a4SJacob Faibussowitsch {
1341cd620004SJunchao Zhang PetscInt i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
1342cd620004SJunchao Zhang const PetscInt *ilocal;
1343cd620004SJunchao Zhang signed char *rootdata, *leafdata, *leafmem;
1344ba2a7774SJunchao Zhang const PetscSFNode *iremote;
1345f659e5c7SJunchao Zhang PetscSFNode *new_iremote;
1346f659e5c7SJunchao Zhang MPI_Comm comm;
134795fce210SBarry Smith
134895fce210SBarry Smith PetscFunctionBegin;
134995fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
135029046d53SLisandro Dalcin PetscSFCheckGraphSet(sf, 1);
13514f572ea9SToby Isaac if (nselected) PetscAssertPointer(selected, 3);
13524f572ea9SToby Isaac PetscAssertPointer(esf, 4);
13530511a646SMatthew G. Knepley
13549566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf));
13559566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0));
13569566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
13579566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
1358cd620004SJunchao Zhang
135976bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
1360cd620004SJunchao Zhang PetscBool dups;
13619566063dSJacob Faibussowitsch PetscCall(PetscCheckDupsInt(nselected, selected, &dups));
136228b400f6SJacob Faibussowitsch PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups");
1363511e6246SStefano Zampini for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root index %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots);
1364cd620004SJunchao Zhang }
1365f659e5c7SJunchao Zhang
1366dbbe0bcdSBarry Smith if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf);
1367dbbe0bcdSBarry Smith else {
1368cd620004SJunchao Zhang /* A generic version of creating embedded sf */
13699566063dSJacob Faibussowitsch PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
1370cd620004SJunchao Zhang maxlocal = maxleaf - minleaf + 1;
13719566063dSJacob Faibussowitsch PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem));
13728e3a54c0SPierre Jolivet leafdata = PetscSafePointerPlusOffset(leafmem, -minleaf);
1373cd620004SJunchao Zhang /* Tag selected roots and bcast to leaves */
1374cd620004SJunchao Zhang for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1;
13759566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
13769566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
1377ba2a7774SJunchao Zhang
1378cd620004SJunchao Zhang /* Build esf with leaves that are still connected */
1379cd620004SJunchao Zhang esf_nleaves = 0;
1380cd620004SJunchao Zhang for (i = 0; i < nleaves; i++) {
1381cd620004SJunchao Zhang j = ilocal ? ilocal[i] : i;
1382cd620004SJunchao Zhang /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1383cd620004SJunchao Zhang with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1384cd620004SJunchao Zhang */
1385cd620004SJunchao Zhang esf_nleaves += (leafdata[j] ? 1 : 0);
1386cd620004SJunchao Zhang }
13879566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal));
13889566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(esf_nleaves, &new_iremote));
1389cd620004SJunchao Zhang for (i = n = 0; i < nleaves; i++) {
1390cd620004SJunchao Zhang j = ilocal ? ilocal[i] : i;
1391cd620004SJunchao Zhang if (leafdata[j]) {
1392cd620004SJunchao Zhang new_ilocal[n] = j;
1393cd620004SJunchao Zhang new_iremote[n].rank = iremote[i].rank;
1394cd620004SJunchao Zhang new_iremote[n].index = iremote[i].index;
1395fc1ede2bSMatthew G. Knepley ++n;
139695fce210SBarry Smith }
139795fce210SBarry Smith }
13989566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(comm, esf));
13999566063dSJacob Faibussowitsch PetscCall(PetscSFSetFromOptions(*esf));
14009566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
14019566063dSJacob Faibussowitsch PetscCall(PetscFree2(rootdata, leafmem));
1402f659e5c7SJunchao Zhang }
14039566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0));
14043ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
140595fce210SBarry Smith }
140695fce210SBarry Smith
14072f5fb4c2SMatthew G. Knepley /*@C
140820662ed9SBarry Smith PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves of a `PetscSF`, does not remap indices
14092f5fb4c2SMatthew G. Knepley
14102f5fb4c2SMatthew G. Knepley Collective
14112f5fb4c2SMatthew G. Knepley
14124165533cSJose E. Roman Input Parameters:
14132f5fb4c2SMatthew G. Knepley + sf - original star forest
14142f04c522SBarry Smith . nselected - number of selected leaves on this MPI process
14152f04c522SBarry Smith - selected - indices of the selected leaves on this MPI process
14162f5fb4c2SMatthew G. Knepley
14174165533cSJose E. Roman Output Parameter:
14182f5fb4c2SMatthew G. Knepley . newsf - new star forest
14192f5fb4c2SMatthew G. Knepley
14202f5fb4c2SMatthew G. Knepley Level: advanced
14212f5fb4c2SMatthew G. Knepley
14222f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
14232f5fb4c2SMatthew G. Knepley @*/
PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt selected[],PetscSF * newsf)1424cf84bf9eSBarry Smith PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt selected[], PetscSF *newsf)
1425d71ae5a4SJacob Faibussowitsch {
1426f659e5c7SJunchao Zhang const PetscSFNode *iremote;
1427f659e5c7SJunchao Zhang PetscSFNode *new_iremote;
1428f659e5c7SJunchao Zhang const PetscInt *ilocal;
1429f659e5c7SJunchao Zhang PetscInt i, nroots, *leaves, *new_ilocal;
1430f659e5c7SJunchao Zhang MPI_Comm comm;
14312f5fb4c2SMatthew G. Knepley
14322f5fb4c2SMatthew G. Knepley PetscFunctionBegin;
14332f5fb4c2SMatthew G. Knepley PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
143429046d53SLisandro Dalcin PetscSFCheckGraphSet(sf, 1);
14354f572ea9SToby Isaac if (nselected) PetscAssertPointer(selected, 3);
14364f572ea9SToby Isaac PetscAssertPointer(newsf, 4);
14372f5fb4c2SMatthew G. Knepley
1438f659e5c7SJunchao Zhang /* Uniq selected[] and put results in leaves[] */
14399566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
14409566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nselected, &leaves));
14419566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(leaves, selected, nselected));
14429566063dSJacob Faibussowitsch PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves));
144308401ef6SPierre Jolivet PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves);
1444f659e5c7SJunchao Zhang
1445f659e5c7SJunchao Zhang /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1446dbbe0bcdSBarry Smith if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf);
1447dbbe0bcdSBarry Smith else {
14489566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote));
14499566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nselected, &new_ilocal));
14509566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nselected, &new_iremote));
1451f659e5c7SJunchao Zhang for (i = 0; i < nselected; ++i) {
1452f659e5c7SJunchao Zhang const PetscInt l = leaves[i];
1453f659e5c7SJunchao Zhang new_ilocal[i] = ilocal ? ilocal[l] : l;
1454f659e5c7SJunchao Zhang new_iremote[i].rank = iremote[l].rank;
1455f659e5c7SJunchao Zhang new_iremote[i].index = iremote[l].index;
14562f5fb4c2SMatthew G. Knepley }
14579566063dSJacob Faibussowitsch PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf));
14589566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1459f659e5c7SJunchao Zhang }
14609566063dSJacob Faibussowitsch PetscCall(PetscFree(leaves));
14613ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
14622f5fb4c2SMatthew G. Knepley }
14632f5fb4c2SMatthew G. Knepley
146495fce210SBarry Smith /*@C
1465cab54364SBarry Smith PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()`
14663482bfa8SJunchao Zhang
1467c3339decSBarry Smith Collective
14683482bfa8SJunchao Zhang
14694165533cSJose E. Roman Input Parameters:
14703482bfa8SJunchao Zhang + sf - star forest on which to communicate
14713482bfa8SJunchao Zhang . unit - data type associated with each node
14723482bfa8SJunchao Zhang . rootdata - buffer to broadcast
14733482bfa8SJunchao Zhang - op - operation to use for reduction
14743482bfa8SJunchao Zhang
14754165533cSJose E. Roman Output Parameter:
14763482bfa8SJunchao Zhang . leafdata - buffer to be reduced with values from each leaf's respective root
14773482bfa8SJunchao Zhang
14783482bfa8SJunchao Zhang Level: intermediate
14793482bfa8SJunchao Zhang
148020662ed9SBarry Smith Note:
14812f04c522SBarry Smith When PETSc is configured with device support, it will use `PetscGetMemType()` to determine whether the given data pointers
14822f04c522SBarry Smith are host pointers or device pointers, which may incur a noticeable cost. If you already knew the memory type, you should
1483cab54364SBarry Smith use `PetscSFBcastWithMemTypeBegin()` instead.
1484cab54364SBarry Smith
14852f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()`
14863482bfa8SJunchao Zhang @*/
PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void * rootdata,void * leafdata,MPI_Op op)1487d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1488d71ae5a4SJacob Faibussowitsch {
1489eb02082bSJunchao Zhang PetscMemType rootmtype, leafmtype;
14903482bfa8SJunchao Zhang
14913482bfa8SJunchao Zhang PetscFunctionBegin;
14923482bfa8SJunchao Zhang PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
14939566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf));
14949566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
14959566063dSJacob Faibussowitsch PetscCall(PetscGetMemType(rootdata, &rootmtype));
14969566063dSJacob Faibussowitsch PetscCall(PetscGetMemType(leafdata, &leafmtype));
1497dbbe0bcdSBarry Smith PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
14989566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
14993ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
15003482bfa8SJunchao Zhang }
15013482bfa8SJunchao Zhang
15023482bfa8SJunchao Zhang /*@C
150320662ed9SBarry Smith PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call
150420662ed9SBarry Smith to `PetscSFBcastEnd()`
1505d0295fc0SJunchao Zhang
1506c3339decSBarry Smith Collective
1507d0295fc0SJunchao Zhang
15084165533cSJose E. Roman Input Parameters:
1509d0295fc0SJunchao Zhang + sf - star forest on which to communicate
1510d0295fc0SJunchao Zhang . unit - data type associated with each node
15112f04c522SBarry Smith . rootmtype - memory type of `rootdata`
1512d0295fc0SJunchao Zhang . rootdata - buffer to broadcast
15132f04c522SBarry Smith . leafmtype - memory type of `leafdata`
1514d0295fc0SJunchao Zhang - op - operation to use for reduction
1515d0295fc0SJunchao Zhang
15164165533cSJose E. Roman Output Parameter:
1517d0295fc0SJunchao Zhang . leafdata - buffer to be reduced with values from each leaf's respective root
1518d0295fc0SJunchao Zhang
1519d0295fc0SJunchao Zhang Level: intermediate
1520d0295fc0SJunchao Zhang
15212f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()`
1522d0295fc0SJunchao Zhang @*/
PetscSFBcastWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void * rootdata,PetscMemType leafmtype,void * leafdata,MPI_Op op)1523d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
1524d71ae5a4SJacob Faibussowitsch {
1525d0295fc0SJunchao Zhang PetscFunctionBegin;
1526d0295fc0SJunchao Zhang PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
15279566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf));
15289566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
1529dbbe0bcdSBarry Smith PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
15309566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
15313ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1532d0295fc0SJunchao Zhang }
1533d0295fc0SJunchao Zhang
1534d0295fc0SJunchao Zhang /*@C
153520662ed9SBarry Smith PetscSFBcastEnd - end a broadcast and reduce operation started with `PetscSFBcastBegin()` or `PetscSFBcastWithMemTypeBegin()`
15363482bfa8SJunchao Zhang
15373482bfa8SJunchao Zhang Collective
15383482bfa8SJunchao Zhang
15394165533cSJose E. Roman Input Parameters:
15403482bfa8SJunchao Zhang + sf - star forest
15413482bfa8SJunchao Zhang . unit - data type
15423482bfa8SJunchao Zhang . rootdata - buffer to broadcast
15433482bfa8SJunchao Zhang - op - operation to use for reduction
15443482bfa8SJunchao Zhang
15454165533cSJose E. Roman Output Parameter:
15463482bfa8SJunchao Zhang . leafdata - buffer to be reduced with values from each leaf's respective root
15473482bfa8SJunchao Zhang
15483482bfa8SJunchao Zhang Level: intermediate
15493482bfa8SJunchao Zhang
15502f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()`
15513482bfa8SJunchao Zhang @*/
PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void * rootdata,void * leafdata,MPI_Op op)1552d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1553d71ae5a4SJacob Faibussowitsch {
15543482bfa8SJunchao Zhang PetscFunctionBegin;
15553482bfa8SJunchao Zhang PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
15569566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0));
1557dbbe0bcdSBarry Smith PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op);
15589566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0));
15593ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
15603482bfa8SJunchao Zhang }
15613482bfa8SJunchao Zhang
15623482bfa8SJunchao Zhang /*@C
15632f04c522SBarry Smith PetscSFReduceBegin - begin reduction (communication) of `leafdata` into `rootdata`, to be completed with call to `PetscSFReduceEnd()`
156495fce210SBarry Smith
156595fce210SBarry Smith Collective
156695fce210SBarry Smith
15674165533cSJose E. Roman Input Parameters:
156895fce210SBarry Smith + sf - star forest
156995fce210SBarry Smith . unit - data type
15702f04c522SBarry Smith . leafdata - values to reduce (communicate)
157195fce210SBarry Smith - op - reduction operation
157295fce210SBarry Smith
15734165533cSJose E. Roman Output Parameter:
15742f04c522SBarry Smith . rootdata - result of reduction of values (`leafdata`) from all leaves to each root
157595fce210SBarry Smith
157695fce210SBarry Smith Level: intermediate
157795fce210SBarry Smith
157820662ed9SBarry Smith Note:
15792f04c522SBarry Smith When PETSc is configured with device support, it will use `PetscGetMemType()` to determine whether the given data pointers
15802f04c522SBarry Smith are host pointers or device pointers, which may incur a noticeable cost. If you already knew the memory type, you should
1581cab54364SBarry Smith use `PetscSFReduceWithMemTypeBegin()` instead.
1582d0295fc0SJunchao Zhang
15832f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`, `PetscSFReduceEnd()`, `MPI_Datatype`
158495fce210SBarry Smith @*/
PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void * leafdata,void * rootdata,MPI_Op op)1585d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1586d71ae5a4SJacob Faibussowitsch {
1587eb02082bSJunchao Zhang PetscMemType rootmtype, leafmtype;
158895fce210SBarry Smith
158995fce210SBarry Smith PetscFunctionBegin;
159095fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
15919566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf));
15929566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
15939566063dSJacob Faibussowitsch PetscCall(PetscGetMemType(rootdata, &rootmtype));
15949566063dSJacob Faibussowitsch PetscCall(PetscGetMemType(leafdata, &leafmtype));
1595f4f49eeaSPierre Jolivet PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
15969566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
15973ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
159895fce210SBarry Smith }
159995fce210SBarry Smith
160095fce210SBarry Smith /*@C
16012f04c522SBarry Smith PetscSFReduceWithMemTypeBegin - begin reduction of `leafdata` into `rootdata` with explicit memory types, to be completed with call to `PetscSFReduceEnd()`
1602d0295fc0SJunchao Zhang
1603d0295fc0SJunchao Zhang Collective
1604d0295fc0SJunchao Zhang
16054165533cSJose E. Roman Input Parameters:
1606d0295fc0SJunchao Zhang + sf - star forest
1607d0295fc0SJunchao Zhang . unit - data type
16082f04c522SBarry Smith . leafmtype - memory type of `leafdata`
1609d0295fc0SJunchao Zhang . leafdata - values to reduce
16102f04c522SBarry Smith . rootmtype - memory type of `rootdata`
1611d0295fc0SJunchao Zhang - op - reduction operation
1612d0295fc0SJunchao Zhang
16134165533cSJose E. Roman Output Parameter:
1614d0295fc0SJunchao Zhang . rootdata - result of reduction of values from all leaves of each root
1615d0295fc0SJunchao Zhang
1616d0295fc0SJunchao Zhang Level: intermediate
1617d0295fc0SJunchao Zhang
16182f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`, `PetscSFReduceEnd()`
1619d0295fc0SJunchao Zhang @*/
PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void * leafdata,PetscMemType rootmtype,void * rootdata,MPI_Op op)1620d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
1621d71ae5a4SJacob Faibussowitsch {
1622d0295fc0SJunchao Zhang PetscFunctionBegin;
1623d0295fc0SJunchao Zhang PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
16249566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf));
16259566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1626f4f49eeaSPierre Jolivet PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
16279566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
16283ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1629d0295fc0SJunchao Zhang }
1630d0295fc0SJunchao Zhang
1631d0295fc0SJunchao Zhang /*@C
163220662ed9SBarry Smith PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()` or `PetscSFReduceWithMemTypeBegin()`
163395fce210SBarry Smith
163495fce210SBarry Smith Collective
163595fce210SBarry Smith
16364165533cSJose E. Roman Input Parameters:
163795fce210SBarry Smith + sf - star forest
163895fce210SBarry Smith . unit - data type
163995fce210SBarry Smith . leafdata - values to reduce
164095fce210SBarry Smith - op - reduction operation
164195fce210SBarry Smith
16424165533cSJose E. Roman Output Parameter:
164395fce210SBarry Smith . rootdata - result of reduction of values from all leaves of each root
164495fce210SBarry Smith
164595fce210SBarry Smith Level: intermediate
164695fce210SBarry Smith
16472f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`, `PetscSFReduceBegin()`, `PetscSFReduceWithMemTypeBegin()`
164895fce210SBarry Smith @*/
PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void * leafdata,void * rootdata,MPI_Op op)1649d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1650d71ae5a4SJacob Faibussowitsch {
165195fce210SBarry Smith PetscFunctionBegin;
165295fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
16539566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0));
1654dbbe0bcdSBarry Smith PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op);
16559566063dSJacob Faibussowitsch if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0));
16563ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
165795fce210SBarry Smith }
165895fce210SBarry Smith
165995fce210SBarry Smith /*@C
16602f04c522SBarry Smith PetscSFFetchAndOpBegin - begin operation that fetches values from `rootdata` and updates it atomically by applying operation using `leafdata`,
1661cab54364SBarry Smith to be completed with `PetscSFFetchAndOpEnd()`
1662a1729e3fSJunchao Zhang
1663a1729e3fSJunchao Zhang Collective
1664a1729e3fSJunchao Zhang
16654165533cSJose E. Roman Input Parameters:
1666a1729e3fSJunchao Zhang + sf - star forest
1667a1729e3fSJunchao Zhang . unit - data type
1668a1729e3fSJunchao Zhang . leafdata - leaf values to use in reduction
1669a1729e3fSJunchao Zhang - op - operation to use for reduction
1670a1729e3fSJunchao Zhang
16714165533cSJose E. Roman Output Parameters:
1672a1729e3fSJunchao Zhang + rootdata - root values to be updated, input state is seen by first process to perform an update
16732f04c522SBarry Smith - leafupdate - state at each leaf's respective root immediately prior to atomic update
1674a1729e3fSJunchao Zhang
1675a1729e3fSJunchao Zhang Level: advanced
1676a1729e3fSJunchao Zhang
1677a1729e3fSJunchao Zhang Note:
1678a1729e3fSJunchao Zhang The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1679a1729e3fSJunchao Zhang might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1680a1729e3fSJunchao Zhang not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1681a1729e3fSJunchao Zhang integers.
1682a1729e3fSJunchao Zhang
16832f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
1684a1729e3fSJunchao Zhang @*/
PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void * rootdata,const void * leafdata,void * leafupdate,MPI_Op op)1685d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1686d71ae5a4SJacob Faibussowitsch {
1687eb02082bSJunchao Zhang PetscMemType rootmtype, leafmtype, leafupdatemtype;
1688a1729e3fSJunchao Zhang
1689a1729e3fSJunchao Zhang PetscFunctionBegin;
1690a1729e3fSJunchao Zhang PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
16919566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf));
16929566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
16939566063dSJacob Faibussowitsch PetscCall(PetscGetMemType(rootdata, &rootmtype));
16949566063dSJacob Faibussowitsch PetscCall(PetscGetMemType(leafdata, &leafmtype));
16959566063dSJacob Faibussowitsch PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype));
169608401ef6SPierre Jolivet PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1697dbbe0bcdSBarry Smith PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
16989566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
16993ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1700a1729e3fSJunchao Zhang }
1701a1729e3fSJunchao Zhang
1702a1729e3fSJunchao Zhang /*@C
1703cab54364SBarry Smith PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by
17042f04c522SBarry Smith applying operation using `leafdata`, to be completed with `PetscSFFetchAndOpEnd()`
1705d3b3e55cSJunchao Zhang
1706d3b3e55cSJunchao Zhang Collective
1707d3b3e55cSJunchao Zhang
1708d3b3e55cSJunchao Zhang Input Parameters:
1709d3b3e55cSJunchao Zhang + sf - star forest
1710d3b3e55cSJunchao Zhang . unit - data type
17112f04c522SBarry Smith . rootmtype - memory type of `rootdata`
17122f04c522SBarry Smith . leafmtype - memory type of `leafdata`
1713d3b3e55cSJunchao Zhang . leafdata - leaf values to use in reduction
17142f04c522SBarry Smith . leafupdatemtype - memory type of `leafupdate`
1715d3b3e55cSJunchao Zhang - op - operation to use for reduction
1716d3b3e55cSJunchao Zhang
1717d3b3e55cSJunchao Zhang Output Parameters:
1718d3b3e55cSJunchao Zhang + rootdata - root values to be updated, input state is seen by first process to perform an update
17192f04c522SBarry Smith - leafupdate - state at each leaf's respective root immediately prior to atomic update
1720d3b3e55cSJunchao Zhang
1721d3b3e55cSJunchao Zhang Level: advanced
1722d3b3e55cSJunchao Zhang
1723cab54364SBarry Smith Note:
1724cab54364SBarry Smith See `PetscSFFetchAndOpBegin()` for more details.
1725d3b3e55cSJunchao Zhang
17262f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpEnd()`
1727d3b3e55cSJunchao Zhang @*/
PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void * rootdata,PetscMemType leafmtype,const void * leafdata,PetscMemType leafupdatemtype,void * leafupdate,MPI_Op op)1728d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op)
1729d71ae5a4SJacob Faibussowitsch {
1730d3b3e55cSJunchao Zhang PetscFunctionBegin;
1731d3b3e55cSJunchao Zhang PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
17329566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf));
17339566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
173408401ef6SPierre Jolivet PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1735dbbe0bcdSBarry Smith PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
17369566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
17373ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1738d3b3e55cSJunchao Zhang }
1739d3b3e55cSJunchao Zhang
1740d3b3e55cSJunchao Zhang /*@C
174120662ed9SBarry Smith PetscSFFetchAndOpEnd - end operation started in matching call to `PetscSFFetchAndOpBegin()` or `PetscSFFetchAndOpWithMemTypeBegin()`
17422f04c522SBarry Smith to fetch values from roots and update atomically by applying operation using `leafdata`
1743a1729e3fSJunchao Zhang
1744a1729e3fSJunchao Zhang Collective
1745a1729e3fSJunchao Zhang
17464165533cSJose E. Roman Input Parameters:
1747a1729e3fSJunchao Zhang + sf - star forest
1748a1729e3fSJunchao Zhang . unit - data type
1749a1729e3fSJunchao Zhang . leafdata - leaf values to use in reduction
1750a1729e3fSJunchao Zhang - op - operation to use for reduction
1751a1729e3fSJunchao Zhang
17524165533cSJose E. Roman Output Parameters:
1753a1729e3fSJunchao Zhang + rootdata - root values to be updated, input state is seen by first process to perform an update
17542f04c522SBarry Smith - leafupdate - state at each leaf's respective root immediately prior to atomic update
1755a1729e3fSJunchao Zhang
1756a1729e3fSJunchao Zhang Level: advanced
1757a1729e3fSJunchao Zhang
17582f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpBegin()`, `PetscSFFetchAndOpWithMemTypeBegin()`
1759a1729e3fSJunchao Zhang @*/
PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void * rootdata,const void * leafdata,void * leafupdate,MPI_Op op)1760d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1761d71ae5a4SJacob Faibussowitsch {
1762a1729e3fSJunchao Zhang PetscFunctionBegin;
1763a1729e3fSJunchao Zhang PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
17649566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
1765dbbe0bcdSBarry Smith PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op);
17669566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
17673ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1768a1729e3fSJunchao Zhang }
1769a1729e3fSJunchao Zhang
1770a1729e3fSJunchao Zhang /*@C
17712f04c522SBarry Smith PetscSFComputeDegreeBegin - begin computation of the degree of each root vertex, to be completed with `PetscSFComputeDegreeEnd()`
177295fce210SBarry Smith
177395fce210SBarry Smith Collective
177495fce210SBarry Smith
17754165533cSJose E. Roman Input Parameter:
177695fce210SBarry Smith . sf - star forest
177795fce210SBarry Smith
17784165533cSJose E. Roman Output Parameter:
17792f04c522SBarry Smith . degree - degree (the number of leaves) of each root vertex
178095fce210SBarry Smith
178195fce210SBarry Smith Level: advanced
178295fce210SBarry Smith
1783cab54364SBarry Smith Note:
178420662ed9SBarry Smith The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
1785ffe67aa5SVáclav Hapla
17862f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()`
178795fce210SBarry Smith @*/
PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt * degree[])17886497c311SBarry Smith PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt *degree[])
1789d71ae5a4SJacob Faibussowitsch {
179095fce210SBarry Smith PetscFunctionBegin;
179195fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
179295fce210SBarry Smith PetscSFCheckGraphSet(sf, 1);
17934f572ea9SToby Isaac PetscAssertPointer(degree, 2);
1794803bd9e8SMatthew G. Knepley if (!sf->degreeknown) {
17955b0d146aSStefano Zampini PetscInt i, nroots = sf->nroots, maxlocal;
179628b400f6SJacob Faibussowitsch PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
17975b0d146aSStefano Zampini maxlocal = sf->maxleaf - sf->minleaf + 1;
17989566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nroots, &sf->degree));
17999566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
180029046d53SLisandro Dalcin for (i = 0; i < nroots; i++) sf->degree[i] = 0;
18019837ea96SMatthew G. Knepley for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1;
18029566063dSJacob Faibussowitsch PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
180395fce210SBarry Smith }
180495fce210SBarry Smith *degree = NULL;
18053ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
180695fce210SBarry Smith }
180795fce210SBarry Smith
180895fce210SBarry Smith /*@C
1809cab54364SBarry Smith PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()`
181095fce210SBarry Smith
181195fce210SBarry Smith Collective
181295fce210SBarry Smith
18134165533cSJose E. Roman Input Parameter:
181495fce210SBarry Smith . sf - star forest
181595fce210SBarry Smith
18164165533cSJose E. Roman Output Parameter:
181795fce210SBarry Smith . degree - degree of each root vertex
181895fce210SBarry Smith
181995fce210SBarry Smith Level: developer
182095fce210SBarry Smith
1821cab54364SBarry Smith Note:
182220662ed9SBarry Smith The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
1823ffe67aa5SVáclav Hapla
18242f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()`
182595fce210SBarry Smith @*/
PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt * degree[])18269c9354e5SBarry Smith PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt *degree[])
1827d71ae5a4SJacob Faibussowitsch {
182895fce210SBarry Smith PetscFunctionBegin;
182995fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
183095fce210SBarry Smith PetscSFCheckGraphSet(sf, 1);
18314f572ea9SToby Isaac PetscAssertPointer(degree, 2);
183295fce210SBarry Smith if (!sf->degreeknown) {
183328b400f6SJacob Faibussowitsch PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
18349566063dSJacob Faibussowitsch PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
18359566063dSJacob Faibussowitsch PetscCall(PetscFree(sf->degreetmp));
183695fce210SBarry Smith sf->degreeknown = PETSC_TRUE;
183795fce210SBarry Smith }
183895fce210SBarry Smith *degree = sf->degree;
18393ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
184095fce210SBarry Smith }
184195fce210SBarry Smith
1842673100f5SVaclav Hapla /*@C
184320662ed9SBarry Smith PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-`PetscSF` returned by `PetscSFGetMultiSF()`).
184466dfcd1aSVaclav Hapla Each multi-root is assigned index of the corresponding original root.
1845673100f5SVaclav Hapla
1846673100f5SVaclav Hapla Collective
1847673100f5SVaclav Hapla
18484165533cSJose E. Roman Input Parameters:
1849673100f5SVaclav Hapla + sf - star forest
18502f04c522SBarry Smith - degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()` and `PetscSFComputeDegreeEnd()`
1851673100f5SVaclav Hapla
18524165533cSJose E. Roman Output Parameters:
185320662ed9SBarry Smith + nMultiRoots - (optional) number of multi-roots (roots of multi-`PetscSF`)
185420662ed9SBarry Smith - multiRootsOrigNumbering - original indices of multi-roots; length of this array is `nMultiRoots`
1855673100f5SVaclav Hapla
1856673100f5SVaclav Hapla Level: developer
1857673100f5SVaclav Hapla
1858cab54364SBarry Smith Note:
18592f04c522SBarry Smith The returned array `multiRootsOrigNumbering` should be destroyed with `PetscFree()` when no longer needed.
1860ffe67aa5SVáclav Hapla
18612f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()`
1862673100f5SVaclav Hapla @*/
PetscSFComputeMultiRootOriginalNumbering(PetscSF sf,const PetscInt degree[],PetscInt * nMultiRoots,PetscInt * multiRootsOrigNumbering[])1863d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1864d71ae5a4SJacob Faibussowitsch {
1865673100f5SVaclav Hapla PetscSF msf;
186663bfac88SBarry Smith PetscInt k = 0, nroots, nmroots;
1867673100f5SVaclav Hapla
1868673100f5SVaclav Hapla PetscFunctionBegin;
1869673100f5SVaclav Hapla PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
18709566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
18714f572ea9SToby Isaac if (nroots) PetscAssertPointer(degree, 2);
18724f572ea9SToby Isaac if (nMultiRoots) PetscAssertPointer(nMultiRoots, 3);
18734f572ea9SToby Isaac PetscAssertPointer(multiRootsOrigNumbering, 4);
18749566063dSJacob Faibussowitsch PetscCall(PetscSFGetMultiSF(sf, &msf));
18759566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL));
18769566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering));
187763bfac88SBarry Smith for (PetscInt i = 0; i < nroots; i++) {
1878673100f5SVaclav Hapla if (!degree[i]) continue;
187963bfac88SBarry Smith for (PetscInt j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i;
1880673100f5SVaclav Hapla }
188108401ef6SPierre Jolivet PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail");
188266dfcd1aSVaclav Hapla if (nMultiRoots) *nMultiRoots = nmroots;
18833ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1884673100f5SVaclav Hapla }
1885673100f5SVaclav Hapla
188695fce210SBarry Smith /*@C
1887cab54364SBarry Smith PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()`
188895fce210SBarry Smith
188995fce210SBarry Smith Collective
189095fce210SBarry Smith
18914165533cSJose E. Roman Input Parameters:
189295fce210SBarry Smith + sf - star forest
189395fce210SBarry Smith . unit - data type
189495fce210SBarry Smith - leafdata - leaf data to gather to roots
189595fce210SBarry Smith
18964165533cSJose E. Roman Output Parameter:
18972f04c522SBarry Smith . multirootdata - root buffer to gather into, amount of space per root is equal to its degree (which is the number of leaves that root has)
189895fce210SBarry Smith
189995fce210SBarry Smith Level: intermediate
190095fce210SBarry Smith
19012f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
190295fce210SBarry Smith @*/
PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void * leafdata,void * multirootdata)1903d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1904d71ae5a4SJacob Faibussowitsch {
1905a5526d50SJunchao Zhang PetscSF multi = NULL;
190695fce210SBarry Smith
190795fce210SBarry Smith PetscFunctionBegin;
190895fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
19099566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf));
19109566063dSJacob Faibussowitsch PetscCall(PetscSFGetMultiSF(sf, &multi));
19119566063dSJacob Faibussowitsch PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE));
19123ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
191395fce210SBarry Smith }
191495fce210SBarry Smith
191595fce210SBarry Smith /*@C
1916cab54364SBarry Smith PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()`
191795fce210SBarry Smith
191895fce210SBarry Smith Collective
191995fce210SBarry Smith
19204165533cSJose E. Roman Input Parameters:
192195fce210SBarry Smith + sf - star forest
192295fce210SBarry Smith . unit - data type
192395fce210SBarry Smith - leafdata - leaf data to gather to roots
192495fce210SBarry Smith
19254165533cSJose E. Roman Output Parameter:
19262f04c522SBarry Smith . multirootdata - root buffer to gather into, amount of space per root is equal to its degree (which is the number of leaves that root has)
192795fce210SBarry Smith
192895fce210SBarry Smith Level: intermediate
192995fce210SBarry Smith
19302f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
193195fce210SBarry Smith @*/
PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void * leafdata,void * multirootdata)1932d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1933d71ae5a4SJacob Faibussowitsch {
1934a5526d50SJunchao Zhang PetscSF multi = NULL;
193595fce210SBarry Smith
193695fce210SBarry Smith PetscFunctionBegin;
193795fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
19389566063dSJacob Faibussowitsch PetscCall(PetscSFGetMultiSF(sf, &multi));
19399566063dSJacob Faibussowitsch PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE));
19403ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
194195fce210SBarry Smith }
194295fce210SBarry Smith
194395fce210SBarry Smith /*@C
1944cab54364SBarry Smith PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()`
194595fce210SBarry Smith
194695fce210SBarry Smith Collective
194795fce210SBarry Smith
19484165533cSJose E. Roman Input Parameters:
194995fce210SBarry Smith + sf - star forest
195095fce210SBarry Smith . unit - data type
19512f04c522SBarry Smith - multirootdata - root buffer to send to each leaf, one unit of data is provided to each leaf thus the amount of space per root is equal to its degree (which is the number of leaves that root has)
195295fce210SBarry Smith
19534165533cSJose E. Roman Output Parameter:
195495fce210SBarry Smith . leafdata - leaf data to be update with personal data from each respective root
195595fce210SBarry Smith
195695fce210SBarry Smith Level: intermediate
195795fce210SBarry Smith
19582f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
195995fce210SBarry Smith @*/
PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void * multirootdata,void * leafdata)1960d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1961d71ae5a4SJacob Faibussowitsch {
1962a5526d50SJunchao Zhang PetscSF multi = NULL;
196395fce210SBarry Smith
196495fce210SBarry Smith PetscFunctionBegin;
196595fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
19669566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf));
19679566063dSJacob Faibussowitsch PetscCall(PetscSFGetMultiSF(sf, &multi));
19689566063dSJacob Faibussowitsch PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE));
19693ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
197095fce210SBarry Smith }
197195fce210SBarry Smith
197295fce210SBarry Smith /*@C
1973cab54364SBarry Smith PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()`
197495fce210SBarry Smith
197595fce210SBarry Smith Collective
197695fce210SBarry Smith
19774165533cSJose E. Roman Input Parameters:
197895fce210SBarry Smith + sf - star forest
197995fce210SBarry Smith . unit - data type
198095fce210SBarry Smith - multirootdata - root buffer to send to each leaf, one unit of data per leaf
198195fce210SBarry Smith
19824165533cSJose E. Roman Output Parameter:
198395fce210SBarry Smith . leafdata - leaf data to be update with personal data from each respective root
198495fce210SBarry Smith
198595fce210SBarry Smith Level: intermediate
198695fce210SBarry Smith
19872f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()`
198895fce210SBarry Smith @*/
PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void * multirootdata,void * leafdata)1989d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1990d71ae5a4SJacob Faibussowitsch {
1991a5526d50SJunchao Zhang PetscSF multi = NULL;
199295fce210SBarry Smith
199395fce210SBarry Smith PetscFunctionBegin;
199495fce210SBarry Smith PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
19959566063dSJacob Faibussowitsch PetscCall(PetscSFGetMultiSF(sf, &multi));
19969566063dSJacob Faibussowitsch PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE));
19973ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
199895fce210SBarry Smith }
1999a7b3aa13SAta Mesgarnejad
PetscSFCheckLeavesUnique_Private(PetscSF sf)2000d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
2001d71ae5a4SJacob Faibussowitsch {
2002a072220fSLawrence Mitchell PetscInt i, n, nleaves;
2003a072220fSLawrence Mitchell const PetscInt *ilocal = NULL;
2004a072220fSLawrence Mitchell PetscHSetI seen;
2005a072220fSLawrence Mitchell
2006a072220fSLawrence Mitchell PetscFunctionBegin;
2007b458e8f1SJose E. Roman if (PetscDefined(USE_DEBUG)) {
20089566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL));
20099566063dSJacob Faibussowitsch PetscCall(PetscHSetICreate(&seen));
2010a072220fSLawrence Mitchell for (i = 0; i < nleaves; i++) {
2011a072220fSLawrence Mitchell const PetscInt leaf = ilocal ? ilocal[i] : i;
20129566063dSJacob Faibussowitsch PetscCall(PetscHSetIAdd(seen, leaf));
2013a072220fSLawrence Mitchell }
20149566063dSJacob Faibussowitsch PetscCall(PetscHSetIGetSize(seen, &n));
201508401ef6SPierre Jolivet PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique");
20169566063dSJacob Faibussowitsch PetscCall(PetscHSetIDestroy(&seen));
2017b458e8f1SJose E. Roman }
20183ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
2019a072220fSLawrence Mitchell }
202054729392SStefano Zampini
2021a7b3aa13SAta Mesgarnejad /*@
2022cab54364SBarry Smith PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view
2023a7b3aa13SAta Mesgarnejad
2024a7b3aa13SAta Mesgarnejad Input Parameters:
2025cab54364SBarry Smith + sfA - The first `PetscSF`
2026cab54364SBarry Smith - sfB - The second `PetscSF`
2027a7b3aa13SAta Mesgarnejad
20282fe279fdSBarry Smith Output Parameter:
2029cab54364SBarry Smith . sfBA - The composite `PetscSF`
2030a7b3aa13SAta Mesgarnejad
2031a7b3aa13SAta Mesgarnejad Level: developer
2032a7b3aa13SAta Mesgarnejad
2033a072220fSLawrence Mitchell Notes:
2034cab54364SBarry Smith Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
203554729392SStefano Zampini forests, i.e. the same leaf is not connected with different roots.
203654729392SStefano Zampini
203720662ed9SBarry Smith `sfA`'s leaf space and `sfB`'s root space might be partially overlapped. The composition builds
203820662ed9SBarry Smith a graph with `sfA`'s roots and `sfB`'s leaves only when there is a path between them. Unconnected
20392f04c522SBarry Smith nodes (roots or leaves) are not in `sfBA`. Doing a `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a
20402f04c522SBarry Smith `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on `sfA`, then a `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on `sfB`, on connected nodes.
2041a072220fSLawrence Mitchell
20422f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
2043a7b3aa13SAta Mesgarnejad @*/
PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF * sfBA)2044d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2045d71ae5a4SJacob Faibussowitsch {
2046a7b3aa13SAta Mesgarnejad const PetscSFNode *remotePointsA, *remotePointsB;
2047d41018fbSJunchao Zhang PetscSFNode *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB;
204854729392SStefano Zampini const PetscInt *localPointsA, *localPointsB;
204954729392SStefano Zampini PetscInt *localPointsBA;
205054729392SStefano Zampini PetscInt i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA;
205154729392SStefano Zampini PetscBool denseB;
2052a7b3aa13SAta Mesgarnejad
2053a7b3aa13SAta Mesgarnejad PetscFunctionBegin;
2054a7b3aa13SAta Mesgarnejad PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
205529046d53SLisandro Dalcin PetscSFCheckGraphSet(sfA, 1);
205629046d53SLisandro Dalcin PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
205729046d53SLisandro Dalcin PetscSFCheckGraphSet(sfB, 2);
205854729392SStefano Zampini PetscCheckSameComm(sfA, 1, sfB, 2);
20594f572ea9SToby Isaac PetscAssertPointer(sfBA, 3);
20609566063dSJacob Faibussowitsch PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
20619566063dSJacob Faibussowitsch PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
206254729392SStefano Zampini
20639566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
20649566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
206520662ed9SBarry Smith /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size
206620662ed9SBarry Smith numRootsB; otherwise, garbage will be broadcasted.
206720662ed9SBarry Smith Example (comm size = 1):
206820662ed9SBarry Smith sfA: 0 <- (0, 0)
206920662ed9SBarry Smith sfB: 100 <- (0, 0)
207020662ed9SBarry Smith 101 <- (0, 1)
207120662ed9SBarry Smith Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid tartget
207220662ed9SBarry Smith of sfB, it has to be recasted as [(0, 0), (-1, -1)] so that points 100 and 101 would
207320662ed9SBarry Smith receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on
207420662ed9SBarry Smith remotePointsA; if not recasted, point 101 would receive a garbage value. */
20759566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA));
207654729392SStefano Zampini for (i = 0; i < numRootsB; i++) {
207754729392SStefano Zampini reorderedRemotePointsA[i].rank = -1;
207854729392SStefano Zampini reorderedRemotePointsA[i].index = -1;
207954729392SStefano Zampini }
208054729392SStefano Zampini for (i = 0; i < numLeavesA; i++) {
20810ea77edaSksagiyam PetscInt localp = localPointsA ? localPointsA[i] : i;
20820ea77edaSksagiyam
20830ea77edaSksagiyam if (localp >= numRootsB) continue;
20840ea77edaSksagiyam reorderedRemotePointsA[localp] = remotePointsA[i];
208554729392SStefano Zampini }
2086d41018fbSJunchao Zhang remotePointsA = reorderedRemotePointsA;
20879566063dSJacob Faibussowitsch PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
20889566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB));
20890ea77edaSksagiyam for (i = 0; i < maxleaf - minleaf + 1; i++) {
20900ea77edaSksagiyam leafdataB[i].rank = -1;
20910ea77edaSksagiyam leafdataB[i].index = -1;
20920ea77edaSksagiyam }
20936497c311SBarry Smith PetscCall(PetscSFBcastBegin(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
20946497c311SBarry Smith PetscCall(PetscSFBcastEnd(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
20959566063dSJacob Faibussowitsch PetscCall(PetscFree(reorderedRemotePointsA));
2096d41018fbSJunchao Zhang
209754729392SStefano Zampini denseB = (PetscBool)!localPointsB;
209854729392SStefano Zampini for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
209954729392SStefano Zampini if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE;
210054729392SStefano Zampini else numLeavesBA++;
210154729392SStefano Zampini }
210254729392SStefano Zampini if (denseB) {
2103d41018fbSJunchao Zhang localPointsBA = NULL;
2104d41018fbSJunchao Zhang remotePointsBA = leafdataB;
2105d41018fbSJunchao Zhang } else {
21069566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA));
21079566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA));
210854729392SStefano Zampini for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
210954729392SStefano Zampini const PetscInt l = localPointsB ? localPointsB[i] : i;
211054729392SStefano Zampini
211154729392SStefano Zampini if (leafdataB[l - minleaf].rank == -1) continue;
211254729392SStefano Zampini remotePointsBA[numLeavesBA] = leafdataB[l - minleaf];
211354729392SStefano Zampini localPointsBA[numLeavesBA] = l;
211454729392SStefano Zampini numLeavesBA++;
211554729392SStefano Zampini }
21169566063dSJacob Faibussowitsch PetscCall(PetscFree(leafdataB));
2117d41018fbSJunchao Zhang }
21189566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
21199566063dSJacob Faibussowitsch PetscCall(PetscSFSetFromOptions(*sfBA));
21209566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
21213ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
2122a7b3aa13SAta Mesgarnejad }
21231c6ba672SJunchao Zhang
212404c0ada0SJunchao Zhang /*@
2125cab54364SBarry Smith PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one
212604c0ada0SJunchao Zhang
212704c0ada0SJunchao Zhang Input Parameters:
2128cab54364SBarry Smith + sfA - The first `PetscSF`
2129cab54364SBarry Smith - sfB - The second `PetscSF`
213004c0ada0SJunchao Zhang
21312fe279fdSBarry Smith Output Parameter:
2132cab54364SBarry Smith . sfBA - The composite `PetscSF`.
213304c0ada0SJunchao Zhang
213404c0ada0SJunchao Zhang Level: developer
213504c0ada0SJunchao Zhang
213654729392SStefano Zampini Notes:
213720662ed9SBarry Smith Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
213854729392SStefano Zampini forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
213920662ed9SBarry Smith second `PetscSF` must have a degree of 1, i.e., no roots have more than one leaf connected.
214054729392SStefano Zampini
214120662ed9SBarry Smith `sfA`'s leaf space and `sfB`'s leaf space might be partially overlapped. The composition builds
214220662ed9SBarry Smith a graph with `sfA`'s roots and `sfB`'s roots only when there is a path between them. Unconnected
21432f04c522SBarry Smith roots are not in `sfBA`. Doing a `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a `PetscSFBcastBegin()` and `PetscSFBcastEnd()`
214420662ed9SBarry Smith on `sfA`, then
21452f04c522SBarry Smith a `PetscSFReduceBegin()` and `PetscSFReduceEnd()` on `sfB`, on connected roots.
214654729392SStefano Zampini
21472f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
214804c0ada0SJunchao Zhang @*/
PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF * sfBA)2149d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2150d71ae5a4SJacob Faibussowitsch {
215104c0ada0SJunchao Zhang const PetscSFNode *remotePointsA, *remotePointsB;
215204c0ada0SJunchao Zhang PetscSFNode *remotePointsBA;
215304c0ada0SJunchao Zhang const PetscInt *localPointsA, *localPointsB;
215454729392SStefano Zampini PetscSFNode *reorderedRemotePointsA = NULL;
215554729392SStefano Zampini PetscInt i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA;
21565b0d146aSStefano Zampini MPI_Op op;
21575b0d146aSStefano Zampini #if defined(PETSC_USE_64BIT_INDICES)
21585b0d146aSStefano Zampini PetscBool iswin;
21595b0d146aSStefano Zampini #endif
216004c0ada0SJunchao Zhang
216104c0ada0SJunchao Zhang PetscFunctionBegin;
216204c0ada0SJunchao Zhang PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
216304c0ada0SJunchao Zhang PetscSFCheckGraphSet(sfA, 1);
216404c0ada0SJunchao Zhang PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
216504c0ada0SJunchao Zhang PetscSFCheckGraphSet(sfB, 2);
216654729392SStefano Zampini PetscCheckSameComm(sfA, 1, sfB, 2);
21674f572ea9SToby Isaac PetscAssertPointer(sfBA, 3);
21689566063dSJacob Faibussowitsch PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
21699566063dSJacob Faibussowitsch PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
217054729392SStefano Zampini
21719566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
21729566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
21735b0d146aSStefano Zampini
21745b0d146aSStefano Zampini /* TODO: Check roots of sfB have degree of 1 */
21755b0d146aSStefano Zampini /* Once we implement it, we can replace the MPI_MAXLOC
217683df288dSJunchao Zhang with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
21775b0d146aSStefano Zampini We use MPI_MAXLOC only to have a deterministic output from this routine if
21785b0d146aSStefano Zampini the root condition is not meet.
21795b0d146aSStefano Zampini */
21805b0d146aSStefano Zampini op = MPI_MAXLOC;
21815b0d146aSStefano Zampini #if defined(PETSC_USE_64BIT_INDICES)
21825b0d146aSStefano Zampini /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
21839566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin));
218483df288dSJunchao Zhang if (iswin) op = MPI_REPLACE;
21855b0d146aSStefano Zampini #endif
21865b0d146aSStefano Zampini
21879566063dSJacob Faibussowitsch PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
21889566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA));
218954729392SStefano Zampini for (i = 0; i < maxleaf - minleaf + 1; i++) {
219054729392SStefano Zampini reorderedRemotePointsA[i].rank = -1;
219154729392SStefano Zampini reorderedRemotePointsA[i].index = -1;
219254729392SStefano Zampini }
219354729392SStefano Zampini if (localPointsA) {
219454729392SStefano Zampini for (i = 0; i < numLeavesA; i++) {
219554729392SStefano Zampini if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
219654729392SStefano Zampini reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
219754729392SStefano Zampini }
219854729392SStefano Zampini } else {
219954729392SStefano Zampini for (i = 0; i < numLeavesA; i++) {
220054729392SStefano Zampini if (i > maxleaf || i < minleaf) continue;
220154729392SStefano Zampini reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
220254729392SStefano Zampini }
220354729392SStefano Zampini }
220454729392SStefano Zampini
22059566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(numRootsB, &localPointsBA));
22069566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(numRootsB, &remotePointsBA));
220754729392SStefano Zampini for (i = 0; i < numRootsB; i++) {
220854729392SStefano Zampini remotePointsBA[i].rank = -1;
220954729392SStefano Zampini remotePointsBA[i].index = -1;
221054729392SStefano Zampini }
221154729392SStefano Zampini
22126497c311SBarry Smith PetscCall(PetscSFReduceBegin(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
22136497c311SBarry Smith PetscCall(PetscSFReduceEnd(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
22149566063dSJacob Faibussowitsch PetscCall(PetscFree(reorderedRemotePointsA));
221554729392SStefano Zampini for (i = 0, numLeavesBA = 0; i < numRootsB; i++) {
221654729392SStefano Zampini if (remotePointsBA[i].rank == -1) continue;
221754729392SStefano Zampini remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
221854729392SStefano Zampini remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
221954729392SStefano Zampini localPointsBA[numLeavesBA] = i;
222054729392SStefano Zampini numLeavesBA++;
222154729392SStefano Zampini }
22229566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
22239566063dSJacob Faibussowitsch PetscCall(PetscSFSetFromOptions(*sfBA));
22249566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
22253ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
222604c0ada0SJunchao Zhang }
222704c0ada0SJunchao Zhang
22281c6ba672SJunchao Zhang /*
2229cab54364SBarry Smith PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF`
22301c6ba672SJunchao Zhang
22312fe279fdSBarry Smith Input Parameter:
2232cab54364SBarry Smith . sf - The global `PetscSF`
22331c6ba672SJunchao Zhang
22342fe279fdSBarry Smith Output Parameter:
2235cab54364SBarry Smith . out - The local `PetscSF`
2236cab54364SBarry Smith
22372f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`
22381c6ba672SJunchao Zhang */
PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF * out)2239d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out)
2240d71ae5a4SJacob Faibussowitsch {
22411c6ba672SJunchao Zhang MPI_Comm comm;
22421c6ba672SJunchao Zhang PetscMPIInt myrank;
22431c6ba672SJunchao Zhang const PetscInt *ilocal;
22441c6ba672SJunchao Zhang const PetscSFNode *iremote;
22451c6ba672SJunchao Zhang PetscInt i, j, nroots, nleaves, lnleaves, *lilocal;
22461c6ba672SJunchao Zhang PetscSFNode *liremote;
22471c6ba672SJunchao Zhang PetscSF lsf;
22481c6ba672SJunchao Zhang
22491c6ba672SJunchao Zhang PetscFunctionBegin;
22501c6ba672SJunchao Zhang PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2251dbbe0bcdSBarry Smith if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out);
2252dbbe0bcdSBarry Smith else {
2253835f2295SStefano Zampini PetscMPIInt irank;
2254835f2295SStefano Zampini
22551c6ba672SJunchao Zhang /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
22569566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
22579566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &myrank));
22581c6ba672SJunchao Zhang
22591c6ba672SJunchao Zhang /* Find out local edges and build a local SF */
22609566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
22619371c9d4SSatish Balay for (i = lnleaves = 0; i < nleaves; i++) {
2262835f2295SStefano Zampini PetscCall(PetscMPIIntCast(iremote[i].rank, &irank));
2263835f2295SStefano Zampini if (irank == myrank) lnleaves++;
22649371c9d4SSatish Balay }
22659566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(lnleaves, &lilocal));
22669566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(lnleaves, &liremote));
22671c6ba672SJunchao Zhang
22681c6ba672SJunchao Zhang for (i = j = 0; i < nleaves; i++) {
2269835f2295SStefano Zampini PetscCall(PetscMPIIntCast(iremote[i].rank, &irank));
2270835f2295SStefano Zampini if (irank == myrank) {
22711c6ba672SJunchao Zhang lilocal[j] = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
22721c6ba672SJunchao Zhang liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */
22731c6ba672SJunchao Zhang liremote[j].index = iremote[i].index;
22741c6ba672SJunchao Zhang j++;
22751c6ba672SJunchao Zhang }
22761c6ba672SJunchao Zhang }
22779566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf));
22789566063dSJacob Faibussowitsch PetscCall(PetscSFSetFromOptions(lsf));
22799566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER));
22809566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(lsf));
22811c6ba672SJunchao Zhang *out = lsf;
22821c6ba672SJunchao Zhang }
22833ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
22841c6ba672SJunchao Zhang }
2285dd5b3ca6SJunchao Zhang
2286dd5b3ca6SJunchao Zhang /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void * rootdata,void * leafdata)2287d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata)
2288d71ae5a4SJacob Faibussowitsch {
2289eb02082bSJunchao Zhang PetscMemType rootmtype, leafmtype;
2290dd5b3ca6SJunchao Zhang
2291dd5b3ca6SJunchao Zhang PetscFunctionBegin;
2292dd5b3ca6SJunchao Zhang PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
22939566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(sf));
22949566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
22959566063dSJacob Faibussowitsch PetscCall(PetscGetMemType(rootdata, &rootmtype));
22969566063dSJacob Faibussowitsch PetscCall(PetscGetMemType(leafdata, &leafmtype));
2297dbbe0bcdSBarry Smith PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata);
22989566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
22993ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
2300dd5b3ca6SJunchao Zhang }
2301dd5b3ca6SJunchao Zhang
2302157edd7aSVaclav Hapla /*@
23032f04c522SBarry Smith PetscSFConcatenate - concatenate multiple `PetscSF` into a new `PetscSF`
2304157edd7aSVaclav Hapla
2305157edd7aSVaclav Hapla Input Parameters:
2306157edd7aSVaclav Hapla + comm - the communicator
2307cab54364SBarry Smith . nsfs - the number of input `PetscSF`
2308cab54364SBarry Smith . sfs - the array of input `PetscSF`
23091f40158dSVaclav Hapla . rootMode - the root mode specifying how roots are handled
231020662ed9SBarry Smith - leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or `NULL` for contiguous storage
2311157edd7aSVaclav Hapla
23122fe279fdSBarry Smith Output Parameter:
2313cab54364SBarry Smith . newsf - The resulting `PetscSF`
2314157edd7aSVaclav Hapla
23151f40158dSVaclav Hapla Level: advanced
2316157edd7aSVaclav Hapla
2317157edd7aSVaclav Hapla Notes:
231820662ed9SBarry Smith The communicator of all `PetscSF`s in `sfs` must be comm.
2319157edd7aSVaclav Hapla
232020662ed9SBarry Smith Leaves are always concatenated locally, keeping them ordered by the input `PetscSF` index and original local order.
232120662ed9SBarry Smith
232220662ed9SBarry Smith The offsets in `leafOffsets` are added to the original leaf indices.
232320662ed9SBarry Smith
23242f04c522SBarry Smith If all input `PetscSF`s use contiguous leaf storage (`ilocal` = `NULL`), `leafOffsets` can be passed as `NULL` as well.
232520662ed9SBarry Smith In this case, `NULL` is also passed as `ilocal` to the resulting `PetscSF`.
232620662ed9SBarry Smith
232720662ed9SBarry Smith If any input `PetscSF` has non-null `ilocal`, `leafOffsets` is needed to distinguish leaves from different input `PetscSF`s.
2328157edd7aSVaclav Hapla In this case, user is responsible to provide correct offsets so that the resulting leaves are unique (otherwise an error occurs).
2329157edd7aSVaclav Hapla
233020662ed9SBarry Smith All root modes retain the essential connectivity condition.
23312f04c522SBarry Smith
233220662ed9SBarry Smith If two leaves of the same input `PetscSF` are connected (sharing the same root), they are also connected in the output `PetscSF`.
23332f04c522SBarry Smith
233420662ed9SBarry Smith Parameter `rootMode` controls how the input root spaces are combined.
233520662ed9SBarry Smith For `PETSCSF_CONCATENATE_ROOTMODE_SHARED`, the root space is considered the same for each input `PetscSF` (checked in debug mode)
233620662ed9SBarry Smith and is also the same in the output `PetscSF`.
23372f04c522SBarry Smith
23381f40158dSVaclav Hapla For `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, the input root spaces are taken as separate and joined.
23391f40158dSVaclav Hapla `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` joins the root spaces locally;
23402f04c522SBarry Smith roots of sfs[0], sfs[1], sfs[2], ... are joined on each MPI process separately, ordered by input `PetscSF` and original local index, and renumbered contiguously.
23411f40158dSVaclav Hapla `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL` joins the root spaces globally;
23421593df67SStefano Zampini roots of sfs[0], sfs[1], sfs[2], ... are joined globally, ordered by input `PetscSF` index and original global index, and renumbered contiguously;
23432f04c522SBarry Smith the original root MPI processes are ignored.
23441f40158dSVaclav Hapla For both `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`,
23452f04c522SBarry Smith the output `PetscSF`'s root layout is such that the local number of roots is a sum of the input `PetscSF`'s local numbers of roots on each MPI process
234620662ed9SBarry Smith to keep the load balancing.
23472f04c522SBarry Smith However, for `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, roots can move to different MPI processes.
23481f40158dSVaclav Hapla
23491f40158dSVaclav Hapla Example:
23501f40158dSVaclav Hapla We can use src/vec/is/sf/tests/ex18.c to compare the root modes. By running
235120662ed9SBarry Smith .vb
235220662ed9SBarry Smith make -C $PETSC_DIR/src/vec/is/sf/tests ex18
235320662ed9SBarry Smith for m in {local,global,shared}; do
2354*a8cf87e0SJunchao Zhang mpiexec -n 2 $PETSC_DIR/src/vec/is/sf/tests/ex18 -nsfs 2 -n 2 -root_mode $m -sf_view
235520662ed9SBarry Smith done
235620662ed9SBarry Smith .ve
235720662ed9SBarry Smith we generate two identical `PetscSF`s sf_0 and sf_1,
235820662ed9SBarry Smith .vb
235920662ed9SBarry Smith PetscSF Object: sf_0 2 MPI processes
236020662ed9SBarry Smith type: basic
236120662ed9SBarry Smith rank #leaves #roots
236220662ed9SBarry Smith [ 0] 4 2
236320662ed9SBarry Smith [ 1] 4 2
236420662ed9SBarry Smith leaves roots roots in global numbering
236520662ed9SBarry Smith ( 0, 0) <- ( 0, 0) = 0
236620662ed9SBarry Smith ( 0, 1) <- ( 0, 1) = 1
236720662ed9SBarry Smith ( 0, 2) <- ( 1, 0) = 2
236820662ed9SBarry Smith ( 0, 3) <- ( 1, 1) = 3
236920662ed9SBarry Smith ( 1, 0) <- ( 0, 0) = 0
237020662ed9SBarry Smith ( 1, 1) <- ( 0, 1) = 1
237120662ed9SBarry Smith ( 1, 2) <- ( 1, 0) = 2
237220662ed9SBarry Smith ( 1, 3) <- ( 1, 1) = 3
237320662ed9SBarry Smith .ve
2374e33f79d8SJacob Faibussowitsch and pass them to `PetscSFConcatenate()` along with different choices of `rootMode`, yielding different result_sf\:
237520662ed9SBarry Smith .vb
237620662ed9SBarry Smith rootMode = local:
237720662ed9SBarry Smith PetscSF Object: result_sf 2 MPI processes
237820662ed9SBarry Smith type: basic
237920662ed9SBarry Smith rank #leaves #roots
238020662ed9SBarry Smith [ 0] 8 4
238120662ed9SBarry Smith [ 1] 8 4
238220662ed9SBarry Smith leaves roots roots in global numbering
238320662ed9SBarry Smith ( 0, 0) <- ( 0, 0) = 0
238420662ed9SBarry Smith ( 0, 1) <- ( 0, 1) = 1
238520662ed9SBarry Smith ( 0, 2) <- ( 1, 0) = 4
238620662ed9SBarry Smith ( 0, 3) <- ( 1, 1) = 5
238720662ed9SBarry Smith ( 0, 4) <- ( 0, 2) = 2
238820662ed9SBarry Smith ( 0, 5) <- ( 0, 3) = 3
238920662ed9SBarry Smith ( 0, 6) <- ( 1, 2) = 6
239020662ed9SBarry Smith ( 0, 7) <- ( 1, 3) = 7
239120662ed9SBarry Smith ( 1, 0) <- ( 0, 0) = 0
239220662ed9SBarry Smith ( 1, 1) <- ( 0, 1) = 1
239320662ed9SBarry Smith ( 1, 2) <- ( 1, 0) = 4
239420662ed9SBarry Smith ( 1, 3) <- ( 1, 1) = 5
239520662ed9SBarry Smith ( 1, 4) <- ( 0, 2) = 2
239620662ed9SBarry Smith ( 1, 5) <- ( 0, 3) = 3
239720662ed9SBarry Smith ( 1, 6) <- ( 1, 2) = 6
239820662ed9SBarry Smith ( 1, 7) <- ( 1, 3) = 7
239920662ed9SBarry Smith
240020662ed9SBarry Smith rootMode = global:
240120662ed9SBarry Smith PetscSF Object: result_sf 2 MPI processes
240220662ed9SBarry Smith type: basic
240320662ed9SBarry Smith rank #leaves #roots
240420662ed9SBarry Smith [ 0] 8 4
240520662ed9SBarry Smith [ 1] 8 4
240620662ed9SBarry Smith leaves roots roots in global numbering
240720662ed9SBarry Smith ( 0, 0) <- ( 0, 0) = 0
240820662ed9SBarry Smith ( 0, 1) <- ( 0, 1) = 1
240920662ed9SBarry Smith ( 0, 2) <- ( 0, 2) = 2
241020662ed9SBarry Smith ( 0, 3) <- ( 0, 3) = 3
241120662ed9SBarry Smith ( 0, 4) <- ( 1, 0) = 4
241220662ed9SBarry Smith ( 0, 5) <- ( 1, 1) = 5
241320662ed9SBarry Smith ( 0, 6) <- ( 1, 2) = 6
241420662ed9SBarry Smith ( 0, 7) <- ( 1, 3) = 7
241520662ed9SBarry Smith ( 1, 0) <- ( 0, 0) = 0
241620662ed9SBarry Smith ( 1, 1) <- ( 0, 1) = 1
241720662ed9SBarry Smith ( 1, 2) <- ( 0, 2) = 2
241820662ed9SBarry Smith ( 1, 3) <- ( 0, 3) = 3
241920662ed9SBarry Smith ( 1, 4) <- ( 1, 0) = 4
242020662ed9SBarry Smith ( 1, 5) <- ( 1, 1) = 5
242120662ed9SBarry Smith ( 1, 6) <- ( 1, 2) = 6
242220662ed9SBarry Smith ( 1, 7) <- ( 1, 3) = 7
242320662ed9SBarry Smith
242420662ed9SBarry Smith rootMode = shared:
242520662ed9SBarry Smith PetscSF Object: result_sf 2 MPI processes
242620662ed9SBarry Smith type: basic
242720662ed9SBarry Smith rank #leaves #roots
242820662ed9SBarry Smith [ 0] 8 2
242920662ed9SBarry Smith [ 1] 8 2
243020662ed9SBarry Smith leaves roots roots in global numbering
243120662ed9SBarry Smith ( 0, 0) <- ( 0, 0) = 0
243220662ed9SBarry Smith ( 0, 1) <- ( 0, 1) = 1
243320662ed9SBarry Smith ( 0, 2) <- ( 1, 0) = 2
243420662ed9SBarry Smith ( 0, 3) <- ( 1, 1) = 3
243520662ed9SBarry Smith ( 0, 4) <- ( 0, 0) = 0
243620662ed9SBarry Smith ( 0, 5) <- ( 0, 1) = 1
243720662ed9SBarry Smith ( 0, 6) <- ( 1, 0) = 2
243820662ed9SBarry Smith ( 0, 7) <- ( 1, 1) = 3
243920662ed9SBarry Smith ( 1, 0) <- ( 0, 0) = 0
244020662ed9SBarry Smith ( 1, 1) <- ( 0, 1) = 1
244120662ed9SBarry Smith ( 1, 2) <- ( 1, 0) = 2
244220662ed9SBarry Smith ( 1, 3) <- ( 1, 1) = 3
244320662ed9SBarry Smith ( 1, 4) <- ( 0, 0) = 0
244420662ed9SBarry Smith ( 1, 5) <- ( 0, 1) = 1
244520662ed9SBarry Smith ( 1, 6) <- ( 1, 0) = 2
244620662ed9SBarry Smith ( 1, 7) <- ( 1, 1) = 3
244720662ed9SBarry Smith .ve
24481f40158dSVaclav Hapla
24492f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFConcatenateRootMode`
2450157edd7aSVaclav Hapla @*/
PetscSFConcatenate(MPI_Comm comm,PetscInt nsfs,PetscSF sfs[],PetscSFConcatenateRootMode rootMode,PetscInt leafOffsets[],PetscSF * newsf)24511f40158dSVaclav Hapla PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscSFConcatenateRootMode rootMode, PetscInt leafOffsets[], PetscSF *newsf)
2452d71ae5a4SJacob Faibussowitsch {
2453157edd7aSVaclav Hapla PetscInt i, s, nLeaves, nRoots;
2454157edd7aSVaclav Hapla PetscInt *leafArrayOffsets;
2455157edd7aSVaclav Hapla PetscInt *ilocal_new;
2456157edd7aSVaclav Hapla PetscSFNode *iremote_new;
2457157edd7aSVaclav Hapla PetscBool all_ilocal_null = PETSC_FALSE;
24581f40158dSVaclav Hapla PetscLayout glayout = NULL;
24591f40158dSVaclav Hapla PetscInt *gremote = NULL;
24601f40158dSVaclav Hapla PetscMPIInt rank, size;
2461157edd7aSVaclav Hapla
2462157edd7aSVaclav Hapla PetscFunctionBegin;
246312f479c1SVaclav Hapla if (PetscDefined(USE_DEBUG)) {
2464157edd7aSVaclav Hapla PetscSF dummy; /* just to have a PetscObject on comm for input validation */
2465157edd7aSVaclav Hapla
24669566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(comm, &dummy));
2467157edd7aSVaclav Hapla PetscValidLogicalCollectiveInt(dummy, nsfs, 2);
24684f572ea9SToby Isaac PetscAssertPointer(sfs, 3);
2469157edd7aSVaclav Hapla for (i = 0; i < nsfs; i++) {
2470157edd7aSVaclav Hapla PetscValidHeaderSpecific(sfs[i], PETSCSF_CLASSID, 3);
2471157edd7aSVaclav Hapla PetscCheckSameComm(dummy, 1, sfs[i], 3);
2472157edd7aSVaclav Hapla }
24731f40158dSVaclav Hapla PetscValidLogicalCollectiveEnum(dummy, rootMode, 4);
24744f572ea9SToby Isaac if (leafOffsets) PetscAssertPointer(leafOffsets, 5);
24754f572ea9SToby Isaac PetscAssertPointer(newsf, 6);
24769566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&dummy));
2477157edd7aSVaclav Hapla }
2478157edd7aSVaclav Hapla if (!nsfs) {
24799566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(comm, newsf));
24809566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
24813ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
2482157edd7aSVaclav Hapla }
24839566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank));
24841f40158dSVaclav Hapla PetscCallMPI(MPI_Comm_size(comm, &size));
2485157edd7aSVaclav Hapla
24861f40158dSVaclav Hapla /* Calculate leaf array offsets */
24879566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets));
2488157edd7aSVaclav Hapla leafArrayOffsets[0] = 0;
2489157edd7aSVaclav Hapla for (s = 0; s < nsfs; s++) {
2490157edd7aSVaclav Hapla PetscInt nl;
2491157edd7aSVaclav Hapla
24929566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
2493157edd7aSVaclav Hapla leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl;
2494157edd7aSVaclav Hapla }
2495157edd7aSVaclav Hapla nLeaves = leafArrayOffsets[nsfs];
2496157edd7aSVaclav Hapla
24971f40158dSVaclav Hapla /* Calculate number of roots */
24981f40158dSVaclav Hapla switch (rootMode) {
24991f40158dSVaclav Hapla case PETSCSF_CONCATENATE_ROOTMODE_SHARED: {
25001f40158dSVaclav Hapla PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
25011f40158dSVaclav Hapla if (PetscDefined(USE_DEBUG)) {
25021f40158dSVaclav Hapla for (s = 1; s < nsfs; s++) {
25031f40158dSVaclav Hapla PetscInt nr;
25041f40158dSVaclav Hapla
25051f40158dSVaclav Hapla PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
25061f40158dSVaclav Hapla PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "rootMode = %s but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", PetscSFConcatenateRootModes[rootMode], s, nr, nRoots);
25071f40158dSVaclav Hapla }
25081f40158dSVaclav Hapla }
25091f40158dSVaclav Hapla } break;
25101f40158dSVaclav Hapla case PETSCSF_CONCATENATE_ROOTMODE_GLOBAL: {
25111f40158dSVaclav Hapla /* Calculate also global layout in this case */
25121f40158dSVaclav Hapla PetscInt *nls;
25131f40158dSVaclav Hapla PetscLayout *lts;
25141f40158dSVaclav Hapla PetscInt **inds;
25151f40158dSVaclav Hapla PetscInt j;
25161f40158dSVaclav Hapla PetscInt rootOffset = 0;
25171f40158dSVaclav Hapla
25181f40158dSVaclav Hapla PetscCall(PetscCalloc3(nsfs, <s, nsfs, &nls, nsfs, &inds));
25191f40158dSVaclav Hapla PetscCall(PetscLayoutCreate(comm, &glayout));
25201f40158dSVaclav Hapla glayout->bs = 1;
25211f40158dSVaclav Hapla glayout->n = 0;
25221f40158dSVaclav Hapla glayout->N = 0;
25231f40158dSVaclav Hapla for (s = 0; s < nsfs; s++) {
25241f40158dSVaclav Hapla PetscCall(PetscSFGetGraphLayout(sfs[s], <s[s], &nls[s], NULL, &inds[s]));
25251f40158dSVaclav Hapla glayout->n += lts[s]->n;
25261f40158dSVaclav Hapla glayout->N += lts[s]->N;
25271f40158dSVaclav Hapla }
25281f40158dSVaclav Hapla PetscCall(PetscLayoutSetUp(glayout));
25291f40158dSVaclav Hapla PetscCall(PetscMalloc1(nLeaves, &gremote));
25301f40158dSVaclav Hapla for (s = 0, j = 0; s < nsfs; s++) {
25311f40158dSVaclav Hapla for (i = 0; i < nls[s]; i++, j++) gremote[j] = inds[s][i] + rootOffset;
25321f40158dSVaclav Hapla rootOffset += lts[s]->N;
25331f40158dSVaclav Hapla PetscCall(PetscLayoutDestroy(<s[s]));
25341f40158dSVaclav Hapla PetscCall(PetscFree(inds[s]));
25351f40158dSVaclav Hapla }
25361f40158dSVaclav Hapla PetscCall(PetscFree3(lts, nls, inds));
25371f40158dSVaclav Hapla nRoots = glayout->N;
25381f40158dSVaclav Hapla } break;
25391f40158dSVaclav Hapla case PETSCSF_CONCATENATE_ROOTMODE_LOCAL:
25401f40158dSVaclav Hapla /* nRoots calculated later in this case */
25411f40158dSVaclav Hapla break;
25421f40158dSVaclav Hapla default:
25431f40158dSVaclav Hapla SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Invalid PetscSFConcatenateRootMode %d", rootMode);
25441f40158dSVaclav Hapla }
25451f40158dSVaclav Hapla
2546157edd7aSVaclav Hapla if (!leafOffsets) {
2547157edd7aSVaclav Hapla all_ilocal_null = PETSC_TRUE;
2548157edd7aSVaclav Hapla for (s = 0; s < nsfs; s++) {
2549157edd7aSVaclav Hapla const PetscInt *ilocal;
2550157edd7aSVaclav Hapla
25519566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
2552157edd7aSVaclav Hapla if (ilocal) {
2553157edd7aSVaclav Hapla all_ilocal_null = PETSC_FALSE;
2554157edd7aSVaclav Hapla break;
2555157edd7aSVaclav Hapla }
2556157edd7aSVaclav Hapla }
2557157edd7aSVaclav Hapla PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
2558157edd7aSVaclav Hapla }
2559157edd7aSVaclav Hapla
2560157edd7aSVaclav Hapla /* Renumber and concatenate local leaves */
2561157edd7aSVaclav Hapla ilocal_new = NULL;
2562157edd7aSVaclav Hapla if (!all_ilocal_null) {
25639566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nLeaves, &ilocal_new));
2564157edd7aSVaclav Hapla for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1;
2565157edd7aSVaclav Hapla for (s = 0; s < nsfs; s++) {
2566157edd7aSVaclav Hapla const PetscInt *ilocal;
25678e3a54c0SPierre Jolivet PetscInt *ilocal_l = PetscSafePointerPlusOffset(ilocal_new, leafArrayOffsets[s]);
2568157edd7aSVaclav Hapla PetscInt i, nleaves_l;
2569157edd7aSVaclav Hapla
25709566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL));
2571157edd7aSVaclav Hapla for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
2572157edd7aSVaclav Hapla }
2573157edd7aSVaclav Hapla }
2574157edd7aSVaclav Hapla
2575157edd7aSVaclav Hapla /* Renumber and concatenate remote roots */
25761f40158dSVaclav Hapla if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL || rootMode == PETSCSF_CONCATENATE_ROOTMODE_SHARED) {
25771f40158dSVaclav Hapla PetscInt rootOffset = 0;
25781f40158dSVaclav Hapla
25799566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nLeaves, &iremote_new));
2580157edd7aSVaclav Hapla for (i = 0; i < nLeaves; i++) {
2581157edd7aSVaclav Hapla iremote_new[i].rank = -1;
2582157edd7aSVaclav Hapla iremote_new[i].index = -1;
2583157edd7aSVaclav Hapla }
2584157edd7aSVaclav Hapla for (s = 0; s < nsfs; s++) {
2585157edd7aSVaclav Hapla PetscInt i, nl, nr;
2586157edd7aSVaclav Hapla PetscSF tmp_sf;
2587157edd7aSVaclav Hapla const PetscSFNode *iremote;
2588157edd7aSVaclav Hapla PetscSFNode *tmp_rootdata;
25898e3a54c0SPierre Jolivet PetscSFNode *tmp_leafdata = PetscSafePointerPlusOffset(iremote_new, leafArrayOffsets[s]);
2590157edd7aSVaclav Hapla
25919566063dSJacob Faibussowitsch PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote));
25929566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(comm, &tmp_sf));
2593157edd7aSVaclav Hapla /* create helper SF with contiguous leaves */
25949566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
25959566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(tmp_sf));
25969566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nr, &tmp_rootdata));
25971f40158dSVaclav Hapla if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) {
2598157edd7aSVaclav Hapla for (i = 0; i < nr; i++) {
25991f40158dSVaclav Hapla tmp_rootdata[i].index = i + rootOffset;
26006497c311SBarry Smith tmp_rootdata[i].rank = rank;
2601157edd7aSVaclav Hapla }
26021f40158dSVaclav Hapla rootOffset += nr;
26031f40158dSVaclav Hapla } else {
26041f40158dSVaclav Hapla for (i = 0; i < nr; i++) {
26051f40158dSVaclav Hapla tmp_rootdata[i].index = i;
26066497c311SBarry Smith tmp_rootdata[i].rank = rank;
26071f40158dSVaclav Hapla }
26081f40158dSVaclav Hapla }
26096497c311SBarry Smith PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
26106497c311SBarry Smith PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
26119566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&tmp_sf));
26129566063dSJacob Faibussowitsch PetscCall(PetscFree(tmp_rootdata));
2613157edd7aSVaclav Hapla }
2614aa624791SPierre Jolivet if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) nRoots = rootOffset; // else nRoots already calculated above
2615157edd7aSVaclav Hapla
2616157edd7aSVaclav Hapla /* Build the new SF */
26179566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(comm, newsf));
26189566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER));
26191f40158dSVaclav Hapla } else {
26201f40158dSVaclav Hapla /* Build the new SF */
26211f40158dSVaclav Hapla PetscCall(PetscSFCreate(comm, newsf));
26221f40158dSVaclav Hapla PetscCall(PetscSFSetGraphLayout(*newsf, glayout, nLeaves, ilocal_new, PETSC_OWN_POINTER, gremote));
26231f40158dSVaclav Hapla }
26249566063dSJacob Faibussowitsch PetscCall(PetscSFSetUp(*newsf));
26251f40158dSVaclav Hapla PetscCall(PetscSFViewFromOptions(*newsf, NULL, "-sf_concat_view"));
26261f40158dSVaclav Hapla PetscCall(PetscLayoutDestroy(&glayout));
26271f40158dSVaclav Hapla PetscCall(PetscFree(gremote));
26289566063dSJacob Faibussowitsch PetscCall(PetscFree(leafArrayOffsets));
26293ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
2630157edd7aSVaclav Hapla }
26318e54d7e8SToby Isaac
26328e54d7e8SToby Isaac /*@
26332f04c522SBarry Smith PetscSFRegisterPersistent - Register root and leaf data as memory regions that will be used for repeated `PetscSF` communications.
26348e54d7e8SToby Isaac
26358e54d7e8SToby Isaac Collective
26368e54d7e8SToby Isaac
26378e54d7e8SToby Isaac Input Parameters:
26388e54d7e8SToby Isaac + sf - star forest
26392f04c522SBarry Smith . unit - the data type contained within `rootdata` and `leafdata`
26402f04c522SBarry Smith . rootdata - root data that will be used for multiple `PetscSF` communications
26412f04c522SBarry Smith - leafdata - leaf data that will be used for multiple `PetscSF` communications
26428e54d7e8SToby Isaac
26438e54d7e8SToby Isaac Level: advanced
26448e54d7e8SToby Isaac
26458e54d7e8SToby Isaac Notes:
26468e54d7e8SToby Isaac Implementations of `PetscSF` can make optimizations
26478e54d7e8SToby Isaac for repeated communication using the same memory regions, but these optimizations
26488e54d7e8SToby Isaac can be unsound if `rootdata` or `leafdata` is deallocated and the `PetscSF` is not informed.
26498e54d7e8SToby Isaac The intended pattern is
26508e54d7e8SToby Isaac
26518e54d7e8SToby Isaac .vb
26528e54d7e8SToby Isaac PetscMalloc2(nroots, &rootdata, nleaves, &leafdata);
26538e54d7e8SToby Isaac
26548e54d7e8SToby Isaac PetscSFRegisterPersistent(sf, unit, rootdata, leafdata);
26558e54d7e8SToby Isaac // repeated use of rootdata and leafdata will now be optimized
26568e54d7e8SToby Isaac
26578e54d7e8SToby Isaac PetscSFBcastBegin(sf, unit, rootdata, leafdata, MPI_REPLACE);
26588e54d7e8SToby Isaac PetscSFBcastEnd(sf, unit, rootdata, leafdata, MPI_REPLACE);
26598e54d7e8SToby Isaac // ...
26608e54d7e8SToby Isaac PetscSFReduceBegin(sf, unit, leafdata, rootdata, MPI_SUM);
26618e54d7e8SToby Isaac PetscSFReduceEnd(sf, unit, leafdata, rootdata, MPI_SUM);
26628e54d7e8SToby Isaac // ... (other communications)
26638e54d7e8SToby Isaac
26648e54d7e8SToby Isaac // rootdata and leafdata must be deregistered before freeing
26658e54d7e8SToby Isaac // skipping this can lead to undefined behavior including
26668e54d7e8SToby Isaac // deadlocks
26678e54d7e8SToby Isaac PetscSFDeregisterPersistent(sf, unit, rootdata, leafdata);
26688e54d7e8SToby Isaac
26698e54d7e8SToby Isaac // it is now safe to free rootdata and leafdata
26708e54d7e8SToby Isaac PetscFree2(rootdata, leafdata);
26718e54d7e8SToby Isaac .ve
26728e54d7e8SToby Isaac
26738e54d7e8SToby Isaac If you do not register `rootdata` and `leafdata` it will not cause an error,
26748e54d7e8SToby Isaac but optimizations that reduce the setup time for each communication cannot be
26758e54d7e8SToby Isaac made. Currently, the only implementation of `PetscSF` that benefits from
26768e54d7e8SToby Isaac `PetscSFRegisterPersistent()` is `PETSCSFWINDOW`. For the default
26778e54d7e8SToby Isaac `PETSCSFBASIC` there is no benefit to using `PetscSFRegisterPersistent()`.
26788e54d7e8SToby Isaac
26792f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PETSCSFWINDOW`, `PetscSFDeregisterPersistent()`
26808e54d7e8SToby Isaac @*/
PetscSFRegisterPersistent(PetscSF sf,MPI_Datatype unit,const void * rootdata,const void * leafdata)26818e54d7e8SToby Isaac PetscErrorCode PetscSFRegisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
26828e54d7e8SToby Isaac {
26838e54d7e8SToby Isaac PetscFunctionBegin;
26848e54d7e8SToby Isaac PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
26858e54d7e8SToby Isaac PetscTryMethod(sf, "PetscSFRegisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata));
26868e54d7e8SToby Isaac PetscFunctionReturn(PETSC_SUCCESS);
26878e54d7e8SToby Isaac }
26888e54d7e8SToby Isaac
26898e54d7e8SToby Isaac /*@
26902f04c522SBarry Smith PetscSFDeregisterPersistent - Signal that repeated usage of `rootdata` and `leafdata` for `PetscSF` communication has concluded.
26918e54d7e8SToby Isaac
26928e54d7e8SToby Isaac Collective
26938e54d7e8SToby Isaac
26948e54d7e8SToby Isaac Input Parameters:
26958e54d7e8SToby Isaac + sf - star forest
26962f04c522SBarry Smith . unit - the data type contained within the `rootdata` and `leafdata`
26978e54d7e8SToby Isaac . rootdata - root data that was previously registered with `PetscSFRegisterPersistent()`
26988e54d7e8SToby Isaac - leafdata - leaf data that was previously registered with `PetscSFRegisterPersistent()`
26998e54d7e8SToby Isaac
27008e54d7e8SToby Isaac Level: advanced
27018e54d7e8SToby Isaac
27028e54d7e8SToby Isaac Note:
27032f04c522SBarry Smith See `PetscSFRegisterPersistent()` for when and how to use this function.
27048e54d7e8SToby Isaac
27052f04c522SBarry Smith .seealso: [](sec_petscsf), `PetscSF`, `PETSCSFWINDOW`, `PetscSFRegisterPersistent()`
27068e54d7e8SToby Isaac @*/
PetscSFDeregisterPersistent(PetscSF sf,MPI_Datatype unit,const void * rootdata,const void * leafdata)27078e54d7e8SToby Isaac PetscErrorCode PetscSFDeregisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
27088e54d7e8SToby Isaac {
27098e54d7e8SToby Isaac PetscFunctionBegin;
27108e54d7e8SToby Isaac PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
27118e54d7e8SToby Isaac PetscTryMethod(sf, "PetscSFDeregisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata));
27128e54d7e8SToby Isaac PetscFunctionReturn(PETSC_SUCCESS);
27138e54d7e8SToby Isaac }
2714e1187f0dSToby Isaac
PetscSFGetDatatypeSize_Internal(MPI_Comm comm,MPI_Datatype unit,MPI_Aint * size)2715e1187f0dSToby Isaac PETSC_INTERN PetscErrorCode PetscSFGetDatatypeSize_Internal(MPI_Comm comm, MPI_Datatype unit, MPI_Aint *size)
2716e1187f0dSToby Isaac {
2717e1187f0dSToby Isaac MPI_Aint lb, lb_true, bytes, bytes_true;
2718e1187f0dSToby Isaac
2719e1187f0dSToby Isaac PetscFunctionBegin;
2720e1187f0dSToby Isaac PetscCallMPI(MPI_Type_get_extent(unit, &lb, &bytes));
2721e1187f0dSToby Isaac PetscCallMPI(MPI_Type_get_true_extent(unit, &lb_true, &bytes_true));
2722e1187f0dSToby Isaac PetscCheck(lb == 0 && lb_true == 0, comm, PETSC_ERR_SUP, "No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature");
2723e1187f0dSToby Isaac *size = bytes;
2724e1187f0dSToby Isaac PetscFunctionReturn(PETSC_SUCCESS);
2725e1187f0dSToby Isaac }
2726