197929ea7SJunchao Zhang static char help[] = "This example shows 1) how to transfer vectors from a parent communicator to vectors on a child communicator and vice versa;\n\
297929ea7SJunchao Zhang 2) how to transfer vectors from a subcommunicator to vectors on another subcommunicator. The two subcommunicators are not\n\
38a53a0a4SSajid Ali required to cover all processes in PETSC_COMM_WORLD; 3) how to copy a vector from a parent communicator to vectors on its child communicators.\n\
48a53a0a4SSajid Ali To run any example with VECCUDA vectors, add -vectype cuda to the argument list\n\n";
597929ea7SJunchao Zhang
697929ea7SJunchao Zhang #include <petscvec.h>
main(int argc,char ** argv)7d71ae5a4SJacob Faibussowitsch int main(int argc, char **argv)
8d71ae5a4SJacob Faibussowitsch {
997929ea7SJunchao Zhang PetscMPIInt nproc, grank, mycolor;
1097929ea7SJunchao Zhang PetscInt i, n, N = 20, low, high;
1197929ea7SJunchao Zhang MPI_Comm subcomm;
12f3fa974cSJacob Faibussowitsch Vec x = NULL; /* global vectors on PETSC_COMM_WORLD */
13f3fa974cSJacob Faibussowitsch Vec yg = NULL; /* global vectors on PETSC_COMM_WORLD */
1497929ea7SJunchao Zhang VecScatter vscat;
1597929ea7SJunchao Zhang IS ix, iy;
168a53a0a4SSajid Ali PetscBool iscuda = PETSC_FALSE; /* Option to use VECCUDA vectors */
178a53a0a4SSajid Ali PetscBool optionflag, compareflag;
188a53a0a4SSajid Ali char vectypename[PETSC_MAX_PATH_LEN];
1997929ea7SJunchao Zhang PetscBool world2sub = PETSC_FALSE; /* Copy a vector from WORLD to a subcomm? */
2097929ea7SJunchao Zhang PetscBool sub2sub = PETSC_FALSE; /* Copy a vector from a subcomm to another subcomm? */
2197929ea7SJunchao Zhang PetscBool world2subs = PETSC_FALSE; /* Copy a vector from WORLD to multiple subcomms? */
2297929ea7SJunchao Zhang
23327415f7SBarry Smith PetscFunctionBeginUser;
24c8025a54SPierre Jolivet PetscCall(PetscInitialize(&argc, &argv, NULL, help));
259566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &nproc));
269566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &grank));
2797929ea7SJunchao Zhang
2808401ef6SPierre Jolivet PetscCheck(nproc >= 2, PETSC_COMM_WORLD, PETSC_ERR_ARG_SIZ, "This test must have at least two processes to run");
2997929ea7SJunchao Zhang
309566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(NULL, 0, "-world2sub", &world2sub, NULL));
319566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(NULL, 0, "-sub2sub", &sub2sub, NULL));
329566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(NULL, 0, "-world2subs", &world2subs, NULL));
339566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetString(NULL, NULL, "-vectype", vectypename, sizeof(vectypename), &optionflag));
348a53a0a4SSajid Ali if (optionflag) {
359566063dSJacob Faibussowitsch PetscCall(PetscStrncmp(vectypename, "cuda", (size_t)4, &compareflag));
368a53a0a4SSajid Ali if (compareflag) iscuda = PETSC_TRUE;
378a53a0a4SSajid Ali }
3897929ea7SJunchao Zhang
3997929ea7SJunchao Zhang /* Split PETSC_COMM_WORLD into three subcomms. Each process can only see the subcomm it belongs to */
4097929ea7SJunchao Zhang mycolor = grank % 3;
419566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_split(PETSC_COMM_WORLD, mycolor, grank, &subcomm));
4297929ea7SJunchao Zhang
4397929ea7SJunchao Zhang /*===========================================================================
4497929ea7SJunchao Zhang * Transfer a vector x defined on PETSC_COMM_WORLD to a vector y defined on
4597929ea7SJunchao Zhang * a subcommunicator of PETSC_COMM_WORLD and vice versa.
4697929ea7SJunchao Zhang *===========================================================================*/
4797929ea7SJunchao Zhang if (world2sub) {
489566063dSJacob Faibussowitsch PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
499566063dSJacob Faibussowitsch PetscCall(VecSetSizes(x, PETSC_DECIDE, N));
508a53a0a4SSajid Ali if (iscuda) {
519566063dSJacob Faibussowitsch PetscCall(VecSetType(x, VECCUDA));
528a53a0a4SSajid Ali } else {
539566063dSJacob Faibussowitsch PetscCall(VecSetType(x, VECSTANDARD));
548a53a0a4SSajid Ali }
559566063dSJacob Faibussowitsch PetscCall(VecSetUp(x));
569566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)x, "x_commworld")); /* Give a name to view x clearly */
5797929ea7SJunchao Zhang
5897929ea7SJunchao Zhang /* Initialize x to [-0.0, -1.0, -2.0, ..., -19.0] */
599566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRange(x, &low, &high));
6097929ea7SJunchao Zhang for (i = low; i < high; i++) {
6197929ea7SJunchao Zhang PetscScalar val = -i;
629566063dSJacob Faibussowitsch PetscCall(VecSetValue(x, i, val, INSERT_VALUES));
6397929ea7SJunchao Zhang }
649566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(x));
659566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(x));
6697929ea7SJunchao Zhang
6797929ea7SJunchao Zhang /* Transfer x to a vector y only defined on subcomm0 and vice versa */
6897929ea7SJunchao Zhang if (mycolor == 0) { /* subcomm0 contains ranks 0, 3, 6, ... in PETSC_COMM_WORLD */
6997929ea7SJunchao Zhang Vec y;
7097929ea7SJunchao Zhang PetscScalar *yvalue;
719566063dSJacob Faibussowitsch PetscCall(VecCreate(subcomm, &y));
729566063dSJacob Faibussowitsch PetscCall(VecSetSizes(y, PETSC_DECIDE, N));
738a53a0a4SSajid Ali if (iscuda) {
749566063dSJacob Faibussowitsch PetscCall(VecSetType(y, VECCUDA));
758a53a0a4SSajid Ali } else {
769566063dSJacob Faibussowitsch PetscCall(VecSetType(y, VECSTANDARD));
778a53a0a4SSajid Ali }
789566063dSJacob Faibussowitsch PetscCall(VecSetUp(y));
799566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)y, "y_subcomm_0")); /* Give a name to view y clearly */
809566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(y, &n));
818a53a0a4SSajid Ali if (iscuda) {
828a53a0a4SSajid Ali #if defined(PETSC_HAVE_CUDA)
839566063dSJacob Faibussowitsch PetscCall(VecCUDAGetArray(y, &yvalue));
848a53a0a4SSajid Ali #endif
858a53a0a4SSajid Ali } else {
869566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &yvalue));
878a53a0a4SSajid Ali }
8897929ea7SJunchao Zhang /* Create yg on PETSC_COMM_WORLD and alias yg with y. They share the memory pointed by yvalue.
8997929ea7SJunchao Zhang Note this is a collective call. All processes have to call it and supply consistent N.
9097929ea7SJunchao Zhang */
918a53a0a4SSajid Ali if (iscuda) {
928a53a0a4SSajid Ali #if defined(PETSC_HAVE_CUDA)
939566063dSJacob Faibussowitsch PetscCall(VecCreateMPICUDAWithArray(PETSC_COMM_WORLD, 1, n, N, yvalue, &yg));
948a53a0a4SSajid Ali #endif
958a53a0a4SSajid Ali } else {
969566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, n, N, yvalue, &yg));
978a53a0a4SSajid Ali }
9897929ea7SJunchao Zhang
9997929ea7SJunchao Zhang /* Create an identity map that makes yg[i] = x[i], i=0..N-1 */
1009566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRange(yg, &low, &high)); /* low, high are global indices */
1019566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, high - low, low, 1, &ix));
1029566063dSJacob Faibussowitsch PetscCall(ISDuplicate(ix, &iy));
10397929ea7SJunchao Zhang
10497929ea7SJunchao Zhang /* Union of ix's on subcomm0 covers the full range of [0,N) */
1059566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, ix, yg, iy, &vscat));
1069566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vscat, x, yg, INSERT_VALUES, SCATTER_FORWARD));
1079566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vscat, x, yg, INSERT_VALUES, SCATTER_FORWARD));
10897929ea7SJunchao Zhang
10997929ea7SJunchao Zhang /* Once yg got the data from x, we return yvalue to y so that we can use y in other operations.
11097929ea7SJunchao Zhang VecGetArray must be paired with VecRestoreArray.
11197929ea7SJunchao Zhang */
1128a53a0a4SSajid Ali if (iscuda) {
1138a53a0a4SSajid Ali #if defined(PETSC_HAVE_CUDA)
1149566063dSJacob Faibussowitsch PetscCall(VecCUDARestoreArray(y, &yvalue));
1158a53a0a4SSajid Ali #endif
1168a53a0a4SSajid Ali } else {
1179566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &yvalue));
1188a53a0a4SSajid Ali }
11997929ea7SJunchao Zhang
12097929ea7SJunchao Zhang /* Libraries on subcomm0 can safely use y now, for example, view and scale it */
1219566063dSJacob Faibussowitsch PetscCall(VecView(y, PETSC_VIEWER_STDOUT_(subcomm)));
1229566063dSJacob Faibussowitsch PetscCall(VecScale(y, 2.0));
12397929ea7SJunchao Zhang
12497929ea7SJunchao Zhang /* Send the new y back to x */
125*f0b74427SPierre Jolivet PetscCall(VecGetArray(y, &yvalue)); /* If VecScale is done on GPU, PETSc will prepare a valid yvalue for access */
12697929ea7SJunchao Zhang /* Supply new yvalue to yg without memory copying */
1279566063dSJacob Faibussowitsch PetscCall(VecPlaceArray(yg, yvalue));
1289566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vscat, yg, x, INSERT_VALUES, SCATTER_REVERSE));
1299566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vscat, yg, x, INSERT_VALUES, SCATTER_REVERSE));
1309566063dSJacob Faibussowitsch PetscCall(VecResetArray(yg));
1319566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &yvalue));
1329566063dSJacob Faibussowitsch PetscCall(VecDestroy(&y));
13397929ea7SJunchao Zhang } else {
13497929ea7SJunchao Zhang /* Ranks outside of subcomm0 do not supply values to yg */
1358a53a0a4SSajid Ali if (iscuda) {
1368a53a0a4SSajid Ali #if defined(PETSC_HAVE_CUDA)
1379566063dSJacob Faibussowitsch PetscCall(VecCreateMPICUDAWithArray(PETSC_COMM_WORLD, 1, 0 /*n*/, N, NULL, &yg));
1388a53a0a4SSajid Ali #endif
1398a53a0a4SSajid Ali } else {
1409566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, 0 /*n*/, N, NULL, &yg));
1418a53a0a4SSajid Ali }
14297929ea7SJunchao Zhang
14397929ea7SJunchao Zhang /* Ranks in subcomm0 already specified the full range of the identity map. The remaining
14497929ea7SJunchao Zhang ranks just need to create empty ISes to cheat VecScatterCreate.
14597929ea7SJunchao Zhang */
1469566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &ix));
1479566063dSJacob Faibussowitsch PetscCall(ISDuplicate(ix, &iy));
14897929ea7SJunchao Zhang
1499566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, ix, yg, iy, &vscat));
1509566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vscat, x, yg, INSERT_VALUES, SCATTER_FORWARD));
1519566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vscat, x, yg, INSERT_VALUES, SCATTER_FORWARD));
15297929ea7SJunchao Zhang
15397929ea7SJunchao Zhang /* Send the new y back to x. Ranks outside of subcomm0 actually have nothing to send.
15497929ea7SJunchao Zhang But they have to call VecScatterBegin/End since these routines are collective.
15597929ea7SJunchao Zhang */
1569566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vscat, yg, x, INSERT_VALUES, SCATTER_REVERSE));
1579566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vscat, yg, x, INSERT_VALUES, SCATTER_REVERSE));
15897929ea7SJunchao Zhang }
15997929ea7SJunchao Zhang
1609566063dSJacob Faibussowitsch PetscCall(VecView(x, PETSC_VIEWER_STDOUT_WORLD));
1619566063dSJacob Faibussowitsch PetscCall(ISDestroy(&ix));
1629566063dSJacob Faibussowitsch PetscCall(ISDestroy(&iy));
1639566063dSJacob Faibussowitsch PetscCall(VecDestroy(&x));
1649566063dSJacob Faibussowitsch PetscCall(VecDestroy(&yg));
1659566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vscat));
16697929ea7SJunchao Zhang } /* world2sub */
16797929ea7SJunchao Zhang
16897929ea7SJunchao Zhang /*===========================================================================
16997929ea7SJunchao Zhang * Transfer a vector x defined on subcomm0 to a vector y defined on
17097929ea7SJunchao Zhang * subcomm1. The two subcomms are not overlapping and their union is
17197929ea7SJunchao Zhang * not necessarily equal to PETSC_COMM_WORLD.
17297929ea7SJunchao Zhang *===========================================================================*/
17397929ea7SJunchao Zhang if (sub2sub) {
17497929ea7SJunchao Zhang if (mycolor == 0) {
17597929ea7SJunchao Zhang /* Intentionally declare N as a local variable so that processes in subcomm1 do not know its value */
17697929ea7SJunchao Zhang PetscInt n, N = 22;
17797929ea7SJunchao Zhang Vec x, xg, yg;
17897929ea7SJunchao Zhang IS ix, iy;
17997929ea7SJunchao Zhang VecScatter vscat;
18097929ea7SJunchao Zhang const PetscScalar *xvalue;
18197929ea7SJunchao Zhang MPI_Comm intercomm, parentcomm;
18297929ea7SJunchao Zhang PetscMPIInt lrank;
18397929ea7SJunchao Zhang
1849566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(subcomm, &lrank));
1858a53a0a4SSajid Ali /* x is on subcomm */
1869566063dSJacob Faibussowitsch PetscCall(VecCreate(subcomm, &x));
1879566063dSJacob Faibussowitsch PetscCall(VecSetSizes(x, PETSC_DECIDE, N));
1888a53a0a4SSajid Ali if (iscuda) {
1899566063dSJacob Faibussowitsch PetscCall(VecSetType(x, VECCUDA));
1908a53a0a4SSajid Ali } else {
1919566063dSJacob Faibussowitsch PetscCall(VecSetType(x, VECSTANDARD));
1928a53a0a4SSajid Ali }
1939566063dSJacob Faibussowitsch PetscCall(VecSetUp(x));
1949566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRange(x, &low, &high));
19597929ea7SJunchao Zhang
19697929ea7SJunchao Zhang /* initialize x = [0.0, 1.0, 2.0, ..., 21.0] */
19797929ea7SJunchao Zhang for (i = low; i < high; i++) {
19897929ea7SJunchao Zhang PetscScalar val = i;
1999566063dSJacob Faibussowitsch PetscCall(VecSetValue(x, i, val, INSERT_VALUES));
20097929ea7SJunchao Zhang }
2019566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(x));
2029566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(x));
20397929ea7SJunchao Zhang
2049566063dSJacob Faibussowitsch PetscCallMPI(MPI_Intercomm_create(subcomm, 0, PETSC_COMM_WORLD /*peer_comm*/, 1, 100 /*tag*/, &intercomm));
20597929ea7SJunchao Zhang
20697929ea7SJunchao Zhang /* Tell rank 0 of subcomm1 the global size of x */
2079566063dSJacob Faibussowitsch if (!lrank) PetscCallMPI(MPI_Send(&N, 1, MPIU_INT, 0 /*receiver's rank in remote comm, i.e., subcomm1*/, 200 /*tag*/, intercomm));
20897929ea7SJunchao Zhang
209*f0b74427SPierre Jolivet /* Create an intracomm PETSc can work on. Ranks in subcomm0 are ordered before ranks in subcomm1 in parentcomm.
21097929ea7SJunchao Zhang But this order actually does not matter, since what we care is vector y, which is defined on subcomm1.
21197929ea7SJunchao Zhang */
2129566063dSJacob Faibussowitsch PetscCallMPI(MPI_Intercomm_merge(intercomm, 0 /*low*/, &parentcomm));
21397929ea7SJunchao Zhang
21497929ea7SJunchao Zhang /* Create a vector xg on parentcomm, which shares memory with x */
2159566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(x, &n));
2168a53a0a4SSajid Ali if (iscuda) {
2178a53a0a4SSajid Ali #if defined(PETSC_HAVE_CUDA)
2189566063dSJacob Faibussowitsch PetscCall(VecCUDAGetArrayRead(x, &xvalue));
2199566063dSJacob Faibussowitsch PetscCall(VecCreateMPICUDAWithArray(parentcomm, 1, n, N, xvalue, &xg));
2208a53a0a4SSajid Ali #endif
2218a53a0a4SSajid Ali } else {
2229566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xvalue));
2239566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(parentcomm, 1, n, N, xvalue, &xg));
2248a53a0a4SSajid Ali }
22597929ea7SJunchao Zhang
22697929ea7SJunchao Zhang /* Ranks in subcomm 0 have nothing on yg, so they simply have n=0, array=NULL */
2278a53a0a4SSajid Ali if (iscuda) {
2288a53a0a4SSajid Ali #if defined(PETSC_HAVE_CUDA)
2299566063dSJacob Faibussowitsch PetscCall(VecCreateMPICUDAWithArray(parentcomm, 1, 0 /*n*/, N, NULL /*array*/, &yg));
2308a53a0a4SSajid Ali #endif
2318a53a0a4SSajid Ali } else {
2329566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(parentcomm, 1, 0 /*n*/, N, NULL /*array*/, &yg));
2338a53a0a4SSajid Ali }
23497929ea7SJunchao Zhang
23597929ea7SJunchao Zhang /* Create the vecscatter, which does identity map by setting yg[i] = xg[i], i=0..N-1. */
2369566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRange(xg, &low, &high)); /* low, high are global indices of xg */
2379566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, high - low, low, 1, &ix));
2389566063dSJacob Faibussowitsch PetscCall(ISDuplicate(ix, &iy));
2399566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(xg, ix, yg, iy, &vscat));
24097929ea7SJunchao Zhang
24197929ea7SJunchao Zhang /* Scatter values from xg to yg */
2429566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vscat, xg, yg, INSERT_VALUES, SCATTER_FORWARD));
2439566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vscat, xg, yg, INSERT_VALUES, SCATTER_FORWARD));
24497929ea7SJunchao Zhang
24597929ea7SJunchao Zhang /* After the VecScatter is done, xg is idle so we can safely return xvalue to x */
2468a53a0a4SSajid Ali if (iscuda) {
2478a53a0a4SSajid Ali #if defined(PETSC_HAVE_CUDA)
2489566063dSJacob Faibussowitsch PetscCall(VecCUDARestoreArrayRead(x, &xvalue));
2498a53a0a4SSajid Ali #endif
2508a53a0a4SSajid Ali } else {
2519566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xvalue));
2528a53a0a4SSajid Ali }
2539566063dSJacob Faibussowitsch PetscCall(VecDestroy(&x));
2549566063dSJacob Faibussowitsch PetscCall(ISDestroy(&ix));
2559566063dSJacob Faibussowitsch PetscCall(ISDestroy(&iy));
2569566063dSJacob Faibussowitsch PetscCall(VecDestroy(&xg));
2579566063dSJacob Faibussowitsch PetscCall(VecDestroy(&yg));
2589566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vscat));
2599566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_free(&intercomm));
2609566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_free(&parentcomm));
26197929ea7SJunchao Zhang } else if (mycolor == 1) { /* subcomm 1, containing ranks 1, 4, 7, ... in PETSC_COMM_WORLD */
26297929ea7SJunchao Zhang PetscInt n, N;
26397929ea7SJunchao Zhang Vec y, xg, yg;
26497929ea7SJunchao Zhang IS ix, iy;
26597929ea7SJunchao Zhang VecScatter vscat;
26697929ea7SJunchao Zhang PetscScalar *yvalue;
26797929ea7SJunchao Zhang MPI_Comm intercomm, parentcomm;
26897929ea7SJunchao Zhang PetscMPIInt lrank;
26997929ea7SJunchao Zhang
2709566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(subcomm, &lrank));
2719566063dSJacob Faibussowitsch PetscCallMPI(MPI_Intercomm_create(subcomm, 0, PETSC_COMM_WORLD /*peer_comm*/, 0 /*remote_leader*/, 100 /*tag*/, &intercomm));
27297929ea7SJunchao Zhang
27397929ea7SJunchao Zhang /* Two rank-0 are talking */
2749566063dSJacob Faibussowitsch if (!lrank) PetscCallMPI(MPI_Recv(&N, 1, MPIU_INT, 0 /*sender's rank in remote comm, i.e. subcomm0*/, 200 /*tag*/, intercomm, MPI_STATUS_IGNORE));
27597929ea7SJunchao Zhang /* Rank 0 of subcomm1 bcasts N to its members */
2769566063dSJacob Faibussowitsch PetscCallMPI(MPI_Bcast(&N, 1, MPIU_INT, 0 /*local root*/, subcomm));
27797929ea7SJunchao Zhang
278*f0b74427SPierre Jolivet /* Create a intracomm PETSc can work on */
2799566063dSJacob Faibussowitsch PetscCallMPI(MPI_Intercomm_merge(intercomm, 1 /*high*/, &parentcomm));
28097929ea7SJunchao Zhang
28197929ea7SJunchao Zhang /* Ranks in subcomm1 have nothing on xg, so they simply have n=0, array=NULL.*/
2828a53a0a4SSajid Ali if (iscuda) {
2838a53a0a4SSajid Ali #if defined(PETSC_HAVE_CUDA)
2849566063dSJacob Faibussowitsch PetscCall(VecCreateMPICUDAWithArray(parentcomm, 1 /*bs*/, 0 /*n*/, N, NULL /*array*/, &xg));
2858a53a0a4SSajid Ali #endif
2868a53a0a4SSajid Ali } else {
2879566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(parentcomm, 1 /*bs*/, 0 /*n*/, N, NULL /*array*/, &xg));
2888a53a0a4SSajid Ali }
28997929ea7SJunchao Zhang
2909566063dSJacob Faibussowitsch PetscCall(VecCreate(subcomm, &y));
2919566063dSJacob Faibussowitsch PetscCall(VecSetSizes(y, PETSC_DECIDE, N));
2928a53a0a4SSajid Ali if (iscuda) {
2939566063dSJacob Faibussowitsch PetscCall(VecSetType(y, VECCUDA));
2948a53a0a4SSajid Ali } else {
2959566063dSJacob Faibussowitsch PetscCall(VecSetType(y, VECSTANDARD));
2968a53a0a4SSajid Ali }
2979566063dSJacob Faibussowitsch PetscCall(VecSetUp(y));
2988a53a0a4SSajid Ali
2999566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)y, "y_subcomm_1")); /* Give a name to view y clearly */
3009566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(y, &n));
3018a53a0a4SSajid Ali if (iscuda) {
3028a53a0a4SSajid Ali #if defined(PETSC_HAVE_CUDA)
3039566063dSJacob Faibussowitsch PetscCall(VecCUDAGetArray(y, &yvalue));
3048a53a0a4SSajid Ali #endif
3058a53a0a4SSajid Ali } else {
3069566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &yvalue));
3078a53a0a4SSajid Ali }
30897929ea7SJunchao Zhang /* Create a vector yg on parentcomm, which shares memory with y. xg and yg must be
30997929ea7SJunchao Zhang created in the same order in subcomm0/1. For example, we can not reverse the order of
31097929ea7SJunchao Zhang creating xg and yg in subcomm1.
31197929ea7SJunchao Zhang */
3128a53a0a4SSajid Ali if (iscuda) {
3138a53a0a4SSajid Ali #if defined(PETSC_HAVE_CUDA)
3149566063dSJacob Faibussowitsch PetscCall(VecCreateMPICUDAWithArray(parentcomm, 1 /*bs*/, n, N, yvalue, &yg));
3158a53a0a4SSajid Ali #endif
3168a53a0a4SSajid Ali } else {
3179566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(parentcomm, 1 /*bs*/, n, N, yvalue, &yg));
3188a53a0a4SSajid Ali }
31997929ea7SJunchao Zhang
32097929ea7SJunchao Zhang /* Ranks in subcomm0 already specified the full range of the identity map.
32197929ea7SJunchao Zhang ranks in subcomm1 just need to create empty ISes to cheat VecScatterCreate.
32297929ea7SJunchao Zhang */
3239566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &ix));
3249566063dSJacob Faibussowitsch PetscCall(ISDuplicate(ix, &iy));
3259566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(xg, ix, yg, iy, &vscat));
32697929ea7SJunchao Zhang
32797929ea7SJunchao Zhang /* Scatter values from xg to yg */
3289566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vscat, xg, yg, INSERT_VALUES, SCATTER_FORWARD));
3299566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vscat, xg, yg, INSERT_VALUES, SCATTER_FORWARD));
33097929ea7SJunchao Zhang
33197929ea7SJunchao Zhang /* After the VecScatter is done, values in yg are available. y is our interest, so we return yvalue to y */
3328a53a0a4SSajid Ali if (iscuda) {
3338a53a0a4SSajid Ali #if defined(PETSC_HAVE_CUDA)
3349566063dSJacob Faibussowitsch PetscCall(VecCUDARestoreArray(y, &yvalue));
3358a53a0a4SSajid Ali #endif
3368a53a0a4SSajid Ali } else {
3379566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &yvalue));
3388a53a0a4SSajid Ali }
33997929ea7SJunchao Zhang
34097929ea7SJunchao Zhang /* Libraries on subcomm1 can safely use y now, for example, view it */
3419566063dSJacob Faibussowitsch PetscCall(VecView(y, PETSC_VIEWER_STDOUT_(subcomm)));
34297929ea7SJunchao Zhang
3439566063dSJacob Faibussowitsch PetscCall(VecDestroy(&y));
3449566063dSJacob Faibussowitsch PetscCall(ISDestroy(&ix));
3459566063dSJacob Faibussowitsch PetscCall(ISDestroy(&iy));
3469566063dSJacob Faibussowitsch PetscCall(VecDestroy(&xg));
3479566063dSJacob Faibussowitsch PetscCall(VecDestroy(&yg));
3489566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vscat));
3499566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_free(&intercomm));
3509566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_free(&parentcomm));
35197929ea7SJunchao Zhang } else if (mycolor == 2) { /* subcomm2 */
35297929ea7SJunchao Zhang /* Processes in subcomm2 do not participate in the VecScatter. They can freely do unrelated things on subcomm2 */
35397929ea7SJunchao Zhang }
35497929ea7SJunchao Zhang } /* sub2sub */
35597929ea7SJunchao Zhang
35697929ea7SJunchao Zhang /*===========================================================================
35797929ea7SJunchao Zhang * Copy a vector x defined on PETSC_COMM_WORLD to vectors y defined on
35897929ea7SJunchao Zhang * every subcommunicator of PETSC_COMM_WORLD. We could use multiple transfers
35997929ea7SJunchao Zhang * as we did in case 1, but that is not efficient. Instead, we use one vecscatter
36097929ea7SJunchao Zhang * to achieve that.
36197929ea7SJunchao Zhang *===========================================================================*/
36297929ea7SJunchao Zhang if (world2subs) {
3638a53a0a4SSajid Ali Vec y;
36497929ea7SJunchao Zhang PetscInt n, N = 15, xstart, ystart, low, high;
36597929ea7SJunchao Zhang PetscScalar *yvalue;
36697929ea7SJunchao Zhang
36797929ea7SJunchao Zhang /* Initialize x to [0, 1, 2, 3, ..., N-1] */
3689566063dSJacob Faibussowitsch PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
3699566063dSJacob Faibussowitsch PetscCall(VecSetSizes(x, PETSC_DECIDE, N));
3708a53a0a4SSajid Ali if (iscuda) {
3719566063dSJacob Faibussowitsch PetscCall(VecSetType(x, VECCUDA));
3728a53a0a4SSajid Ali } else {
3739566063dSJacob Faibussowitsch PetscCall(VecSetType(x, VECSTANDARD));
3748a53a0a4SSajid Ali }
3759566063dSJacob Faibussowitsch PetscCall(VecSetUp(x));
3769566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRange(x, &low, &high));
3779566063dSJacob Faibussowitsch for (i = low; i < high; i++) PetscCall(VecSetValue(x, i, (PetscScalar)i, INSERT_VALUES));
3789566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(x));
3799566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(x));
38097929ea7SJunchao Zhang
38197929ea7SJunchao Zhang /* Every subcomm has a y as long as x */
3829566063dSJacob Faibussowitsch PetscCall(VecCreate(subcomm, &y));
3839566063dSJacob Faibussowitsch PetscCall(VecSetSizes(y, PETSC_DECIDE, N));
3848a53a0a4SSajid Ali if (iscuda) {
3859566063dSJacob Faibussowitsch PetscCall(VecSetType(y, VECCUDA));
3868a53a0a4SSajid Ali } else {
3879566063dSJacob Faibussowitsch PetscCall(VecSetType(y, VECSTANDARD));
3888a53a0a4SSajid Ali }
3899566063dSJacob Faibussowitsch PetscCall(VecSetUp(y));
3909566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(y, &n));
39197929ea7SJunchao Zhang
39297929ea7SJunchao Zhang /* Create a global vector yg on PETSC_COMM_WORLD using y's memory. yg's global size = N*(number of subcommunicators).
39397929ea7SJunchao Zhang Eeach rank in subcomms contributes a piece to construct the global yg. Keep in mind that pieces from a subcomm are not
39497929ea7SJunchao Zhang necessarily consecutive in yg. That depends on how PETSC_COMM_WORLD is split. In our case, subcomm0 is made of rank
39597929ea7SJunchao Zhang 0, 3, 6 etc from PETSC_COMM_WORLD. So subcomm0's pieces are interleaved with pieces from other subcomms in yg.
39697929ea7SJunchao Zhang */
3978a53a0a4SSajid Ali if (iscuda) {
3988a53a0a4SSajid Ali #if defined(PETSC_HAVE_CUDA)
3999566063dSJacob Faibussowitsch PetscCall(VecCUDAGetArray(y, &yvalue));
4009566063dSJacob Faibussowitsch PetscCall(VecCreateMPICUDAWithArray(PETSC_COMM_WORLD, 1, n, PETSC_DECIDE, yvalue, &yg));
4018a53a0a4SSajid Ali #endif
4028a53a0a4SSajid Ali } else {
4039566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &yvalue));
4049566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, n, PETSC_DECIDE, yvalue, &yg));
4058a53a0a4SSajid Ali }
4069566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)yg, "yg_on_subcomms")); /* Give a name to view yg clearly */
40797929ea7SJunchao Zhang
40897929ea7SJunchao Zhang /* The following two lines are key. From xstart, we know where to pull entries from x. Note that we get xstart from y,
40997929ea7SJunchao Zhang since first entry of y on this rank is from x[xstart]. From ystart, we know where ot put entries to yg.
41097929ea7SJunchao Zhang */
4119566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRange(y, &xstart, NULL));
4129566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRange(yg, &ystart, NULL));
41397929ea7SJunchao Zhang
4149566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, n, xstart, 1, &ix));
4159566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, n, ystart, 1, &iy));
4169566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, ix, yg, iy, &vscat));
4179566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vscat, x, yg, INSERT_VALUES, SCATTER_FORWARD));
4189566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vscat, x, yg, INSERT_VALUES, SCATTER_FORWARD));
41997929ea7SJunchao Zhang
42097929ea7SJunchao Zhang /* View yg on PETSC_COMM_WORLD before destroying it. We shall see the interleaving effect in output. */
4219566063dSJacob Faibussowitsch PetscCall(VecView(yg, PETSC_VIEWER_STDOUT_WORLD));
4229566063dSJacob Faibussowitsch PetscCall(VecDestroy(&yg));
42397929ea7SJunchao Zhang
42497929ea7SJunchao Zhang /* Restory yvalue so that processes in subcomm can use y from now on. */
4258a53a0a4SSajid Ali if (iscuda) {
4268a53a0a4SSajid Ali #if defined(PETSC_HAVE_CUDA)
4279566063dSJacob Faibussowitsch PetscCall(VecCUDARestoreArray(y, &yvalue));
4288a53a0a4SSajid Ali #endif
4298a53a0a4SSajid Ali } else {
4309566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &yvalue));
4318a53a0a4SSajid Ali }
4329566063dSJacob Faibussowitsch PetscCall(VecScale(y, 3.0));
43397929ea7SJunchao Zhang
4349566063dSJacob Faibussowitsch PetscCall(ISDestroy(&ix)); /* One can also destroy ix, iy immediately after VecScatterCreate() */
4359566063dSJacob Faibussowitsch PetscCall(ISDestroy(&iy));
4369566063dSJacob Faibussowitsch PetscCall(VecDestroy(&x));
4379566063dSJacob Faibussowitsch PetscCall(VecDestroy(&y));
4389566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vscat));
43997929ea7SJunchao Zhang } /* world2subs */
44097929ea7SJunchao Zhang
4419566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_free(&subcomm));
4429566063dSJacob Faibussowitsch PetscCall(PetscFinalize());
443b122ec5aSJacob Faibussowitsch return 0;
44497929ea7SJunchao Zhang }
44597929ea7SJunchao Zhang
44697929ea7SJunchao Zhang /*TEST
44797929ea7SJunchao Zhang
44897929ea7SJunchao Zhang build:
449dfd57a17SPierre Jolivet requires: !defined(PETSC_HAVE_MPIUNI)
450c6c723bbSStefano Zampini
45197929ea7SJunchao Zhang testset:
45297929ea7SJunchao Zhang nsize: 7
45397929ea7SJunchao Zhang
45497929ea7SJunchao Zhang test:
45597929ea7SJunchao Zhang suffix: 1
45697929ea7SJunchao Zhang args: -world2sub
45797929ea7SJunchao Zhang
45897929ea7SJunchao Zhang test:
45997929ea7SJunchao Zhang suffix: 2
46097929ea7SJunchao Zhang args: -sub2sub
461f424265bSStefano Zampini # deadlocks with NECMPI and INTELMPI (20210400300)
4620f85934cSJunchao Zhang requires: !defined(PETSC_HAVE_NECMPI) !defined(PETSC_HAVE_I_MPI)
46397929ea7SJunchao Zhang
46497929ea7SJunchao Zhang test:
46597929ea7SJunchao Zhang suffix: 3
46697929ea7SJunchao Zhang args: -world2subs
46797929ea7SJunchao Zhang
46897929ea7SJunchao Zhang test:
46997929ea7SJunchao Zhang suffix: 4
4708a53a0a4SSajid Ali args: -world2sub -vectype cuda
4718a53a0a4SSajid Ali requires: cuda
4728a53a0a4SSajid Ali
4738a53a0a4SSajid Ali test:
4748a53a0a4SSajid Ali suffix: 5
4758a53a0a4SSajid Ali args: -sub2sub -vectype cuda
4768a53a0a4SSajid Ali requires: cuda
4778a53a0a4SSajid Ali
4788a53a0a4SSajid Ali test:
4798a53a0a4SSajid Ali suffix: 6
4808a53a0a4SSajid Ali args: -world2subs -vectype cuda
4818a53a0a4SSajid Ali requires: cuda
4828a53a0a4SSajid Ali
4836677b1c1SJunchao Zhang testset:
4846677b1c1SJunchao Zhang nsize: 7
48597929ea7SJunchao Zhang args: -world2sub -sf_type neighbor
48697929ea7SJunchao Zhang output_file: output/ex9_1.out
487c6c723bbSStefano Zampini # segfaults with NECMPI
4886677b1c1SJunchao Zhang requires: defined(PETSC_HAVE_MPI_NEIGHBORHOOD_COLLECTIVES) !defined(PETSC_HAVE_NECMPI)
4896677b1c1SJunchao Zhang
4906677b1c1SJunchao Zhang test:
4916677b1c1SJunchao Zhang suffix: 71
4926677b1c1SJunchao Zhang
4936677b1c1SJunchao Zhang test:
4946677b1c1SJunchao Zhang suffix: 72
4956677b1c1SJunchao Zhang requires: defined(PETSC_HAVE_MPI_PERSISTENT_NEIGHBORHOOD_COLLECTIVES)
4966677b1c1SJunchao Zhang args: -sf_neighbor_persistent
4976677b1c1SJunchao Zhang
49897929ea7SJunchao Zhang TEST*/
499