197929ea7SJunchao Zhang static char help[] = "Test VecScatterCreateToZero, VecScatterCreateToAll\n\n";
297929ea7SJunchao Zhang
397929ea7SJunchao Zhang #include <petscvec.h>
main(int argc,char ** argv)4d71ae5a4SJacob Faibussowitsch int main(int argc, char **argv)
5d71ae5a4SJacob Faibussowitsch {
666100624SStefano Zampini PetscInt i, N = 10, n = PETSC_DECIDE, low, high, onlylocal = -1;
797929ea7SJunchao Zhang PetscMPIInt size, rank;
897929ea7SJunchao Zhang Vec x, y;
997929ea7SJunchao Zhang VecScatter vscat;
1097929ea7SJunchao Zhang
11327415f7SBarry Smith PetscFunctionBeginUser;
12*c8025a54SPierre Jolivet PetscCall(PetscInitialize(&argc, &argv, NULL, help));
139566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
149566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
1566100624SStefano Zampini PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &N, NULL));
1666100624SStefano Zampini
1766100624SStefano Zampini /* Trigger special case in VecScatterCreateToAll to deal with the one-to-all pattern */
1866100624SStefano Zampini PetscCall(PetscOptionsGetInt(NULL, NULL, "-onlylocal", &onlylocal, NULL));
1966100624SStefano Zampini if (onlylocal >= 0 && onlylocal < size) n = (rank == onlylocal ? N : 0);
2097929ea7SJunchao Zhang
219566063dSJacob Faibussowitsch PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
229566063dSJacob Faibussowitsch PetscCall(VecSetFromOptions(x));
2366100624SStefano Zampini PetscCall(VecSetSizes(x, n, N));
249566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRange(x, &low, &high));
259566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)x, "x"));
2697929ea7SJunchao Zhang
2797929ea7SJunchao Zhang /*-------------------------------------*/
2897929ea7SJunchao Zhang /* VecScatterCreateToZero */
2997929ea7SJunchao Zhang /*-------------------------------------*/
3097929ea7SJunchao Zhang
3197929ea7SJunchao Zhang /* MPI vec x = [0, 1, 2, .., N-1] */
329566063dSJacob Faibussowitsch for (i = low; i < high; i++) PetscCall(VecSetValue(x, i, (PetscScalar)i, INSERT_VALUES));
339566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(x));
349566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(x));
3597929ea7SJunchao Zhang
369566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\nTesting VecScatterCreateToZero\n"));
379566063dSJacob Faibussowitsch PetscCall(VecScatterCreateToZero(x, &vscat, &y));
389566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)y, "y"));
3997929ea7SJunchao Zhang
4097929ea7SJunchao Zhang /* Test PetscSFBcastAndOp with op = MPI_REPLACE, which does y = x on rank 0 */
419566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
429566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
439566063dSJacob Faibussowitsch if (rank == 0) PetscCall(VecView(y, PETSC_VIEWER_STDOUT_SELF));
4497929ea7SJunchao Zhang
4597929ea7SJunchao Zhang /* Test PetscSFBcastAndOp with op = MPI_SUM, which does y += x */
469566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vscat, x, y, ADD_VALUES, SCATTER_FORWARD));
479566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vscat, x, y, ADD_VALUES, SCATTER_FORWARD));
489566063dSJacob Faibussowitsch if (rank == 0) PetscCall(VecView(y, PETSC_VIEWER_STDOUT_SELF));
4997929ea7SJunchao Zhang
5097929ea7SJunchao Zhang /* Test PetscSFReduce with op = MPI_REPLACE, which does x = y */
519566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vscat, y, x, INSERT_VALUES, SCATTER_REVERSE));
529566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vscat, y, x, INSERT_VALUES, SCATTER_REVERSE));
539566063dSJacob Faibussowitsch PetscCall(VecView(x, PETSC_VIEWER_STDOUT_WORLD));
5497929ea7SJunchao Zhang
5597929ea7SJunchao Zhang /* Test PetscSFReduce with op = MPI_SUM, which does x += y on x's local part on rank 0*/
569566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vscat, y, x, ADD_VALUES, SCATTER_REVERSE_LOCAL));
579566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vscat, y, x, ADD_VALUES, SCATTER_REVERSE_LOCAL));
589566063dSJacob Faibussowitsch PetscCall(VecView(x, PETSC_VIEWER_STDOUT_WORLD));
5997929ea7SJunchao Zhang
609566063dSJacob Faibussowitsch PetscCall(VecDestroy(&y));
619566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vscat));
6297929ea7SJunchao Zhang
6397929ea7SJunchao Zhang /*-------------------------------------*/
6497929ea7SJunchao Zhang /* VecScatterCreateToAll */
6597929ea7SJunchao Zhang /*-------------------------------------*/
669566063dSJacob Faibussowitsch for (i = low; i < high; i++) PetscCall(VecSetValue(x, i, (PetscScalar)i, INSERT_VALUES));
679566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(x));
689566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(x));
6997929ea7SJunchao Zhang
709566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\nTesting VecScatterCreateToAll\n"));
7197929ea7SJunchao Zhang
729566063dSJacob Faibussowitsch PetscCall(VecScatterCreateToAll(x, &vscat, &y));
739566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)y, "y"));
7497929ea7SJunchao Zhang
7597929ea7SJunchao Zhang /* Test PetscSFBcastAndOp with op = MPI_REPLACE, which does y = x on all ranks */
769566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
779566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
789566063dSJacob Faibussowitsch if (rank == 0) PetscCall(VecView(y, PETSC_VIEWER_STDOUT_SELF));
7997929ea7SJunchao Zhang
8097929ea7SJunchao Zhang /* Test PetscSFBcastAndOp with op = MPI_SUM, which does y += x */
819566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vscat, x, y, ADD_VALUES, SCATTER_FORWARD));
829566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vscat, x, y, ADD_VALUES, SCATTER_FORWARD));
839566063dSJacob Faibussowitsch if (rank == 0) PetscCall(VecView(y, PETSC_VIEWER_STDOUT_SELF));
8497929ea7SJunchao Zhang
8597929ea7SJunchao Zhang /* Test PetscSFReduce with op = MPI_REPLACE, which does x = y */
869566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vscat, y, x, INSERT_VALUES, SCATTER_REVERSE));
879566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vscat, y, x, INSERT_VALUES, SCATTER_REVERSE));
889566063dSJacob Faibussowitsch PetscCall(VecView(x, PETSC_VIEWER_STDOUT_WORLD));
8997929ea7SJunchao Zhang
9097929ea7SJunchao Zhang /* Test PetscSFReduce with op = MPI_SUM, which does x += size*y */
919566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vscat, y, x, ADD_VALUES, SCATTER_REVERSE));
929566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vscat, y, x, ADD_VALUES, SCATTER_REVERSE));
939566063dSJacob Faibussowitsch PetscCall(VecView(x, PETSC_VIEWER_STDOUT_WORLD));
949566063dSJacob Faibussowitsch PetscCall(VecDestroy(&x));
959566063dSJacob Faibussowitsch PetscCall(VecDestroy(&y));
969566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vscat));
9797929ea7SJunchao Zhang
989566063dSJacob Faibussowitsch PetscCall(PetscFinalize());
99b122ec5aSJacob Faibussowitsch return 0;
10097929ea7SJunchao Zhang }
10197929ea7SJunchao Zhang
10297929ea7SJunchao Zhang /*TEST
10397929ea7SJunchao Zhang
10497929ea7SJunchao Zhang testset:
10597929ea7SJunchao Zhang # N=10 is divisible by nsize, to trigger Allgather/Gather in SF
10697929ea7SJunchao Zhang nsize: 2
10726e8e884SScott Kruger # Exact numbers really matter here
10826e8e884SScott Kruger diff_args: -j
10997929ea7SJunchao Zhang filter: grep -v "type"
11097929ea7SJunchao Zhang output_file: output/ex8_1.out
11197929ea7SJunchao Zhang
11297929ea7SJunchao Zhang test:
11397929ea7SJunchao Zhang suffix: 1_standard
11497929ea7SJunchao Zhang
11597929ea7SJunchao Zhang test:
11697929ea7SJunchao Zhang suffix: 1_cuda
11726e8e884SScott Kruger # sf_backend cuda is not needed if compiling only with cuda
118bd46da1dSJunchao Zhang args: -vec_type cuda -sf_backend cuda
11997929ea7SJunchao Zhang requires: cuda
12097929ea7SJunchao Zhang
12197929ea7SJunchao Zhang test:
12226e8e884SScott Kruger suffix: 1_hip
123bd46da1dSJunchao Zhang args: -vec_type hip -sf_backend hip
12426e8e884SScott Kruger requires: hip
12526e8e884SScott Kruger
12626e8e884SScott Kruger test:
12797929ea7SJunchao Zhang suffix: 1_cuda_aware_mpi
12826e8e884SScott Kruger # sf_backend cuda is not needed if compiling only with cuda
129bd46da1dSJunchao Zhang args: -vec_type cuda -sf_backend cuda
130dfd57a17SPierre Jolivet requires: cuda defined(PETSC_HAVE_MPI_GPU_AWARE)
13197929ea7SJunchao Zhang
13297929ea7SJunchao Zhang testset:
13397929ea7SJunchao Zhang # N=10 is not divisible by nsize, to trigger Allgatherv/Gatherv in SF
13497929ea7SJunchao Zhang nsize: 3
13526e8e884SScott Kruger # Exact numbers really matter here
13626e8e884SScott Kruger diff_args: -j
13766100624SStefano Zampini filter: grep -v "type" | grep -v "Process "
13897929ea7SJunchao Zhang output_file: output/ex8_2.out
13997929ea7SJunchao Zhang
14097929ea7SJunchao Zhang test:
14197929ea7SJunchao Zhang suffix: 2_standard
14297929ea7SJunchao Zhang
14397929ea7SJunchao Zhang test:
14497929ea7SJunchao Zhang suffix: 2_cuda
14526e8e884SScott Kruger # sf_backend cuda is not needed if compiling only with cuda
146bd46da1dSJunchao Zhang args: -vec_type cuda -sf_backend cuda
14797929ea7SJunchao Zhang requires: cuda
14897929ea7SJunchao Zhang
14997929ea7SJunchao Zhang test:
15026e8e884SScott Kruger suffix: 2_hip
15126e8e884SScott Kruger # sf_backend hip is not needed if compiling only with hip
152bd46da1dSJunchao Zhang args: -vec_type hip -sf_backend hip
15326e8e884SScott Kruger requires: hip
15426e8e884SScott Kruger
15526e8e884SScott Kruger test:
15697929ea7SJunchao Zhang suffix: 2_cuda_aware_mpi
157bd46da1dSJunchao Zhang args: -vec_type cuda
158dfd57a17SPierre Jolivet requires: cuda defined(PETSC_HAVE_MPI_GPU_AWARE)
15997929ea7SJunchao Zhang
16066100624SStefano Zampini testset:
16166100624SStefano Zampini # trigger one-to-all pattern in Allgatherv
16266100624SStefano Zampini nsize: 3
16366100624SStefano Zampini diff_args: -j
16466100624SStefano Zampini filter: grep -v "type" | grep -v "Process "
16566100624SStefano Zampini output_file: output/ex8_3.out
16666100624SStefano Zampini args: -onlylocal 1
16766100624SStefano Zampini
16866100624SStefano Zampini test:
16966100624SStefano Zampini suffix: 2_standard_onetoall
17066100624SStefano Zampini
17166100624SStefano Zampini test:
17266100624SStefano Zampini suffix: 2_cuda_onetoall
17366100624SStefano Zampini # sf_backend cuda is not needed if compiling only with cuda
17466100624SStefano Zampini args: -vec_type cuda -sf_backend cuda
17566100624SStefano Zampini requires: cuda
17666100624SStefano Zampini
17766100624SStefano Zampini test:
17866100624SStefano Zampini suffix: 2_hip_onetoall
17966100624SStefano Zampini # sf_backend hip is not needed if compiling only with hip
18066100624SStefano Zampini args: -vec_type hip -sf_backend hip
18166100624SStefano Zampini requires: hip
18266100624SStefano Zampini
18366100624SStefano Zampini test:
18466100624SStefano Zampini suffix: 2_cuda_aware_mpi_onetoall
18566100624SStefano Zampini args: -vec_type cuda
18666100624SStefano Zampini requires: cuda defined(PETSC_HAVE_MPI_GPU_AWARE)
18766100624SStefano Zampini
18897929ea7SJunchao Zhang TEST*/
189