/* xref: /petsc/src/vec/is/sf/tests/ex23.c (revision d8e47b638cf8f604a99e9678e1df24f82d959cd7) */
/* Help text handed to PetscInitialize() in main(). */
static const char help[] = "Test PetscSF with integers and MPIU_2INT \n\n";

#include <petscvec.h>
#include <petscsf.h>
#include <petscdevice.h>

main(int argc,char * argv[])71f02d56fSJunchao Zhang int main(int argc, char *argv[])
81f02d56fSJunchao Zhang {
91f02d56fSJunchao Zhang   PetscInt           n, n2, N = 12;
101f02d56fSJunchao Zhang   PetscInt          *indices;
111f02d56fSJunchao Zhang   IS                 ix, iy;
121f02d56fSJunchao Zhang   VecScatter         vscat;
131f02d56fSJunchao Zhang   Vec                x, y;
141f02d56fSJunchao Zhang   PetscInt           rstart, rend;
151f02d56fSJunchao Zhang   PetscInt          *xh, *yh, *xd, *yd;
161f02d56fSJunchao Zhang   PetscDeviceContext dctx;
171f02d56fSJunchao Zhang 
181f02d56fSJunchao Zhang   PetscFunctionBeginUser;
191f02d56fSJunchao Zhang   PetscCall(PetscInitialize(&argc, &argv, NULL, help));
201f02d56fSJunchao Zhang   PetscCall(VecCreateFromOptions(PETSC_COMM_WORLD, NULL, 1, PETSC_DECIDE, N, &x));
211f02d56fSJunchao Zhang   PetscCall(VecDuplicate(x, &y));
221f02d56fSJunchao Zhang   PetscCall(VecGetLocalSize(x, &n));
231f02d56fSJunchao Zhang 
241f02d56fSJunchao Zhang   PetscCall(VecGetOwnershipRange(x, &rstart, &rend));
251f02d56fSJunchao Zhang   PetscCall(ISCreateStride(PETSC_COMM_WORLD, n, rstart, 1, &ix));
261f02d56fSJunchao Zhang   PetscCall(PetscMalloc1(n, &indices));
27*6497c311SBarry Smith   for (PetscInt i = rstart; i < rend; i++) indices[i - rstart] = i / 2;
281f02d56fSJunchao Zhang   PetscCall(ISCreateGeneral(PETSC_COMM_WORLD, n, indices, PETSC_OWN_POINTER, &iy));
291f02d56fSJunchao Zhang   // connect y[0] to x[0..1], y[1] to x[2..3], etc
301f02d56fSJunchao Zhang   PetscCall(VecScatterCreate(y, iy, x, ix, &vscat)); // y has roots, x has leaves
311f02d56fSJunchao Zhang 
321f02d56fSJunchao Zhang   PetscCall(PetscDeviceContextGetCurrentContext(&dctx));
331f02d56fSJunchao Zhang 
341f02d56fSJunchao Zhang   // double the allocation since we will use MPIU_2INT later
351f02d56fSJunchao Zhang   n2 = 2 * n;
361f02d56fSJunchao Zhang   PetscCall(PetscDeviceMalloc(dctx, PETSC_MEMTYPE_HOST, n2, &xh));
371f02d56fSJunchao Zhang   PetscCall(PetscDeviceMalloc(dctx, PETSC_MEMTYPE_HOST, n2, &yh));
381f02d56fSJunchao Zhang   PetscCall(PetscDeviceMalloc(dctx, PETSC_MEMTYPE_DEVICE, n2, &xd));
391f02d56fSJunchao Zhang   PetscCall(PetscDeviceMalloc(dctx, PETSC_MEMTYPE_DEVICE, n2, &yd));
401f02d56fSJunchao Zhang 
411f02d56fSJunchao Zhang   for (PetscInt i = 0; i < n; i++) {
421f02d56fSJunchao Zhang     xh[i] = xh[i + n] = i + rstart;
431f02d56fSJunchao Zhang     yh[i] = yh[i + n] = i + rstart;
441f02d56fSJunchao Zhang   }
451f02d56fSJunchao Zhang   PetscCall(PetscDeviceMemcpy(dctx, xd, xh, sizeof(PetscInt) * n2));
461f02d56fSJunchao Zhang   PetscCall(PetscDeviceMemcpy(dctx, yd, yh, sizeof(PetscInt) * n2));
471f02d56fSJunchao Zhang 
481f02d56fSJunchao Zhang   PetscCall(PetscSFReduceWithMemTypeBegin(vscat, MPIU_INT, PETSC_MEMTYPE_DEVICE, xd, PETSC_MEMTYPE_DEVICE, yd, MPI_SUM));
491f02d56fSJunchao Zhang   PetscCall(PetscSFReduceEnd(vscat, MPIU_INT, xd, yd, MPI_SUM));
501f02d56fSJunchao Zhang   PetscCall(PetscDeviceMemcpy(dctx, yh, yd, sizeof(PetscInt) * n));
511f02d56fSJunchao Zhang   PetscCall(PetscDeviceContextSynchronize(dctx)); // finish the async memcpy
521f02d56fSJunchao Zhang   PetscCall(PetscIntView(n, yh, PETSC_VIEWER_STDOUT_WORLD));
531f02d56fSJunchao Zhang 
541f02d56fSJunchao Zhang   PetscCall(PetscSFBcastWithMemTypeBegin(vscat, MPIU_2INT, PETSC_MEMTYPE_DEVICE, yd, PETSC_MEMTYPE_DEVICE, xd, MPI_MINLOC));
551f02d56fSJunchao Zhang   PetscCall(PetscSFBcastEnd(vscat, MPIU_2INT, yd, xd, MPI_MINLOC));
561f02d56fSJunchao Zhang   PetscCall(PetscDeviceMemcpy(dctx, xh, xd, sizeof(PetscInt) * n2));
571f02d56fSJunchao Zhang   PetscCall(PetscDeviceContextSynchronize(dctx)); // finish the async memcpy
581f02d56fSJunchao Zhang   PetscCall(PetscIntView(n2, xh, PETSC_VIEWER_STDOUT_WORLD));
591f02d56fSJunchao Zhang 
601f02d56fSJunchao Zhang   PetscCall(PetscDeviceFree(dctx, xh));
611f02d56fSJunchao Zhang   PetscCall(PetscDeviceFree(dctx, yh));
621f02d56fSJunchao Zhang   PetscCall(PetscDeviceFree(dctx, xd));
631f02d56fSJunchao Zhang   PetscCall(PetscDeviceFree(dctx, yd));
641f02d56fSJunchao Zhang   PetscCall(ISDestroy(&ix));
651f02d56fSJunchao Zhang   PetscCall(ISDestroy(&iy));
661f02d56fSJunchao Zhang   PetscCall(VecDestroy(&x));
671f02d56fSJunchao Zhang   PetscCall(VecDestroy(&y));
681f02d56fSJunchao Zhang   PetscCall(VecScatterDestroy(&vscat));
691f02d56fSJunchao Zhang   PetscCall(PetscFinalize());
701f02d56fSJunchao Zhang }

/*TEST
  testset:
    output_file: output/ex23.out
    nsize: 3

    test:
      suffix: 1
      requires: cuda

    test:
      suffix: 2
      requires: hip

    test:
      suffix: 3
      requires: sycl

TEST*/