1 static const char help[] = "Test PetscSF with integers and MPIU_2INT \n\n"; 2 3 #include <petscvec.h> 4 #include <petscsf.h> 5 #include <petscdevice.h> 6 7 int main(int argc, char *argv[]) 8 { 9 PetscInt n, n2, N = 12; 10 PetscInt *indices; 11 IS ix, iy; 12 VecScatter vscat; 13 Vec x, y; 14 PetscInt rstart, rend; 15 PetscInt *xh, *yh, *xd, *yd; 16 PetscDeviceContext dctx; 17 18 PetscFunctionBeginUser; 19 PetscCall(PetscInitialize(&argc, &argv, NULL, help)); 20 PetscCall(VecCreateFromOptions(PETSC_COMM_WORLD, NULL, 1, PETSC_DECIDE, N, &x)); 21 PetscCall(VecDuplicate(x, &y)); 22 PetscCall(VecGetLocalSize(x, &n)); 23 24 PetscCall(VecGetOwnershipRange(x, &rstart, &rend)); 25 PetscCall(ISCreateStride(PETSC_COMM_WORLD, n, rstart, 1, &ix)); 26 PetscCall(PetscMalloc1(n, &indices)); 27 for (PetscInt i = rstart; i < rend; i++) indices[i - rstart] = i / 2; 28 PetscCall(ISCreateGeneral(PETSC_COMM_WORLD, n, indices, PETSC_OWN_POINTER, &iy)); 29 // connect y[0] to x[0..1], y[1] to x[2..3], etc 30 PetscCall(VecScatterCreate(y, iy, x, ix, &vscat)); // y has roots, x has leaves 31 32 PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); 33 34 // double the allocation since we will use MPIU_2INT later 35 n2 = 2 * n; 36 PetscCall(PetscDeviceMalloc(dctx, PETSC_MEMTYPE_HOST, n2, &xh)); 37 PetscCall(PetscDeviceMalloc(dctx, PETSC_MEMTYPE_HOST, n2, &yh)); 38 PetscCall(PetscDeviceMalloc(dctx, PETSC_MEMTYPE_DEVICE, n2, &xd)); 39 PetscCall(PetscDeviceMalloc(dctx, PETSC_MEMTYPE_DEVICE, n2, &yd)); 40 41 for (PetscInt i = 0; i < n; i++) { 42 xh[i] = xh[i + n] = i + rstart; 43 yh[i] = yh[i + n] = i + rstart; 44 } 45 PetscCall(PetscDeviceMemcpy(dctx, xd, xh, sizeof(PetscInt) * n2)); 46 PetscCall(PetscDeviceMemcpy(dctx, yd, yh, sizeof(PetscInt) * n2)); 47 48 PetscCall(PetscSFReduceWithMemTypeBegin(vscat, MPIU_INT, PETSC_MEMTYPE_DEVICE, xd, PETSC_MEMTYPE_DEVICE, yd, MPI_SUM)); 49 PetscCall(PetscSFReduceEnd(vscat, MPIU_INT, xd, yd, MPI_SUM)); 50 PetscCall(PetscDeviceMemcpy(dctx, yh, yd, sizeof(PetscInt) * n)); 51 PetscCall(PetscDeviceContextSynchronize(dctx)); // finish the async memcpy 52 PetscCall(PetscIntView(n, yh, PETSC_VIEWER_STDOUT_WORLD)); 53 54 PetscCall(PetscSFBcastWithMemTypeBegin(vscat, MPIU_2INT, PETSC_MEMTYPE_DEVICE, yd, PETSC_MEMTYPE_DEVICE, xd, MPI_MINLOC)); 55 PetscCall(PetscSFBcastEnd(vscat, MPIU_2INT, yd, xd, MPI_MINLOC)); 56 PetscCall(PetscDeviceMemcpy(dctx, xh, xd, sizeof(PetscInt) * n2)); 57 PetscCall(PetscDeviceContextSynchronize(dctx)); // finish the async memcpy 58 PetscCall(PetscIntView(n2, xh, PETSC_VIEWER_STDOUT_WORLD)); 59 60 PetscCall(PetscDeviceFree(dctx, xh)); 61 PetscCall(PetscDeviceFree(dctx, yh)); 62 PetscCall(PetscDeviceFree(dctx, xd)); 63 PetscCall(PetscDeviceFree(dctx, yd)); 64 PetscCall(ISDestroy(&ix)); 65 PetscCall(ISDestroy(&iy)); 66 PetscCall(VecDestroy(&x)); 67 PetscCall(VecDestroy(&y)); 68 PetscCall(VecScatterDestroy(&vscat)); 69 PetscCall(PetscFinalize()); 70 } 71 72 /*TEST 73 testset: 74 output_file: output/ex23.out 75 nsize: 3 76 77 test: 78 suffix: 1 79 requires: cuda 80 81 test: 82 suffix: 2 83 requires: hip 84 85 test: 86 suffix: 3 87 requires: sycl 88 89 TEST*/ 90