static char help[] = "Testing MatCreateSeqAIJKokkosWithKokkosViews() and building Mat on the device.\n\n";

#include <petscvec_kokkos.hpp>
#include <petscdevice.h>
#include <petscmat.h>
#include <petscmat_kokkos.hpp>
#include <Kokkos_Core.hpp>
#include <Kokkos_DualView.hpp>

using HostMirrorMemorySpace = Kokkos::DualView<PetscScalar *>::host_mirror_space::memory_space;

int main(int argc, char **argv)
{
  Mat             A, B, BT;
  PetscInt        i, j, column, M, N, m, n, m_ab, n_ab;
  PetscInt       *di, *dj, *oi, *oj, nd;
  const PetscInt *garray;
  PetscInt       *garray_h;
  PetscScalar    *oa, *da;
  PetscScalar     value;
  PetscRandom     rctx;
  PetscBool       equal, done;
  Mat             AA, AB;
  PetscMPIInt     size, rank;

  // ~~~~~~~~~~~~~~~~~~~~~
  // This test shows the routines needed to build a Kokkos matrix without
  // preallocation on the host
  // ~~~~~~~~~~~~~~~~~~~~~

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &argv, NULL, help));
  PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
  PetscCheck(size > 1, PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "Must run with 2 or more processes");
  PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));

  /* Create an mpiaij matrix for checking */
  PetscCall(MatCreateAIJ(PETSC_COMM_WORLD, 5, 5, PETSC_DECIDE, PETSC_DECIDE, 0, NULL, 0, NULL, &A));
  PetscCall(MatSetFromOptions(A));
  PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
  PetscCall(MatSetUp(A));
  PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rctx));
  PetscCall(PetscRandomSetFromOptions(rctx));

  for (i = 5 * rank; i < 5 * rank + 5; i++) {
    for (j = 0; j < 5 * size; j++) {
      PetscCall(PetscRandomGetValue(rctx, &value));
      column = (PetscInt)(5 * size * PetscRealPart(value));

      // rank 0 has no off-process entries: keep its columns inside its local diagonal block
      if (rank == 0 && (column < 5 * rank || column >= 5 * rank + 5)) column = i;

      PetscCall(PetscRandomGetValue(rctx, &value));
      PetscCall(MatSetValues(A, 1, &i, 1, &column, &value, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  PetscCall(MatGetSize(A, &M, &N));
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &AA, &AB, &garray));
  PetscCall(MatGetRowIJ(AA, 0, PETSC_FALSE, PETSC_FALSE, &nd, (const PetscInt **)&di, (const PetscInt **)&dj, &done));
  PetscCall(MatSeqAIJGetArray(AA, &da));
  PetscCall(MatGetRowIJ(AB, 0, PETSC_FALSE, PETSC_FALSE, &nd, (const PetscInt **)&oi, (const PetscInt **)&oj, &done));
  PetscCall(MatSeqAIJGetArray(AB, &oa));
  PetscCall(MatGetSize(AB, &m_ab, &n_ab));

  Mat output_mat_local, output_mat_nonlocal;
  // Be careful about scope given the Kokkos memory reference counts
  {
    // Local (diagonal) block
    Kokkos::View<PetscScalar *> a_local_d;
    Kokkos::View<PetscInt *>    i_local_d;
    Kokkos::View<PetscInt *>    j_local_d;

    // Nonlocal (off-diagonal) block
    Kokkos::View<PetscScalar *> a_nonlocal_d;
    Kokkos::View<PetscInt *>    i_nonlocal_d;
    Kokkos::View<PetscInt *>    j_nonlocal_d;

    // Create device memory
    PetscCallCXX(a_local_d = Kokkos::View<PetscScalar *>("a_local_d", di[5]));
    PetscCallCXX(i_local_d = Kokkos::View<PetscInt *>("i_local_d", m + 1));
    PetscCallCXX(j_local_d = Kokkos::View<PetscInt *>("j_local_d", di[5]));

    // Create non-local device memory
    PetscCallCXX(a_nonlocal_d = Kokkos::View<PetscScalar *>("a_nonlocal_d", oi[5]));
    PetscCallCXX(i_nonlocal_d = Kokkos::View<PetscInt *>("i_nonlocal_d", m + 1));
    PetscCallCXX(j_nonlocal_d = Kokkos::View<PetscInt *>("j_nonlocal_d", oi[5]));

    // ~~~~~~~~~~~~~~~~~~~~~
    // We could fill the aij values on the device - here we just test
    // by copying in the existing host values
    // ~~~~~~~~~~~~~~~~~~~~~
    Kokkos::View<PetscScalar *, HostMirrorMemorySpace> a_local_h;
    Kokkos::View<PetscInt *, HostMirrorMemorySpace>    i_local_h;
    Kokkos::View<PetscInt *, HostMirrorMemorySpace>    j_local_h;
    Kokkos::View<PetscScalar *, HostMirrorMemorySpace> a_nonlocal_h;
    Kokkos::View<PetscInt *, HostMirrorMemorySpace>    i_nonlocal_h;
    Kokkos::View<PetscInt *, HostMirrorMemorySpace>    j_nonlocal_h;

    // Wrap the existing host CSR arrays in unmanaged host views
    PetscCallCXX(a_local_h = Kokkos::View<PetscScalar *, HostMirrorMemorySpace>(da, di[5]));
    PetscCallCXX(i_local_h = Kokkos::View<PetscInt *, HostMirrorMemorySpace>(di, m + 1));
    PetscCallCXX(j_local_h = Kokkos::View<PetscInt *, HostMirrorMemorySpace>(dj, di[5]));
    PetscCallCXX(a_nonlocal_h = Kokkos::View<PetscScalar *, HostMirrorMemorySpace>(oa, oi[5]));
    PetscCallCXX(i_nonlocal_h = Kokkos::View<PetscInt *, HostMirrorMemorySpace>(oi, m + 1));
    PetscCallCXX(j_nonlocal_h = Kokkos::View<PetscInt *, HostMirrorMemorySpace>(oj, oi[5]));

    // We haven't specified an execution space, so these deep copies are synchronous
    // and finish without a need to call fence afterwards
    PetscCallCXX(Kokkos::deep_copy(a_local_d, a_local_h));
    PetscCallCXX(Kokkos::deep_copy(i_local_d, i_local_h));
    PetscCallCXX(Kokkos::deep_copy(j_local_d, j_local_h));
    PetscCallCXX(Kokkos::deep_copy(a_nonlocal_d, a_nonlocal_h));
    PetscCallCXX(Kokkos::deep_copy(i_nonlocal_d, i_nonlocal_h));
    PetscCallCXX(Kokkos::deep_copy(j_nonlocal_d, j_nonlocal_h));

    // The garray passed in has to be on the host, but it could also be built
    // on the device and copied to the host
    // We just copy the existing host values here
    PetscCall(PetscMalloc1(n_ab, &garray_h));
    for (i = 0; i < n_ab; i++) garray_h[i] = garray[i];

    // ~~~~~~~~~~~~~~~~~~~~~

    // ~~~~~~~~~~~~~~~~~
    // Test MatCreateSeqAIJKokkosWithKokkosViews
    // ~~~~~~~~~~~~~~~~~

    // We can create our local (diagonal) block matrix directly on the device
    PetscCall(MatCreateSeqAIJKokkosWithKokkosViews(PETSC_COMM_SELF, m, n, i_local_d, j_local_d, a_local_d, &output_mat_local));

    // We can create our nonlocal (off-diagonal) block matrix directly on the device
    PetscCall(MatCreateSeqAIJKokkosWithKokkosViews(PETSC_COMM_SELF, m, n_ab, i_nonlocal_d, j_nonlocal_d, a_nonlocal_d, &output_mat_nonlocal));

    // Build our MPI matrix
    // If we provide garray, and output_mat_nonlocal already uses local column indices
    // and the compactified column size, almost nothing happens on the host
    PetscCall(MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD, M, N, output_mat_local, output_mat_nonlocal, garray_h, &B));

    PetscCall(MatEqual(A, B, &equal));
    PetscCall(MatRestoreRowIJ(AA, 0, PETSC_FALSE, PETSC_FALSE, &nd, (const PetscInt **)&di, (const PetscInt **)&dj, &done));
    PetscCall(MatSeqAIJRestoreArray(AA, &da));
    PetscCall(MatRestoreRowIJ(AB, 0, PETSC_FALSE, PETSC_FALSE, &nd, (const PetscInt **)&oi, (const PetscInt **)&oj, &done));
    PetscCall(MatSeqAIJRestoreArray(AB, &oa));
    PetscCheck(equal, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Likely a bug in MatCreateSeqAIJKokkosWithKokkosViews()");
  }

  PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &BT));

  /* Free spaces */
  PetscCall(PetscRandomDestroy(&rctx));
  PetscCall(MatDestroy(&A));
  PetscCall(MatDestroy(&B));
  PetscCall(MatDestroy(&BT));
  PetscCall(PetscFinalize());
  return 0;
}

/*TEST
   build:
     requires: kokkos_kernels

   test:
     nsize: 2
     args: -mat_type aijkokkos
     requires: kokkos_kernels
     output_file: output/empty.out

TEST*/