static char help[] = "Testing MatCreateSeqAIJKokkosWithKokkosViews() and building Mat on the device.\n\n";

#include <petscvec_kokkos.hpp>
#include <petscdevice.h>
#include <petscmat.h>
#include <petscmat_kokkos.hpp>
#include <Kokkos_Core.hpp>
#include <Kokkos_DualView.hpp>

// Memory space in which the host mirror of a default DualView lives; used below
// to wrap existing host arrays in (unmanaged) Kokkos::Views so they can be
// deep_copy'd to device Views.
using HostMirrorMemorySpace = Kokkos::DualView<PetscScalar *>::host_mirror_space::memory_space;
11
main(int argc,char ** argv)12 int main(int argc, char **argv)
13 {
14 Mat A, B, BT;
15 PetscInt i, j, column, M, N, m, n, m_ab, n_ab;
16 PetscInt *di, *dj, *oi, *oj, nd;
17 const PetscInt *garray;
18 PetscInt *garray_h;
19 PetscScalar *oa, *da;
20 PetscScalar value;
21 PetscRandom rctx;
22 PetscBool equal, done;
23 Mat AA, AB;
24 PetscMPIInt size, rank;
25
26 // ~~~~~~~~~~~~~~~~~~~~~
27 // This test shows the routines needed to build a kokkos matrix without preallocation
28 // on the host
29 // ~~~~~~~~~~~~~~~~~~~~~
30
31 PetscFunctionBeginUser;
32 PetscCall(PetscInitialize(&argc, &argv, NULL, help));
33 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
34 PetscCheck(size > 1, PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "Must run with 2 or more processes");
35 PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
36
37 /* Create a mpiaij matrix for checking */
38 PetscCall(MatCreateAIJ(PETSC_COMM_WORLD, 5, 5, PETSC_DECIDE, PETSC_DECIDE, 0, NULL, 0, NULL, &A));
39 PetscCall(MatSetFromOptions(A));
40 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
41 PetscCall(MatSetUp(A));
42 PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rctx));
43 PetscCall(PetscRandomSetFromOptions(rctx));
44
45 for (i = 5 * rank; i < 5 * rank + 5; i++) {
46 for (j = 0; j < 5 * size; j++) {
47 PetscCall(PetscRandomGetValue(rctx, &value));
48 column = (PetscInt)(5 * size * PetscRealPart(value));
49
50 // rank 0 has no off-process entries
51 if (rank == 0 && (column < i || column >= i)) column = i;
52
53 PetscCall(PetscRandomGetValue(rctx, &value));
54 PetscCall(MatSetValues(A, 1, &i, 1, &column, &value, INSERT_VALUES));
55 }
56 }
57 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
58 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
59
60 PetscCall(MatGetSize(A, &M, &N));
61 PetscCall(MatGetLocalSize(A, &m, &n));
62 PetscCall(MatMPIAIJGetSeqAIJ(A, &AA, &AB, &garray));
63 PetscCall(MatGetRowIJ(AA, 0, PETSC_FALSE, PETSC_FALSE, &nd, (const PetscInt **)&di, (const PetscInt **)&dj, &done));
64 PetscCall(MatSeqAIJGetArray(AA, &da));
65 PetscCall(MatGetRowIJ(AB, 0, PETSC_FALSE, PETSC_FALSE, &nd, (const PetscInt **)&oi, (const PetscInt **)&oj, &done));
66 PetscCall(MatSeqAIJGetArray(AB, &oa));
67 PetscCall(MatGetSize(AB, &m_ab, &n_ab));
68
69 Mat output_mat_local, output_mat_nonlocal;
70 // Be careful about scope given the kokkos memory reference counts
71 {
72 // Local
73 Kokkos::View<PetscScalar *> a_local_d;
74 Kokkos::View<PetscInt *> i_local_d;
75 Kokkos::View<PetscInt *> j_local_d;
76
77 // Nonlocal
78 Kokkos::View<PetscScalar *> a_nonlocal_d;
79 Kokkos::View<PetscInt *> i_nonlocal_d;
80 Kokkos::View<PetscInt *> j_nonlocal_d;
81
82 // Create device memory
83 PetscCallCXX(a_local_d = Kokkos::View<PetscScalar *>("a_local_d", di[5]));
84 PetscCallCXX(i_local_d = Kokkos::View<PetscInt *>("i_local_d", m + 1));
85 PetscCallCXX(j_local_d = Kokkos::View<PetscInt *>("j_local_d", di[5]));
86
87 // Create non-local device memory
88 PetscCallCXX(a_nonlocal_d = Kokkos::View<PetscScalar *>("a_nonlocal_d", oi[5]));
89 PetscCallCXX(i_nonlocal_d = Kokkos::View<PetscInt *>("i_nonlocal_d", m + 1));
90 PetscCallCXX(j_nonlocal_d = Kokkos::View<PetscInt *>("j_nonlocal_d", oi[5]));
91
92 // ~~~~~~~~~~~~~~~~~~~~~
93 // Could fill the aij on the device - we're just going to test
94 // by copying in the existing host values
95 // ~~~~~~~~~~~~~~~~~~~~~
96 Kokkos::View<PetscScalar *, HostMirrorMemorySpace> a_local_h;
97 Kokkos::View<PetscInt *, HostMirrorMemorySpace> i_local_h;
98 Kokkos::View<PetscInt *, HostMirrorMemorySpace> j_local_h;
99 Kokkos::View<PetscScalar *, HostMirrorMemorySpace> a_nonlocal_h;
100 Kokkos::View<PetscInt *, HostMirrorMemorySpace> i_nonlocal_h;
101 Kokkos::View<PetscInt *, HostMirrorMemorySpace> j_nonlocal_h;
102
103 PetscCallCXX(a_local_h = Kokkos::View<PetscScalar *, HostMirrorMemorySpace>(da, di[5]));
104 PetscCallCXX(i_local_h = Kokkos::View<PetscInt *, HostMirrorMemorySpace>(di, m + 1));
105 PetscCallCXX(j_local_h = Kokkos::View<PetscInt *, HostMirrorMemorySpace>(dj, di[5]));
106 PetscCallCXX(a_nonlocal_h = Kokkos::View<PetscScalar *, HostMirrorMemorySpace>(oa, oi[5]));
107 PetscCallCXX(i_nonlocal_h = Kokkos::View<PetscInt *, HostMirrorMemorySpace>(oi, m + 1));
108 PetscCallCXX(j_nonlocal_h = Kokkos::View<PetscInt *, HostMirrorMemorySpace>(oj, oi[5]));
109
110 // Haven't specified an exec space so these will all be synchronous
111 // and finish without a need to call fence after
112 PetscCallCXX(Kokkos::deep_copy(a_local_d, a_local_h));
113 PetscCallCXX(Kokkos::deep_copy(i_local_d, i_local_h));
114 PetscCallCXX(Kokkos::deep_copy(j_local_d, j_local_h));
115 PetscCallCXX(Kokkos::deep_copy(a_nonlocal_d, a_nonlocal_h));
116 PetscCallCXX(Kokkos::deep_copy(i_nonlocal_d, i_nonlocal_h));
117 PetscCallCXX(Kokkos::deep_copy(j_nonlocal_d, j_nonlocal_h));
118
119 // The garray passed in has to be on the host, but it can be created
120 // on device and copied to the host
121 // We're just going to copy the existing host values here
122 PetscCall(PetscMalloc1(n_ab, &garray_h));
123 for (int i = 0; i < n_ab; i++) garray_h[i] = garray[i];
124
125 // ~~~~~~~~~~~~~~~~~~~~~
126
127 // ~~~~~~~~~~~~~~~~~
128 // Test MatCreateSeqAIJKokkosWithKokkosViews
129 // ~~~~~~~~~~~~~~~~~
130
131 // We can create our local diagonal block matrix directly on the device
132 PetscCall(MatCreateSeqAIJKokkosWithKokkosViews(PETSC_COMM_SELF, m, n, i_local_d, j_local_d, a_local_d, &output_mat_local));
133
134 // We can create our nonlocal diagonal block matrix directly on the device
135 PetscCall(MatCreateSeqAIJKokkosWithKokkosViews(PETSC_COMM_SELF, m, n_ab, i_nonlocal_d, j_nonlocal_d, a_nonlocal_d, &output_mat_nonlocal));
136
137 // Build our MPI matrix
138 // If we provide garray and output_mat_nonlocal with local indices and the compactified size
139 // almost nothing happens on the host
140 PetscCall(MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD, M, N, output_mat_local, output_mat_nonlocal, garray_h, &B));
141
142 PetscCall(MatEqual(A, B, &equal));
143 PetscCall(MatRestoreRowIJ(AA, 0, PETSC_FALSE, PETSC_FALSE, &nd, (const PetscInt **)&di, (const PetscInt **)&dj, &done));
144 PetscCall(MatSeqAIJRestoreArray(AA, &da));
145 PetscCall(MatRestoreRowIJ(AB, 0, PETSC_FALSE, PETSC_FALSE, &nd, (const PetscInt **)&oi, (const PetscInt **)&oj, &done));
146 PetscCall(MatSeqAIJRestoreArray(AB, &oa));
147 PetscCheck(equal, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Likely a bug in MatCreateSeqAIJKokkosWithKokkosViews()");
148 }
149
150 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &BT));
151
152 /* Free spaces */
153 PetscCall(PetscRandomDestroy(&rctx));
154 PetscCall(MatDestroy(&A));
155 PetscCall(MatDestroy(&B));
156 PetscCall(MatDestroy(&BT));
157 PetscCall(PetscFinalize());
158 return 0;
159 }
160
/*TEST

   build:
     requires: kokkos_kernels

   test:
     nsize: 2
     args: -mat_type aijkokkos
     requires: kokkos_kernels
     output_file: output/empty.out

TEST*/