1c0c276a7Ssdargavi static char help[] = "Testing MatCreateSeqAIJKokkosWithKokkosViews() and building Mat on the device.\n\n";
2c0c276a7Ssdargavi
3c0c276a7Ssdargavi #include <petscvec_kokkos.hpp>
4c0c276a7Ssdargavi #include <petscdevice.h>
5c0c276a7Ssdargavi #include <petscmat.h>
6c0c276a7Ssdargavi #include <petscmat_kokkos.hpp>
7c0c276a7Ssdargavi #include <Kokkos_Core.hpp>
8c0c276a7Ssdargavi #include <Kokkos_DualView.hpp>
9c0c276a7Ssdargavi
10c0c276a7Ssdargavi using HostMirrorMemorySpace = Kokkos::DualView<PetscScalar *>::host_mirror_space::memory_space;
11c0c276a7Ssdargavi
main(int argc,char ** argv)12c0c276a7Ssdargavi int main(int argc, char **argv)
13c0c276a7Ssdargavi {
14ecd797f4SJunchao Zhang Mat A, B, BT;
15c0c276a7Ssdargavi PetscInt i, j, column, M, N, m, n, m_ab, n_ab;
16c0c276a7Ssdargavi PetscInt *di, *dj, *oi, *oj, nd;
17c0c276a7Ssdargavi const PetscInt *garray;
18c0c276a7Ssdargavi PetscInt *garray_h;
19c0c276a7Ssdargavi PetscScalar *oa, *da;
20c0c276a7Ssdargavi PetscScalar value;
21c0c276a7Ssdargavi PetscRandom rctx;
22c0c276a7Ssdargavi PetscBool equal, done;
23c0c276a7Ssdargavi Mat AA, AB;
24c0c276a7Ssdargavi PetscMPIInt size, rank;
25c0c276a7Ssdargavi
26c0c276a7Ssdargavi // ~~~~~~~~~~~~~~~~~~~~~
27c0c276a7Ssdargavi // This test shows the routines needed to build a kokkos matrix without preallocation
28c0c276a7Ssdargavi // on the host
29c0c276a7Ssdargavi // ~~~~~~~~~~~~~~~~~~~~~
30c0c276a7Ssdargavi
31c0c276a7Ssdargavi PetscFunctionBeginUser;
32c0c276a7Ssdargavi PetscCall(PetscInitialize(&argc, &argv, NULL, help));
33c0c276a7Ssdargavi PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
34c0c276a7Ssdargavi PetscCheck(size > 1, PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "Must run with 2 or more processes");
35c0c276a7Ssdargavi PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
36c0c276a7Ssdargavi
37c0c276a7Ssdargavi /* Create a mpiaij matrix for checking */
38c0c276a7Ssdargavi PetscCall(MatCreateAIJ(PETSC_COMM_WORLD, 5, 5, PETSC_DECIDE, PETSC_DECIDE, 0, NULL, 0, NULL, &A));
39c0c276a7Ssdargavi PetscCall(MatSetFromOptions(A));
40c0c276a7Ssdargavi PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
41c0c276a7Ssdargavi PetscCall(MatSetUp(A));
42c0c276a7Ssdargavi PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rctx));
43c0c276a7Ssdargavi PetscCall(PetscRandomSetFromOptions(rctx));
44c0c276a7Ssdargavi
45c0c276a7Ssdargavi for (i = 5 * rank; i < 5 * rank + 5; i++) {
46c0c276a7Ssdargavi for (j = 0; j < 5 * size; j++) {
47c0c276a7Ssdargavi PetscCall(PetscRandomGetValue(rctx, &value));
48c0c276a7Ssdargavi column = (PetscInt)(5 * size * PetscRealPart(value));
49ecd797f4SJunchao Zhang
50ecd797f4SJunchao Zhang // rank 0 has no off-process entries
51ecd797f4SJunchao Zhang if (rank == 0 && (column < i || column >= i)) column = i;
52ecd797f4SJunchao Zhang
53c0c276a7Ssdargavi PetscCall(PetscRandomGetValue(rctx, &value));
54c0c276a7Ssdargavi PetscCall(MatSetValues(A, 1, &i, 1, &column, &value, INSERT_VALUES));
55c0c276a7Ssdargavi }
56c0c276a7Ssdargavi }
57c0c276a7Ssdargavi PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
58c0c276a7Ssdargavi PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
59c0c276a7Ssdargavi
60c0c276a7Ssdargavi PetscCall(MatGetSize(A, &M, &N));
61c0c276a7Ssdargavi PetscCall(MatGetLocalSize(A, &m, &n));
62c0c276a7Ssdargavi PetscCall(MatMPIAIJGetSeqAIJ(A, &AA, &AB, &garray));
63c0c276a7Ssdargavi PetscCall(MatGetRowIJ(AA, 0, PETSC_FALSE, PETSC_FALSE, &nd, (const PetscInt **)&di, (const PetscInt **)&dj, &done));
64c0c276a7Ssdargavi PetscCall(MatSeqAIJGetArray(AA, &da));
65c0c276a7Ssdargavi PetscCall(MatGetRowIJ(AB, 0, PETSC_FALSE, PETSC_FALSE, &nd, (const PetscInt **)&oi, (const PetscInt **)&oj, &done));
66c0c276a7Ssdargavi PetscCall(MatSeqAIJGetArray(AB, &oa));
67c0c276a7Ssdargavi PetscCall(MatGetSize(AB, &m_ab, &n_ab));
68c0c276a7Ssdargavi
69c0c276a7Ssdargavi Mat output_mat_local, output_mat_nonlocal;
70c0c276a7Ssdargavi // Be careful about scope given the kokkos memory reference counts
71c0c276a7Ssdargavi {
72c0c276a7Ssdargavi // Local
73c0c276a7Ssdargavi Kokkos::View<PetscScalar *> a_local_d;
74c0c276a7Ssdargavi Kokkos::View<PetscInt *> i_local_d;
75c0c276a7Ssdargavi Kokkos::View<PetscInt *> j_local_d;
76c0c276a7Ssdargavi
77c0c276a7Ssdargavi // Nonlocal
78c0c276a7Ssdargavi Kokkos::View<PetscScalar *> a_nonlocal_d;
79c0c276a7Ssdargavi Kokkos::View<PetscInt *> i_nonlocal_d;
80c0c276a7Ssdargavi Kokkos::View<PetscInt *> j_nonlocal_d;
81c0c276a7Ssdargavi
82c0c276a7Ssdargavi // Create device memory
83c0c276a7Ssdargavi PetscCallCXX(a_local_d = Kokkos::View<PetscScalar *>("a_local_d", di[5]));
84c0c276a7Ssdargavi PetscCallCXX(i_local_d = Kokkos::View<PetscInt *>("i_local_d", m + 1));
85c0c276a7Ssdargavi PetscCallCXX(j_local_d = Kokkos::View<PetscInt *>("j_local_d", di[5]));
86c0c276a7Ssdargavi
87c0c276a7Ssdargavi // Create non-local device memory
88c0c276a7Ssdargavi PetscCallCXX(a_nonlocal_d = Kokkos::View<PetscScalar *>("a_nonlocal_d", oi[5]));
89c0c276a7Ssdargavi PetscCallCXX(i_nonlocal_d = Kokkos::View<PetscInt *>("i_nonlocal_d", m + 1));
90c0c276a7Ssdargavi PetscCallCXX(j_nonlocal_d = Kokkos::View<PetscInt *>("j_nonlocal_d", oi[5]));
91c0c276a7Ssdargavi
92c0c276a7Ssdargavi // ~~~~~~~~~~~~~~~~~~~~~
93c0c276a7Ssdargavi // Could fill the aij on the device - we're just going to test
94c0c276a7Ssdargavi // by copying in the existing host values
95c0c276a7Ssdargavi // ~~~~~~~~~~~~~~~~~~~~~
96c0c276a7Ssdargavi Kokkos::View<PetscScalar *, HostMirrorMemorySpace> a_local_h;
97c0c276a7Ssdargavi Kokkos::View<PetscInt *, HostMirrorMemorySpace> i_local_h;
98c0c276a7Ssdargavi Kokkos::View<PetscInt *, HostMirrorMemorySpace> j_local_h;
99c0c276a7Ssdargavi Kokkos::View<PetscScalar *, HostMirrorMemorySpace> a_nonlocal_h;
100c0c276a7Ssdargavi Kokkos::View<PetscInt *, HostMirrorMemorySpace> i_nonlocal_h;
101c0c276a7Ssdargavi Kokkos::View<PetscInt *, HostMirrorMemorySpace> j_nonlocal_h;
102c0c276a7Ssdargavi
103c0c276a7Ssdargavi PetscCallCXX(a_local_h = Kokkos::View<PetscScalar *, HostMirrorMemorySpace>(da, di[5]));
104c0c276a7Ssdargavi PetscCallCXX(i_local_h = Kokkos::View<PetscInt *, HostMirrorMemorySpace>(di, m + 1));
105c0c276a7Ssdargavi PetscCallCXX(j_local_h = Kokkos::View<PetscInt *, HostMirrorMemorySpace>(dj, di[5]));
106c0c276a7Ssdargavi PetscCallCXX(a_nonlocal_h = Kokkos::View<PetscScalar *, HostMirrorMemorySpace>(oa, oi[5]));
107c0c276a7Ssdargavi PetscCallCXX(i_nonlocal_h = Kokkos::View<PetscInt *, HostMirrorMemorySpace>(oi, m + 1));
108c0c276a7Ssdargavi PetscCallCXX(j_nonlocal_h = Kokkos::View<PetscInt *, HostMirrorMemorySpace>(oj, oi[5]));
109c0c276a7Ssdargavi
110c0c276a7Ssdargavi // Haven't specified an exec space so these will all be synchronous
111c0c276a7Ssdargavi // and finish without a need to call fence after
112c0c276a7Ssdargavi PetscCallCXX(Kokkos::deep_copy(a_local_d, a_local_h));
113c0c276a7Ssdargavi PetscCallCXX(Kokkos::deep_copy(i_local_d, i_local_h));
114c0c276a7Ssdargavi PetscCallCXX(Kokkos::deep_copy(j_local_d, j_local_h));
115c0c276a7Ssdargavi PetscCallCXX(Kokkos::deep_copy(a_nonlocal_d, a_nonlocal_h));
116c0c276a7Ssdargavi PetscCallCXX(Kokkos::deep_copy(i_nonlocal_d, i_nonlocal_h));
117c0c276a7Ssdargavi PetscCallCXX(Kokkos::deep_copy(j_nonlocal_d, j_nonlocal_h));
118c0c276a7Ssdargavi
119c0c276a7Ssdargavi // The garray passed in has to be on the host, but it can be created
120c0c276a7Ssdargavi // on device and copied to the host
121c0c276a7Ssdargavi // We're just going to copy the existing host values here
122c0c276a7Ssdargavi PetscCall(PetscMalloc1(n_ab, &garray_h));
123*ac530a7eSPierre Jolivet for (int i = 0; i < n_ab; i++) garray_h[i] = garray[i];
124c0c276a7Ssdargavi
125c0c276a7Ssdargavi // ~~~~~~~~~~~~~~~~~~~~~
126c0c276a7Ssdargavi
127c0c276a7Ssdargavi // ~~~~~~~~~~~~~~~~~
128c0c276a7Ssdargavi // Test MatCreateSeqAIJKokkosWithKokkosViews
129c0c276a7Ssdargavi // ~~~~~~~~~~~~~~~~~
130c0c276a7Ssdargavi
131c0c276a7Ssdargavi // We can create our local diagonal block matrix directly on the device
132c0c276a7Ssdargavi PetscCall(MatCreateSeqAIJKokkosWithKokkosViews(PETSC_COMM_SELF, m, n, i_local_d, j_local_d, a_local_d, &output_mat_local));
133c0c276a7Ssdargavi
134c0c276a7Ssdargavi // We can create our nonlocal diagonal block matrix directly on the device
135c0c276a7Ssdargavi PetscCall(MatCreateSeqAIJKokkosWithKokkosViews(PETSC_COMM_SELF, m, n_ab, i_nonlocal_d, j_nonlocal_d, a_nonlocal_d, &output_mat_nonlocal));
136c0c276a7Ssdargavi
137c0c276a7Ssdargavi // Build our MPI matrix
138c0c276a7Ssdargavi // If we provide garray and output_mat_nonlocal with local indices and the compactified size
139c0c276a7Ssdargavi // almost nothing happens on the host
140c0c276a7Ssdargavi PetscCall(MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD, M, N, output_mat_local, output_mat_nonlocal, garray_h, &B));
141c0c276a7Ssdargavi
142c0c276a7Ssdargavi PetscCall(MatEqual(A, B, &equal));
143c0c276a7Ssdargavi PetscCall(MatRestoreRowIJ(AA, 0, PETSC_FALSE, PETSC_FALSE, &nd, (const PetscInt **)&di, (const PetscInt **)&dj, &done));
144c0c276a7Ssdargavi PetscCall(MatSeqAIJRestoreArray(AA, &da));
145c0c276a7Ssdargavi PetscCall(MatRestoreRowIJ(AB, 0, PETSC_FALSE, PETSC_FALSE, &nd, (const PetscInt **)&oi, (const PetscInt **)&oj, &done));
146c0c276a7Ssdargavi PetscCall(MatSeqAIJRestoreArray(AB, &oa));
147c0c276a7Ssdargavi PetscCheck(equal, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Likely a bug in MatCreateSeqAIJKokkosWithKokkosViews()");
148ecd797f4SJunchao Zhang }
149c0c276a7Ssdargavi
150ecd797f4SJunchao Zhang PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &BT));
151c0c276a7Ssdargavi
152c0c276a7Ssdargavi /* Free spaces */
153c0c276a7Ssdargavi PetscCall(PetscRandomDestroy(&rctx));
154c0c276a7Ssdargavi PetscCall(MatDestroy(&A));
155ecd797f4SJunchao Zhang PetscCall(MatDestroy(&B));
156ecd797f4SJunchao Zhang PetscCall(MatDestroy(&BT));
157c0c276a7Ssdargavi PetscCall(PetscFinalize());
158c0c276a7Ssdargavi return 0;
159c0c276a7Ssdargavi }
160c0c276a7Ssdargavi
161c0c276a7Ssdargavi /*TEST
162c0c276a7Ssdargavi build:
163c0c276a7Ssdargavi requires: kokkos_kernels
164c0c276a7Ssdargavi
165c0c276a7Ssdargavi test:
166c0c276a7Ssdargavi nsize: 2
167c0c276a7Ssdargavi args: -mat_type aijkokkos
168c0c276a7Ssdargavi requires: kokkos_kernels
169c0c276a7Ssdargavi output_file: output/empty.out
170c0c276a7Ssdargavi
171c0c276a7Ssdargavi TEST*/
172