static char help[] = "Solve a 2D 5-point stencil in parallel with Kokkos batched KSP and ASM solvers.\n\
Input parameters include:\n\
  -n                : number of mesh points in x direction\n\
  -m                : number of mesh points in y direction\n\
  -num_local_blocks : number of local subdomains for block Jacobi\n\n";

/*
  Include "petscksp.h" so that we can use KSP solvers.
*/
#include <petscksp.h>
#include <petscmat.h>

int main(int argc, char **args)
{
  Vec         x, b, u;           /* approx solution, RHS, exact solution */
  Mat         A, Pmat, Aseq, AA; /* system matrix and block-diagonal preconditioning matrices */
  KSP         ksp;               /* linear solver context */
  PetscReal   norm, norm0;       /* norms of the solution error and of the RHS */
  PetscInt    i, j, Ii, J, Istart, Iend, n = 7, m = 8, its, nblocks = 2;
  PetscBool   flg, ismpi;
  PetscScalar v;
  PetscMPIInt size, rank;
  IS         *loc_blocks = NULL;
  PC          pc;

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &args, NULL, help));
  PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
  PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL));
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL));
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-num_local_blocks", &nblocks, NULL));
  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
         Compute the matrix and right-hand-side vector that define
         the linear system, Ax = b.
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
  PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, n * m, n * m));
  PetscCall(MatSetFromOptions(A));
  PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));
  PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 3, NULL));
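  /* Preallocation note: the 5-point stencil gives at most 5 nonzeros per row in
     the local diagonal block; 3 per row is a generous estimate for the
     off-process couplings under the contiguous row partitioning used below. */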
  /*
     Currently, all PETSc parallel matrix formats are partitioned by
     contiguous chunks of rows across the processors. Determine which
     rows of the matrix are locally owned.
  */
  PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
  /*
     Set matrix elements for the 2-D, five-point stencil.
  */
  for (Ii = Istart; Ii < Iend; Ii++) {
    v = -1.0;
    i = Ii / n;     /* grid row (y index) of this matrix row */
    j = Ii - i * n; /* grid column (x index) of this matrix row */
    if (i > 0) { /* neighbor in the previous grid row */
      J = Ii - n;
      PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
    }
    if (i < m - 1) { /* neighbor in the next grid row */
      J = Ii + n;
      PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
    }
    if (j > 0) { /* left neighbor */
      J = Ii - 1;
      PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
    }
    if (j < n - 1) { /* right neighbor */
      J = Ii + 1;
      PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
    }
    v = 4.0; /* diagonal entry */
    PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, ADD_VALUES));
  }
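  /* Finish assembly: entries destined for rows owned by other processes are
     communicated and accumulated before the matrix is used. */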
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
         Set up the ASM solver and batched KSP solver data
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  /* make explicit block matrix for batch solver */
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &ismpi));
  if (!ismpi) {
    Aseq = A;
  } else {
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Aseq, NULL, NULL)); /* the local diagonal block of A */
  }
  PetscCall(PCASMCreateSubdomains(Aseq, nblocks, &loc_blocks)); /* nblocks index sets that partition the local rows */
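  /* Extract each diagonal subdomain block of Aseq and place it on the diagonal
     of a local MATNEST, producing an explicit block-diagonal matrix for the
     batched solver. */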
  Mat nest, array[10000]; /* slots for an nblocks x nblocks nest; assumes nblocks <= 100 */
  for (Ii = 0; Ii < 10000; Ii++) array[Ii] = NULL;
  for (PetscInt bid = 0; bid < nblocks; bid++) {
    Mat matblock;
    PetscCall(MatCreateSubMatrix(Aseq, loc_blocks[bid], loc_blocks[bid], MAT_INITIAL_MATRIX, &matblock));
    //PetscCall(MatViewFromOptions(matblock, NULL, "-view_b"));
    array[bid * nblocks + bid] = matblock;
  }
  PetscCall(MatCreate(PETSC_COMM_SELF, &nest));
  PetscCall(MatSetFromOptions(nest));
  PetscCall(MatSetType(nest, MATNEST));
  PetscCall(MatNestSetSubMats(nest, nblocks, NULL, nblocks, NULL, array));
  PetscCall(MatSetUp(nest));
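  /* Flatten the nest into a single Kokkos AIJ matrix; the unset (NULL)
     off-diagonal blocks are treated as zero and contribute no nonzeros. */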
  PetscCall(MatConvert(nest, MATAIJKOKKOS, MAT_INITIAL_MATRIX, &AA));
  PetscCall(MatDestroy(&nest));
  for (PetscInt bid = 0; bid < nblocks; bid++) PetscCall(MatDestroy(&array[bid * nblocks + bid]));
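  /* In parallel, wrap the local block-diagonal matrix AA in a parallel matrix
     Pmat with the same row layout as A, so it can be passed as the
     preconditioning matrix to KSPSetOperators() below. */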
  if (ismpi) {
    Mat AAseq;
    PetscCall(MatCreate(PETSC_COMM_WORLD, &Pmat));
    PetscCall(MatSetSizes(Pmat, Iend - Istart, Iend - Istart, n * m, n * m));
    PetscCall(MatSetFromOptions(Pmat));
    PetscCall(MatSeqAIJSetPreallocation(Pmat, 5, NULL));
    PetscCall(MatMPIAIJSetPreallocation(Pmat, 5, NULL, 3, NULL));
    PetscCall(MatAssemblyBegin(Pmat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(Pmat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatMPIAIJGetSeqAIJ(Pmat, &AAseq, NULL, NULL));
    PetscCheck(AAseq, PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "No diagonal block of Pmat");
    PetscCall(MatAXPY(AAseq, 1.0, AA, DIFFERENT_NONZERO_PATTERN)); /* copy the blocks into Pmat's diagonal block */
    PetscCall(MatDestroy(&AA));
  } else {
    Pmat = AA;
  }
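  /* Optionally view the preconditioning and system matrices (-view_p, -view_a) */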
  PetscCall(MatViewFromOptions(Pmat, NULL, "-view_p"));
  PetscCall(MatViewFromOptions(A, NULL, "-view_a"));

  /* A is symmetric. Set symmetric flag to enable ICC/Cholesky preconditioner */
  PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE));
  PetscCall(MatCreateVecs(A, &u, &b));
  PetscCall(MatCreateVecs(A, &x, NULL));
  /*
     Set the exact solution, then compute the right-hand-side vector.
     We use an exact solution with all elements equal to 1.0.
  */
  PetscCall(VecSet(u, 1.0));
  PetscCall(MatMult(A, u, b));
  /*
     View the exact solution vector if desired
  */
  flg = PETSC_FALSE;
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-view_exact_sol", &flg, NULL));
  if (flg) PetscCall(VecView(u, PETSC_VIEWER_STDOUT_WORLD));

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
             Create the linear solver and set various options
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
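  /* A defines the operator; the block-diagonal Pmat is used to build the
     preconditioner. */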
  PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
  PetscCall(KSPSetOperators(ksp, A, Pmat));
  PetscCall(KSPSetFromOptions(ksp));
  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                      Set up the ASM solver
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(KSPGetPC(ksp, &pc));
  PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCASM, &flg));
  if (flg && nblocks > 0) {
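    /* PCASM expects subdomains in global numbering, so replace each local
       index set from PCASMCreateSubdomains() with a contiguous global stride
       of the same size, starting at this process's first row. */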
    for (PetscInt bid = 0, gid0 = Istart; bid < nblocks; bid++) {
      PetscInt nn;
      IS       new_loc_blocks;
      PetscCall(ISGetSize(loc_blocks[bid], &nn)); /* size only */
      PetscCall(ISCreateStride(PETSC_COMM_SELF, nn, gid0, 1, &new_loc_blocks));
      PetscCall(ISDestroy(&loc_blocks[bid]));
      loc_blocks[bid] = new_loc_blocks;
      gid0 += nn; /* start of next block */
    }
    PetscCall(PCASMSetLocalSubdomains(pc, nblocks, loc_blocks, NULL));
  }
  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                      Solve the linear system
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(KSPSolve(ksp, b, x));
  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                      Check the solution and clean up
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(VecAXPY(x, -1.0, u)); /* x <- x - u, the solution error */
  PetscCall(VecNorm(x, NORM_2, &norm));
  PetscCall(VecNorm(b, NORM_2, &norm0));
  PetscCall(KSPGetIterationNumber(ksp, &its));
  /*
     Print the error norm relative to the norm of the right-hand side,
     along with the iteration count.
  */
  PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Relative norm of error %g iterations %" PetscInt_FMT "\n", (double)(norm / norm0), its));
  /*
     Free work space.
  */
  PetscCall(KSPDestroy(&ksp));
  if (loc_blocks) PetscCall(PCASMDestroySubdomains(nblocks, &loc_blocks, NULL));
  PetscCall(VecDestroy(&u));
  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&b));
  PetscCall(MatDestroy(&A));
  PetscCall(MatDestroy(&Pmat));
  PetscCall(PetscFinalize());
  return 0;
}

/*TEST

   testset:
     requires: parmetis kokkos_kernels
     args: -ksp_converged_reason -ksp_norm_type unpreconditioned -ksp_rtol 1e-4 -m 37 -n 23 -num_local_blocks 4
     nsize: 4
     test:
       suffix: batch
       args: -ksp_type cg -pc_type bjkokkos -pc_bjkokkos_ksp_max_it 60 -pc_bjkokkos_ksp_type tfqmr -pc_bjkokkos_pc_type jacobi -pc_bjkokkos_ksp_rtol 1e-3 -mat_type aijkokkos
     test:
       suffix: asm
       args: -ksp_type cg -pc_type asm -sub_pc_type jacobi -sub_ksp_type tfqmr -sub_ksp_rtol 1e-3
     test:
       suffix: batch_bicg
       args: -ksp_type cg -pc_type bjkokkos -pc_bjkokkos_ksp_max_it 60 -pc_bjkokkos_ksp_type bicg -pc_bjkokkos_pc_type jacobi -pc_bjkokkos_ksp_rtol 1e-3 -mat_type aijkokkos

   test:
     requires: kokkos_kernels
     nsize: 4
     suffix: no_metis_batch
     args: -ksp_converged_reason -ksp_norm_type unpreconditioned -ksp_rtol 1e-6 -m 37 -n 23 -num_local_blocks 4 -ksp_type cg -pc_type bjkokkos -pc_bjkokkos_ksp_max_it 60 -pc_bjkokkos_ksp_type tfqmr -pc_bjkokkos_pc_type jacobi -pc_bjkokkos_ksp_rtol 1e-3 -mat_type aijkokkos

   test:
     requires: kokkos_kernels
     nsize: 1
     suffix: serial_batch
     args: -ksp_monitor -ksp_converged_reason -ksp_norm_type unpreconditioned -ksp_rtol 1e-4 -m 37 -n 23 -num_local_blocks 16 -ksp_type cg -pc_type bjkokkos -pc_bjkokkos_ksp_max_it 60 -pc_bjkokkos_ksp_type tfqmr -pc_bjkokkos_pc_type jacobi -pc_bjkokkos_ksp_rtol 1e-6 -mat_type aijkokkos -pc_bjkokkos_ksp_converged_reason

TEST*/