static char help[] = "Solves a 2D 5-point stencil problem in parallel with Kokkos batched KSP and ASM solvers.\n\
Input parameters include:\n\
  -n                : number of mesh points in x direction\n\
  -m                : number of mesh points in y direction\n\
  -num_local_blocks : number of local subdomains for block Jacobi\n\n";

/*
  Include "petscksp.h" so that we can use KSP solvers.
*/
#include <petscksp.h>
#include <petscmat.h>

int main(int argc, char **args)
{
  Vec         x, b, u;           /* approx solution, RHS, exact solution */
  Mat         A, Pmat, Aseq, AA; /* system matrix, preconditioning matrix, and work matrices */
  KSP         ksp;               /* linear solver context */
  PetscReal   norm, norm0;       /* norms of the solution error and of the RHS */
  PetscInt    i, j, Ii, J, Istart, Iend, n = 7, m = 8, its, nblocks = 2;
  PetscBool   flg, ismpi;
  PetscScalar v;
  PetscMPIInt size, rank;
  IS         *loc_blocks = NULL;
  PC          pc;

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &args, NULL, help));
  PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
  PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL));
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL));
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-num_local_blocks", &nblocks, NULL));
  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
         Compute the matrix and right-hand-side vector that define
         the linear system, Ax = b.
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
  PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, n * m, n * m));
  PetscCall(MatSetFromOptions(A));
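  /*
     Preallocate 5 nonzeros per row for the 5-point stencil; for the MPI
     format, 3 is a conservative estimate of the couplings to off-process rows.
  */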
  PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));
  PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 3, NULL));
  /*
     Currently, all PETSc parallel matrix formats are partitioned by
     contiguous chunks of rows across the processors.  Determine which
     rows of the matrix are locally owned.
  */
  PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
  /*
     Set matrix elements for the 2-D, five-point stencil.
  */
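  /*
     Row Ii corresponds to grid point (i, j) = (Ii / n, Ii mod n); each existing
     neighbor contributes -1 off the diagonal and the diagonal entry is 4,
     giving the standard 5-point Laplacian stencil.
  */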
  for (Ii = Istart; Ii < Iend; Ii++) {
    v = -1.0;
    i = Ii / n;
    j = Ii - i * n;
    if (i > 0) {
      J = Ii - n;
      PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
    }
    if (i < m - 1) {
      J = Ii + n;
      PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
    }
    if (j > 0) {
      J = Ii - 1;
      PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
    }
    if (j < n - 1) {
      J = Ii + 1;
      PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
    }
    v = 4.0;
    PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, ADD_VALUES));
  }
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                Set up ASM solver and batched KSP solver data
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  /* Make an explicit block-diagonal matrix for the batched solver, using the local (sequential) part of A */
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &ismpi));
  if (!ismpi) {
    Aseq = A;
  } else {
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Aseq, NULL, NULL));
  }
  PetscCall(PCASMCreateSubdomains(Aseq, nblocks, &loc_blocks)); // partition the local rows of A into nblocks subdomain index sets
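  /*
     Build a block-diagonal preconditioning matrix: extract each diagonal
     sub-block of Aseq, place it on the diagonal of a MATNEST, and convert the
     result to a single AIJKOKKOS matrix for the batched (bjkokkos) solver.
  */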
  Mat nest, array[10000];
  PetscCheck(nblocks * nblocks <= 10000, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nblocks %" PetscInt_FMT " gives more nest blocks than the fixed-size array can hold", nblocks);
  for (Ii = 0; Ii < 10000; Ii++) array[Ii] = NULL;
  for (PetscInt bid = 0; bid < nblocks; bid++) {
    Mat matblock;
    PetscCall(MatCreateSubMatrix(Aseq, loc_blocks[bid], loc_blocks[bid], MAT_INITIAL_MATRIX, &matblock));
    //PetscCall(MatViewFromOptions(matblock, NULL, "-view_b"));
    array[bid * nblocks + bid] = matblock;
  }
  PetscCall(MatCreate(PETSC_COMM_SELF, &nest));
  PetscCall(MatSetFromOptions(nest));
  PetscCall(MatSetType(nest, MATNEST));
  PetscCall(MatNestSetSubMats(nest, nblocks, NULL, nblocks, NULL, array));
  PetscCall(MatSetUp(nest));
  PetscCall(MatConvert(nest, MATAIJKOKKOS, MAT_INITIAL_MATRIX, &AA));
  PetscCall(MatDestroy(&nest));
  for (PetscInt bid = 0; bid < nblocks; bid++) PetscCall(MatDestroy(&array[bid * nblocks + bid]));
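  /*
     In parallel, wrap the sequential block-diagonal matrix AA in an MPIAIJ Pmat
     with the same row layout as A: assemble an empty Pmat, then add AA into its
     local diagonal block. In serial, AA itself serves as Pmat.
  */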
  if (ismpi) {
    Mat AAseq;
    PetscCall(MatCreate(PETSC_COMM_WORLD, &Pmat));
    PetscCall(MatSetSizes(Pmat, Iend - Istart, Iend - Istart, n * m, n * m));
    PetscCall(MatSetFromOptions(Pmat));
    PetscCall(MatSeqAIJSetPreallocation(Pmat, 5, NULL));
    PetscCall(MatMPIAIJSetPreallocation(Pmat, 5, NULL, 3, NULL));
    PetscCall(MatAssemblyBegin(Pmat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(Pmat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatMPIAIJGetSeqAIJ(Pmat, &AAseq, NULL, NULL));
    PetscCheck(AAseq, PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "No A mat");
    PetscCall(MatAXPY(AAseq, 1.0, AA, DIFFERENT_NONZERO_PATTERN));
    PetscCall(MatDestroy(&AA));
  } else {
    Pmat = AA;
  }
  PetscCall(MatViewFromOptions(Pmat, NULL, "-view_p"));
  PetscCall(MatViewFromOptions(A, NULL, "-view_a"));

  /* A is symmetric. Set symmetric flag to enable ICC/Cholesky preconditioner */
  PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE));
  PetscCall(MatCreateVecs(A, &u, &b));
  PetscCall(MatCreateVecs(A, &x, NULL));
  /*
     Set the exact solution, then compute the right-hand-side vector:
     the exact solution u has all elements equal to 1.0, and b = A*u.
  */
  PetscCall(VecSet(u, 1.0));
  PetscCall(MatMult(A, u, b));
  /*
     View the exact solution vector if desired
  */
  flg = PETSC_FALSE;
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-view_exact_sol", &flg, NULL));
  if (flg) PetscCall(VecView(u, PETSC_VIEWER_STDOUT_WORLD));

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                Create the linear solver and set various options
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
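  /* A defines the operator; the block-diagonal Pmat is used to build the preconditioner */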
  PetscCall(KSPSetOperators(ksp, A, Pmat));
  PetscCall(KSPSetFromOptions(ksp));
  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    Set up the ASM solver
    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(KSPGetPC(ksp, &pc));
  PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCASM, &flg));
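  /*
     If -pc_type asm was selected, convert the subdomain index sets from local
     to global numbering (a contiguous stride of global rows per block, starting
     at Istart) and hand them to PCASM.
  */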
  if (flg && nblocks > 0) {
    for (PetscInt bid = 0, gid0 = Istart; bid < nblocks; bid++) {
      PetscInt nn;
      IS       new_loc_blocks;
      PetscCall(ISGetSize(loc_blocks[bid], &nn)); // size only
      PetscCall(ISCreateStride(PETSC_COMM_SELF, nn, gid0, 1, &new_loc_blocks));
      PetscCall(ISDestroy(&loc_blocks[bid]));
      loc_blocks[bid] = new_loc_blocks;
      gid0 += nn; // start of next block
    }
    PetscCall(PCASMSetLocalSubdomains(pc, nblocks, loc_blocks, NULL));
  }
  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                      Solve the linear system
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(KSPSolve(ksp, b, x));
  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                      Check the solution and clean up
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
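  /* Compute the error x - u and report its 2-norm relative to the 2-norm of the RHS b */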
  PetscCall(VecAXPY(x, -1.0, u));
  PetscCall(VecNorm(x, NORM_2, &norm));
  PetscCall(VecNorm(b, NORM_2, &norm0));
  PetscCall(KSPGetIterationNumber(ksp, &its));
  /*
     Print convergence information.
  */
  PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Relative norm of error %g iterations %" PetscInt_FMT "\n", (double)(norm / norm0), its));
  /*
    cleanup
  */
  PetscCall(KSPDestroy(&ksp));
  /* PCASMDestroySubdomains frees both the index sets and the array that holds them */
  if (loc_blocks) PetscCall(PCASMDestroySubdomains(nblocks, &loc_blocks, NULL));
  PetscCall(VecDestroy(&u));
  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&b));
  PetscCall(MatDestroy(&A));
  PetscCall(MatDestroy(&Pmat));
  PetscCall(PetscFinalize());
  return 0;
}

/*TEST

  testset:
    requires: parmetis kokkos_kernels
    args: -ksp_converged_reason -ksp_norm_type unpreconditioned -ksp_rtol 1e-4 -m 37 -n 23 -num_local_blocks 4
    nsize: 4
    test:
      suffix: batch
      args: -ksp_type cg -pc_type bjkokkos -pc_bjkokkos_ksp_max_it 60 -pc_bjkokkos_ksp_type tfqmr -pc_bjkokkos_pc_type jacobi -pc_bjkokkos_ksp_rtol 1e-3 -mat_type aijkokkos
    test:
      suffix: asm
      args: -ksp_type cg -pc_type asm -sub_pc_type jacobi -sub_ksp_type tfqmr -sub_ksp_rtol 1e-3
    test:
      suffix: batch_bicg
      args: -ksp_type cg -pc_type bjkokkos -pc_bjkokkos_ksp_max_it 60 -pc_bjkokkos_ksp_type bicg -pc_bjkokkos_pc_type jacobi -pc_bjkokkos_ksp_rtol 1e-3 -mat_type aijkokkos

  test:
    requires: kokkos_kernels
    nsize: 4
    suffix: no_metis_batch
    args: -ksp_converged_reason -ksp_norm_type unpreconditioned -ksp_rtol 1e-6 -m 37 -n 23 -num_local_blocks 4 -ksp_type cg -pc_type bjkokkos -pc_bjkokkos_ksp_max_it 60 -pc_bjkokkos_ksp_type tfqmr -pc_bjkokkos_pc_type jacobi -pc_bjkokkos_ksp_rtol 1e-3 -mat_type aijkokkos

  test:
    requires: kokkos_kernels
    nsize: 1
    suffix: serial_batch
    args: -ksp_monitor -ksp_converged_reason -ksp_norm_type unpreconditioned -ksp_rtol 1e-4 -m 37 -n 23 -num_local_blocks 16 -ksp_type cg -pc_type bjkokkos -pc_bjkokkos_ksp_max_it 60 -pc_bjkokkos_ksp_type tfqmr -pc_bjkokkos_pc_type jacobi -pc_bjkokkos_ksp_rtol 1e-6 -mat_type aijkokkos -pc_bjkokkos_ksp_converged_reason

 TEST*/