1 static char help[] = "Working out corner cases of the ASM preconditioner.\n\n";
2
3 #include <petscksp.h>
4
main(int argc,char ** args)5 int main(int argc, char **args)
6 {
7 KSP ksp;
8 PC pc;
9 Mat A;
10 Vec u, x, b;
11 PetscReal error;
12 PetscMPIInt rank, size, sized;
13 PetscInt M = 8, N = 8, m, n, rstart, rend, r;
14 PetscBool userSubdomains = PETSC_FALSE;
15
16 PetscFunctionBeginUser;
17 PetscCall(PetscInitialize(&argc, &args, NULL, help));
18 PetscCall(PetscOptionsGetInt(NULL, NULL, "-M", &M, NULL));
19 PetscCall(PetscOptionsGetInt(NULL, NULL, "-N", &N, NULL));
20 PetscCall(PetscOptionsGetBool(NULL, NULL, "-user_subdomains", &userSubdomains, NULL));
21 /* Do parallel decomposition */
22 PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
23 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
24 sized = (PetscMPIInt)PetscSqrtReal((PetscReal)size);
25 PetscCheck(PetscSqr(sized) == size, PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "This test may only be run on a number of processes which is a perfect square, not %d", size);
26 PetscCheck((M % sized) == 0, PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "The number of x-vertices %" PetscInt_FMT " does not divide the number of x-processes %d", M, sized);
27 PetscCheck((N % sized) == 0, PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "The number of y-vertices %" PetscInt_FMT " does not divide the number of y-processes %d", N, sized);
28 /* Assemble the matrix for the five point stencil, YET AGAIN
29 Every other process will be empty */
30 PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
31 m = (sized > 1) ? (rank % 2) ? 0 : 2 * M / sized : M;
32 n = N / sized;
33 PetscCall(MatSetSizes(A, m * n, m * n, M * N, M * N));
34 PetscCall(MatSetFromOptions(A));
35 PetscCall(MatSetUp(A));
36 PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
37 for (r = rstart; r < rend; ++r) {
38 const PetscScalar diag = 4.0, offdiag = -1.0;
39 const PetscInt i = r / N;
40 const PetscInt j = r - i * N;
41 PetscInt c;
42
43 if (i > 0) {
44 c = r - n;
45 PetscCall(MatSetValues(A, 1, &r, 1, &c, &offdiag, INSERT_VALUES));
46 }
47 if (i < M - 1) {
48 c = r + n;
49 PetscCall(MatSetValues(A, 1, &r, 1, &c, &offdiag, INSERT_VALUES));
50 }
51 if (j > 0) {
52 c = r - 1;
53 PetscCall(MatSetValues(A, 1, &r, 1, &c, &offdiag, INSERT_VALUES));
54 }
55 if (j < N - 1) {
56 c = r + 1;
57 PetscCall(MatSetValues(A, 1, &r, 1, &c, &offdiag, INSERT_VALUES));
58 }
59 PetscCall(MatSetValues(A, 1, &r, 1, &r, &diag, INSERT_VALUES));
60 }
61 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
62 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
63 /* Setup Solve */
64 PetscCall(MatCreateVecs(A, &x, &b));
65 PetscCall(VecDuplicate(x, &u));
66 PetscCall(VecSet(u, 1.0));
67 PetscCall(MatMult(A, u, b));
68 PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
69 PetscCall(KSPSetOperators(ksp, A, A));
70 PetscCall(KSPGetPC(ksp, &pc));
71 PetscCall(PCSetType(pc, PCASM));
72 /* Setup ASM by hand */
73 if (userSubdomains) {
74 IS is;
75 PetscInt *rows;
76
77 /* Use no overlap for now */
78 PetscCall(PetscMalloc1(rend - rstart, &rows));
79 for (r = rstart; r < rend; ++r) rows[r - rstart] = r;
80 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rend - rstart, rows, PETSC_OWN_POINTER, &is));
81 PetscCall(PCASMSetLocalSubdomains(pc, 1, &is, &is));
82 PetscCall(ISDestroy(&is));
83 }
84 PetscCall(KSPSetFromOptions(ksp));
85 /* Solve and Compare */
86 PetscCall(KSPSolve(ksp, b, x));
87 PetscCall(VecAXPY(x, -1.0, u));
88 PetscCall(VecNorm(x, NORM_INFINITY, &error));
89 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Infinity norm of the error: %g\n", (double)error));
90 /* Cleanup */
91 PetscCall(KSPDestroy(&ksp));
92 PetscCall(MatDestroy(&A));
93 PetscCall(VecDestroy(&u));
94 PetscCall(VecDestroy(&x));
95 PetscCall(VecDestroy(&b));
96 PetscCall(PetscFinalize());
97 return 0;
98 }
99
100 /*TEST
101
102 test:
103 suffix: 0
104 args: -ksp_view
105
106 test:
107 requires: kokkos_kernels
108 suffix: 0_kokkos
109 args: -ksp_view -mat_type aijkokkos
110
111 test:
112 requires: cuda
113 suffix: 0_cuda
114 args: -ksp_view -mat_type aijcusparse -sub_pc_factor_mat_solver_type cusparse
115
116 test:
117 suffix: 1
118 nsize: 4
119 args: -ksp_view
120
121 test:
122 requires: kokkos_kernels
123 suffix: 1_kokkos
124 nsize: 4
125 args: -ksp_view -mat_type aijkokkos
126
127 test:
128 requires: cuda
129 suffix: 1_cuda
130 nsize: 4
131 args: -ksp_view -mat_type aijcusparse -sub_pc_factor_mat_solver_type cusparse
132
133 test:
134 suffix: 2
135 nsize: 4
136 args: -user_subdomains -ksp_view
137
138 test:
139 requires: kokkos_kernels
140 suffix: 2_kokkos
141 nsize: 4
142 args: -user_subdomains -ksp_view -mat_type aijkokkos
143
144 test:
145 requires: cuda
146 suffix: 2_cuda
147 nsize: 4
148 args: -user_subdomains -ksp_view -mat_type aijcusparse -sub_pc_factor_mat_solver_type cusparse
149
150 TEST*/
151