/* Usage text for this example; main() passes it to PetscInitialize(). */
static char help[] = "Tests MatSolve() and MatMatSolve() with MUMPS or MKL_PARDISO sequential solvers in Schur complement mode.\n"
                     "Example: mpiexec -n 1 ./ex192 -f <matrix binary file> -nrhs 4 -symmetric_solve -hermitian_solve -schur_ratio 0.3\n\n";
3
4 #include <petscmat.h>
5
main(int argc,char ** args)6 int main(int argc, char **args)
7 {
8 Mat A, RHS, C, F, X, S;
9 Vec u, x, b;
10 Vec xschur, bschur, uschur;
11 IS is_schur;
12 PetscMPIInt size;
13 PetscInt isolver = 0, size_schur, m, n, nfact, nsolve, nrhs;
14 PetscReal norm, tol = PETSC_SQRT_MACHINE_EPSILON;
15 PetscRandom rand;
16 PetscBool data_provided, herm, symm, use_lu, cuda = PETSC_FALSE;
17 PetscBool isdata_provided;
18 PetscReal sratio = 5.1 / 12.;
19 PetscViewer fd; /* viewer */
20 char solver[256];
21 char file[PETSC_MAX_PATH_LEN]; /* input Mat file name */
22 char isfile[PETSC_MAX_PATH_LEN]; /* input IS file name */
23
24 PetscFunctionBeginUser;
25 PetscCall(PetscInitialize(&argc, &args, NULL, help));
26 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
27 PetscCheck(size == 1, PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "This is a uniprocessor test");
28 /* Determine which type of solver we want to test for */
29 herm = PETSC_FALSE;
30 symm = PETSC_FALSE;
31 PetscCall(PetscOptionsGetBool(NULL, NULL, "-symmetric_solve", &symm, NULL));
32 PetscCall(PetscOptionsGetBool(NULL, NULL, "-hermitian_solve", &herm, NULL));
33 if (herm) symm = PETSC_TRUE;
34 PetscCall(PetscOptionsGetBool(NULL, NULL, "-cuda_solve", &cuda, NULL));
35 PetscCall(PetscOptionsGetReal(NULL, NULL, "-tol", &tol, NULL));
36
37 /* Determine file from which we read the matrix A */
38 PetscCall(PetscOptionsGetString(NULL, NULL, "-f", file, sizeof(file), &data_provided));
39 if (!data_provided) { /* get matrices from PETSc distribution */
40 PetscCall(PetscStrncpy(file, "${PETSC_DIR}/share/petsc/datafiles/matrices/", sizeof(file)));
41 if (symm) {
42 #if defined(PETSC_USE_COMPLEX)
43 PetscCall(PetscStrlcat(file, "hpd-complex-", sizeof(file)));
44 #else
45 PetscCall(PetscStrlcat(file, "spd-real-", sizeof(file)));
46 #endif
47 } else {
48 #if defined(PETSC_USE_COMPLEX)
49 PetscCall(PetscStrlcat(file, "nh-complex-", sizeof(file)));
50 #else
51 PetscCall(PetscStrlcat(file, "ns-real-", sizeof(file)));
52 #endif
53 }
54 #if defined(PETSC_USE_64BIT_INDICES)
55 PetscCall(PetscStrlcat(file, "int64-", sizeof(file)));
56 #else
57 PetscCall(PetscStrlcat(file, "int32-", sizeof(file)));
58 #endif
59 #if defined(PETSC_USE_REAL_SINGLE)
60 PetscCall(PetscStrlcat(file, "float32", sizeof(file)));
61 #else
62 PetscCall(PetscStrlcat(file, "float64", sizeof(file)));
63 #endif
64 }
65
66 /* Load matrix A */
67 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, file, FILE_MODE_READ, &fd));
68 PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
69 PetscCall(MatLoad(A, fd));
70 PetscCall(MatGetSize(A, &m, &n));
71 PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "This example is not intended for rectangular matrices (%" PetscInt_FMT ", %" PetscInt_FMT ")", m, n);
72
73 PetscCall(PetscOptionsGetString(NULL, NULL, "-fis", isfile, sizeof(isfile), &isdata_provided));
74 if (isdata_provided) {
75 PetscBool samefile;
76
77 PetscCall(PetscStrcmp(isfile, file, &samefile));
78 if (!samefile) {
79 PetscCall(PetscViewerDestroy(&fd));
80 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, isfile, FILE_MODE_READ, &fd));
81 }
82 PetscCall(ISCreate(PETSC_COMM_SELF, &is_schur));
83 PetscCall(ISLoad(is_schur, fd));
84 } else {
85 PetscCall(PetscOptionsGetReal(NULL, NULL, "-schur_ratio", &sratio, NULL));
86 PetscCheck(sratio >= 0. && sratio <= 1., PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Invalid ratio for schur degrees of freedom %g", (double)sratio);
87 size_schur = (PetscInt)(sratio * m);
88 PetscCall(ISCreateStride(PETSC_COMM_SELF, size_schur, m - size_schur, 1, &is_schur));
89 }
90 PetscCall(ISGetSize(is_schur, &size_schur));
91 PetscCall(PetscViewerDestroy(&fd));
92
93 /* Create dense matrix C and X; C holds true solution with identical columns */
94 nrhs = 2;
95 PetscCall(PetscOptionsGetInt(NULL, NULL, "-nrhs", &nrhs, NULL));
96 PetscCall(MatCreate(PETSC_COMM_WORLD, &C));
97 PetscCall(MatSetSizes(C, m, PETSC_DECIDE, PETSC_DECIDE, nrhs));
98 PetscCall(MatSetType(C, MATDENSE));
99 PetscCall(MatSetFromOptions(C));
100 PetscCall(MatSetUp(C));
101
102 PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rand));
103 PetscCall(PetscRandomSetFromOptions(rand));
104 PetscCall(MatSetRandom(C, rand));
105 PetscCall(MatDuplicate(C, MAT_DO_NOT_COPY_VALUES, &X));
106
107 /* Create vectors */
108 PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
109 PetscCall(VecSetSizes(x, n, PETSC_DECIDE));
110 PetscCall(VecSetFromOptions(x));
111 PetscCall(VecDuplicate(x, &b));
112 PetscCall(VecDuplicate(x, &u)); /* save the true solution */
113
114 PetscCall(PetscOptionsGetInt(NULL, NULL, "-solver", &isolver, NULL));
115 switch (isolver) {
116 #if defined(PETSC_HAVE_MUMPS)
117 case 0:
118 PetscCall(PetscStrncpy(solver, MATSOLVERMUMPS, sizeof(solver)));
119 break;
120 #endif
121 #if defined(PETSC_HAVE_MKL_PARDISO)
122 case 1:
123 PetscCall(PetscStrncpy(solver, MATSOLVERMKL_PARDISO, sizeof(solver)));
124 break;
125 #endif
126 default:
127 PetscCall(PetscStrncpy(solver, MATSOLVERPETSC, sizeof(solver)));
128 break;
129 }
130
131 #if defined(PETSC_USE_COMPLEX)
132 if (isolver == 0 && symm && !data_provided) { /* MUMPS (5.0.0) does not have support for Hermitian matrices, so make them symmetric */
133 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.);
134 PetscScalar val = -1.0;
135 val = val + im;
136 PetscCall(MatSetValue(A, 1, 0, val, INSERT_VALUES));
137 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
138 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
139 }
140 #endif
141
142 PetscCall(PetscPrintf(PETSC_COMM_SELF, "Solving with %s: nrhs %" PetscInt_FMT ", sym %d, herm %d, size schur %" PetscInt_FMT ", size mat %" PetscInt_FMT "\n", solver, nrhs, symm, herm, size_schur, m));
143
144 /* Test LU/Cholesky Factorization */
145 use_lu = PETSC_FALSE;
146 if (!symm) use_lu = PETSC_TRUE;
147 if (PetscDefined(USE_COMPLEX) && isolver == 1) use_lu = PETSC_TRUE;
148 if (cuda && symm && !herm) use_lu = PETSC_TRUE;
149
150 if (herm && !use_lu) { /* test also conversion routines inside the solver packages */
151 PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE));
152 PetscCall(MatConvert(A, MATSEQSBAIJ, MAT_INPLACE_MATRIX, &A));
153 }
154
155 if (use_lu) {
156 PetscCall(MatGetFactor(A, solver, MAT_FACTOR_LU, &F));
157 } else {
158 if (herm) {
159 PetscCall(MatSetOption(A, MAT_SPD, PETSC_TRUE));
160 } else {
161 PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE));
162 PetscCall(MatSetOption(A, MAT_SPD, PETSC_FALSE));
163 }
164 PetscCall(MatGetFactor(A, solver, MAT_FACTOR_CHOLESKY, &F));
165 }
166
167 /* Set Schur complement indices */
168 PetscCall(MatFactorSetSchurIS(F, is_schur));
169 PetscCall(ISDestroy(&is_schur));
170
171 if (use_lu) {
172 PetscCall(MatLUFactorSymbolic(F, A, NULL, NULL, NULL));
173 } else {
174 PetscCall(MatCholeskyFactorSymbolic(F, A, NULL, NULL));
175 }
176
177 for (nfact = 0; nfact < 3; nfact++) {
178 Mat AD;
179
180 if (nfact == 1) {
181 PetscCall(VecSetRandom(x, rand));
182 if (symm && herm) PetscCall(VecAbs(x));
183 PetscCall(MatDiagonalSet(A, x, ADD_VALUES));
184 }
185 if (use_lu) {
186 PetscCall(MatLUFactorNumeric(F, A, NULL));
187 } else {
188 PetscCall(MatCholeskyFactorNumeric(F, A, NULL));
189 }
190
191 if (cuda) {
192 PetscCall(MatFactorGetSchurComplement(F, &S, NULL));
193 PetscCall(MatSetType(S, MATSEQDENSECUDA));
194 PetscCall(MatCreateVecs(S, &xschur, &bschur));
195 PetscCall(MatFactorRestoreSchurComplement(F, &S, MAT_FACTOR_SCHUR_UNFACTORED));
196 }
197 PetscCall(MatFactorCreateSchurComplement(F, &S, NULL));
198 if (!cuda) PetscCall(MatCreateVecs(S, &xschur, &bschur));
199 PetscCall(VecDuplicate(xschur, &uschur));
200 if (nfact == 1 && (!cuda || (herm && symm))) PetscCall(MatFactorInvertSchurComplement(F));
201 for (nsolve = 0; nsolve < 2; nsolve++) {
202 PetscCall(VecSetRandom(x, rand));
203 PetscCall(VecCopy(x, u));
204
205 if (nsolve) {
206 PetscCall(MatMult(A, x, b));
207 PetscCall(MatSolve(F, b, x));
208 } else {
209 PetscCall(MatMultTranspose(A, x, b));
210 PetscCall(MatSolveTranspose(F, b, x));
211 }
212 /* Check the error */
213 PetscCall(VecAXPY(u, -1.0, x)); /* u <- (-1.0)x + u */
214 PetscCall(VecNorm(u, NORM_2, &norm));
215 if (norm > tol) {
216 PetscReal resi;
217 if (nsolve) {
218 PetscCall(MatMult(A, x, u)); /* u = A*x */
219 } else {
220 PetscCall(MatMultTranspose(A, x, u)); /* u = A*x */
221 }
222 PetscCall(VecAXPY(u, -1.0, b)); /* u <- (-1.0)b + u */
223 PetscCall(VecNorm(u, NORM_2, &resi));
224 if (nsolve) {
225 PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatSolve error: Norm of error %g, residual %g\n", nfact, nsolve, (double)norm, (double)resi));
226 } else {
227 PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatSolveTranspose error: Norm of error %g, residual %f\n", nfact, nsolve, (double)norm, (double)resi));
228 }
229 }
230 PetscCall(VecSetRandom(xschur, rand));
231 PetscCall(VecCopy(xschur, uschur));
232 if (nsolve) {
233 PetscCall(MatMult(S, xschur, bschur));
234 PetscCall(MatFactorSolveSchurComplement(F, bschur, xschur));
235 } else {
236 PetscCall(MatMultTranspose(S, xschur, bschur));
237 PetscCall(MatFactorSolveSchurComplementTranspose(F, bschur, xschur));
238 }
239 /* Check the error */
240 PetscCall(VecAXPY(uschur, -1.0, xschur)); /* u <- (-1.0)x + u */
241 PetscCall(VecNorm(uschur, NORM_2, &norm));
242 if (norm > tol) {
243 PetscReal resi;
244 if (nsolve) {
245 PetscCall(MatMult(S, xschur, uschur)); /* u = A*x */
246 } else {
247 PetscCall(MatMultTranspose(S, xschur, uschur)); /* u = A*x */
248 }
249 PetscCall(VecAXPY(uschur, -1.0, bschur)); /* u <- (-1.0)b + u */
250 PetscCall(VecNorm(uschur, NORM_2, &resi));
251 if (nsolve) {
252 PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatFactorSolveSchurComplement error: Norm of error %g, residual %g\n", nfact, nsolve, (double)norm, (double)resi));
253 } else {
254 PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatFactorSolveSchurComplementTranspose error: Norm of error %g, residual %f\n", nfact, nsolve, (double)norm, (double)resi));
255 }
256 }
257 }
258 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &AD));
259 if (!nfact) {
260 PetscCall(MatMatMult(AD, C, MAT_INITIAL_MATRIX, 2.0, &RHS));
261 } else {
262 PetscCall(MatMatMult(AD, C, MAT_REUSE_MATRIX, 2.0, &RHS));
263 }
264 PetscCall(MatDestroy(&AD));
265 for (nsolve = 0; nsolve < 2; nsolve++) {
266 PetscCall(MatMatSolve(F, RHS, X));
267
268 /* Check the error */
269 PetscCall(MatAXPY(X, -1.0, C, SAME_NONZERO_PATTERN));
270 PetscCall(MatNorm(X, NORM_FROBENIUS, &norm));
271 if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatMatSolve: Norm of error %g\n", nfact, nsolve, (double)norm));
272 #if PetscDefined(HAVE_MUMPS)
273 PetscCall(MatMumpsSetIcntl(F, 26, 1));
274 PetscCall(MatMatSolve(F, RHS, X));
275 PetscCall(MatMumpsSetIcntl(F, 26, 2));
276 PetscCall(MatMatSolve(F, RHS, X));
277 PetscCall(MatMumpsSetIcntl(F, 26, -1));
278
279 /* Check the error */
280 PetscCall(MatAXPY(X, -1.0, C, SAME_NONZERO_PATTERN));
281 PetscCall(MatNorm(X, NORM_FROBENIUS, &norm));
282 if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatMatSolve: Norm of error %g\n", nfact, nsolve, (double)norm));
283 #endif
284 }
285 if (isolver == 0) {
286 Mat spRHS, spRHST, RHST;
287
288 PetscCall(MatTranspose(RHS, MAT_INITIAL_MATRIX, &RHST));
289 PetscCall(MatConvert(RHST, MATSEQAIJ, MAT_INITIAL_MATRIX, &spRHST));
290 PetscCall(MatCreateTranspose(spRHST, &spRHS));
291 for (nsolve = 0; nsolve < 2; nsolve++) {
292 PetscCall(MatMatSolve(F, spRHS, X));
293
294 /* Check the error */
295 PetscCall(MatAXPY(X, -1.0, C, SAME_NONZERO_PATTERN));
296 PetscCall(MatNorm(X, NORM_FROBENIUS, &norm));
297 if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") sparse MatMatSolve: Norm of error %g\n", nfact, nsolve, (double)norm));
298 }
299 PetscCall(MatDestroy(&spRHST));
300 PetscCall(MatDestroy(&spRHS));
301 PetscCall(MatDestroy(&RHST));
302 }
303 PetscCall(MatDestroy(&S));
304 PetscCall(VecDestroy(&xschur));
305 PetscCall(VecDestroy(&bschur));
306 PetscCall(VecDestroy(&uschur));
307 }
308 /* Free data structures */
309 PetscCall(MatDestroy(&A));
310 PetscCall(MatDestroy(&C));
311 PetscCall(MatDestroy(&F));
312 PetscCall(MatDestroy(&X));
313 PetscCall(MatDestroy(&RHS));
314 PetscCall(PetscRandomDestroy(&rand));
315 PetscCall(VecDestroy(&x));
316 PetscCall(VecDestroy(&b));
317 PetscCall(VecDestroy(&u));
318 PetscCall(PetscFinalize());
319 return 0;
320 }
321
322 /*TEST
323
324 testset:
325 requires: mkl_pardiso double !complex
326 args: -solver 1
327
328 test:
329 suffix: mkl_pardiso
330 test:
331 requires: cuda
332 suffix: mkl_pardiso_cuda
333 args: -cuda_solve
334 output_file: output/ex192_mkl_pardiso.out
335 test:
336 suffix: mkl_pardiso_1
337 args: -symmetric_solve
338 output_file: output/ex192_mkl_pardiso_1.out
339 test:
340 requires: cuda
341 suffix: mkl_pardiso_cuda_1
342 args: -symmetric_solve -cuda_solve
343 output_file: output/ex192_mkl_pardiso_1.out
344 test:
345 suffix: mkl_pardiso_3
346 args: -symmetric_solve -hermitian_solve
347 output_file: output/ex192_mkl_pardiso_3.out
348 test:
349 requires: cuda defined(PETSC_HAVE_CUSOLVERDNDPOTRI)
350 suffix: mkl_pardiso_cuda_3
351 args: -symmetric_solve -hermitian_solve -cuda_solve
352 output_file: output/ex192_mkl_pardiso_3.out
353
354 testset:
355 requires: mumps double !complex
356 args: -solver 0
357
358 test:
359 suffix: mumps
360 test:
361 requires: cuda
362 suffix: mumps_cuda
363 args: -cuda_solve
364 output_file: output/ex192_mumps.out
365 test:
366 suffix: mumps_2
367 args: -symmetric_solve
368 output_file: output/ex192_mumps_2.out
369 test:
370 requires: cuda
371 suffix: mumps_cuda_2
372 args: -symmetric_solve -cuda_solve
373 output_file: output/ex192_mumps_2.out
374 test:
375 suffix: mumps_3
376 args: -symmetric_solve -hermitian_solve
377 output_file: output/ex192_mumps_3.out
378 test:
379 requires: cuda defined(PETSC_HAVE_CUSOLVERDNDPOTRI)
380 suffix: mumps_cuda_3
381 args: -symmetric_solve -hermitian_solve -cuda_solve
382 output_file: output/ex192_mumps_3.out
383
384 testset:
385 requires: mumps double !complex defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
386 args: -solver 0 -pc_precision single -tol 3.4e-4
387
388 test:
389 suffix: mumps_s
390 output_file: output/ex192_mumps.out
391
392 test:
393 requires: cuda
394 suffix: mumps_cuda_s
395 args: -cuda_solve
396 output_file: output/ex192_mumps.out
397 test:
398 suffix: mumps_2_s
399 args: -symmetric_solve
400 output_file: output/ex192_mumps_2.out
401 test:
402 requires: cuda
403 suffix: mumps_cuda_2_s
404 args: -symmetric_solve -cuda_solve
405 output_file: output/ex192_mumps_2.out
406 test:
407 suffix: mumps_3_s
408 args: -symmetric_solve -hermitian_solve
409 output_file: output/ex192_mumps_3.out
410 test:
411 requires: cuda defined(PETSC_HAVE_CUSOLVERDNDPOTRI)
412 suffix: mumps_cuda_3_s
413 args: -symmetric_solve -hermitian_solve -cuda_solve
414 output_file: output/ex192_mumps_3.out
415
416 TEST*/
417