1 static char help[] = "Tests MatSolve() and MatMatSolve() with MUMPS or MKL_PARDISO sequential solvers in Schur complement mode.\n\ 2 Example: mpiexec -n 1 ./ex192 -f <matrix binary file> -nrhs 4 -symmetric_solve -hermitian_solve -schur_ratio 0.3\n\n"; 3 4 #include <petscmat.h> 5 6 int main(int argc, char **args) { 7 Mat A, RHS, C, F, X, S; 8 Vec u, x, b; 9 Vec xschur, bschur, uschur; 10 IS is_schur; 11 PetscMPIInt size; 12 PetscInt isolver = 0, size_schur, m, n, nfact, nsolve, nrhs; 13 PetscReal norm, tol = PETSC_SQRT_MACHINE_EPSILON; 14 PetscRandom rand; 15 PetscBool data_provided, herm, symm, use_lu, cuda = PETSC_FALSE; 16 PetscReal sratio = 5.1 / 12.; 17 PetscViewer fd; /* viewer */ 18 char solver[256]; 19 char file[PETSC_MAX_PATH_LEN]; /* input file name */ 20 21 PetscFunctionBeginUser; 22 PetscCall(PetscInitialize(&argc, &args, (char *)0, help)); 23 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 24 PetscCheck(size == 1, PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "This is a uniprocessor test"); 25 /* Determine which type of solver we want to test for */ 26 herm = PETSC_FALSE; 27 symm = PETSC_FALSE; 28 PetscCall(PetscOptionsGetBool(NULL, NULL, "-symmetric_solve", &symm, NULL)); 29 PetscCall(PetscOptionsGetBool(NULL, NULL, "-hermitian_solve", &herm, NULL)); 30 if (herm) symm = PETSC_TRUE; 31 PetscCall(PetscOptionsGetBool(NULL, NULL, "-cuda_solve", &cuda, NULL)); 32 PetscCall(PetscOptionsGetReal(NULL, NULL, "-tol", &tol, NULL)); 33 34 /* Determine file from which we read the matrix A */ 35 PetscCall(PetscOptionsGetString(NULL, NULL, "-f", file, sizeof(file), &data_provided)); 36 if (!data_provided) { /* get matrices from PETSc distribution */ 37 PetscCall(PetscStrncpy(file, "${PETSC_DIR}/share/petsc/datafiles/matrices/", sizeof(file))); 38 if (symm) { 39 #if defined(PETSC_USE_COMPLEX) 40 PetscCall(PetscStrlcat(file, "hpd-complex-", sizeof(file))); 41 #else 42 PetscCall(PetscStrlcat(file, "spd-real-", sizeof(file))); 43 #endif 44 } else { 45 #if defined(PETSC_USE_COMPLEX) 46 PetscCall(PetscStrlcat(file, "nh-complex-", sizeof(file))); 47 #else 48 PetscCall(PetscStrlcat(file, "ns-real-", sizeof(file))); 49 #endif 50 } 51 #if defined(PETSC_USE_64BIT_INDICES) 52 PetscCall(PetscStrlcat(file, "int64-", sizeof(file))); 53 #else 54 PetscCall(PetscStrlcat(file, "int32-", sizeof(file))); 55 #endif 56 #if defined(PETSC_USE_REAL_SINGLE) 57 PetscCall(PetscStrlcat(file, "float32", sizeof(file))); 58 #else 59 PetscCall(PetscStrlcat(file, "float64", sizeof(file))); 60 #endif 61 } 62 /* Load matrix A */ 63 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, file, FILE_MODE_READ, &fd)); 64 PetscCall(MatCreate(PETSC_COMM_WORLD, &A)); 65 PetscCall(MatLoad(A, fd)); 66 PetscCall(PetscViewerDestroy(&fd)); 67 PetscCall(MatGetSize(A, &m, &n)); 68 PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "This example is not intended for rectangular matrices (%" PetscInt_FMT ", %" PetscInt_FMT ")", m, n); 69 70 /* Create dense matrix C and X; C holds true solution with identical columns */ 71 nrhs = 2; 72 PetscCall(PetscOptionsGetInt(NULL, NULL, "-nrhs", &nrhs, NULL)); 73 PetscCall(MatCreate(PETSC_COMM_WORLD, &C)); 74 PetscCall(MatSetSizes(C, m, PETSC_DECIDE, PETSC_DECIDE, nrhs)); 75 PetscCall(MatSetType(C, MATDENSE)); 76 PetscCall(MatSetFromOptions(C)); 77 PetscCall(MatSetUp(C)); 78 79 PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rand)); 80 PetscCall(PetscRandomSetFromOptions(rand)); 81 PetscCall(MatSetRandom(C, rand)); 82 PetscCall(MatDuplicate(C, MAT_DO_NOT_COPY_VALUES, &X)); 83 84 /* Create vectors */ 85 PetscCall(VecCreate(PETSC_COMM_WORLD, &x)); 86 PetscCall(VecSetSizes(x, n, PETSC_DECIDE)); 87 PetscCall(VecSetFromOptions(x)); 88 PetscCall(VecDuplicate(x, &b)); 89 PetscCall(VecDuplicate(x, &u)); /* save the true solution */ 90 91 PetscCall(PetscOptionsGetInt(NULL, NULL, "-solver", &isolver, NULL)); 92 switch (isolver) { 93 #if defined(PETSC_HAVE_MUMPS) 94 case 0: PetscCall(PetscStrcpy(solver, MATSOLVERMUMPS)); break; 95 #endif 96 #if defined(PETSC_HAVE_MKL_PARDISO) 97 case 1: PetscCall(PetscStrcpy(solver, MATSOLVERMKL_PARDISO)); break; 98 #endif 99 default: PetscCall(PetscStrcpy(solver, MATSOLVERPETSC)); break; 100 } 101 102 #if defined(PETSC_USE_COMPLEX) 103 if (isolver == 0 && symm && !data_provided) { /* MUMPS (5.0.0) does not have support for hermitian matrices, so make them symmetric */ 104 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.); 105 PetscScalar val = -1.0; 106 val = val + im; 107 PetscCall(MatSetValue(A, 1, 0, val, INSERT_VALUES)); 108 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 109 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 110 } 111 #endif 112 113 PetscCall(PetscOptionsGetReal(NULL, NULL, "-schur_ratio", &sratio, NULL)); 114 PetscCheck(sratio >= 0. && sratio <= 1., PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Invalid ratio for schur degrees of freedom %g", (double)sratio); 115 size_schur = (PetscInt)(sratio * m); 116 117 PetscCall(PetscPrintf(PETSC_COMM_SELF, "Solving with %s: nrhs %" PetscInt_FMT ", sym %d, herm %d, size schur %" PetscInt_FMT ", size mat %" PetscInt_FMT "\n", solver, nrhs, symm, herm, size_schur, m)); 118 119 /* Test LU/Cholesky Factorization */ 120 use_lu = PETSC_FALSE; 121 if (!symm) use_lu = PETSC_TRUE; 122 #if defined(PETSC_USE_COMPLEX) 123 if (isolver == 1) use_lu = PETSC_TRUE; 124 #endif 125 if (cuda && symm && !herm) use_lu = PETSC_TRUE; 126 127 if (herm && !use_lu) { /* test also conversion routines inside the solver packages */ 128 PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE)); 129 PetscCall(MatConvert(A, MATSEQSBAIJ, MAT_INPLACE_MATRIX, &A)); 130 } 131 132 if (use_lu) { 133 PetscCall(MatGetFactor(A, solver, MAT_FACTOR_LU, &F)); 134 } else { 135 if (herm) { 136 PetscCall(MatSetOption(A, MAT_SPD, PETSC_TRUE)); 137 } else { 138 PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE)); 139 PetscCall(MatSetOption(A, MAT_SPD, PETSC_FALSE)); 140 } 141 PetscCall(MatGetFactor(A, solver, MAT_FACTOR_CHOLESKY, &F)); 142 } 143 PetscCall(ISCreateStride(PETSC_COMM_SELF, size_schur, m - size_schur, 1, &is_schur)); 144 PetscCall(MatFactorSetSchurIS(F, is_schur)); 145 146 PetscCall(ISDestroy(&is_schur)); 147 if (use_lu) { 148 PetscCall(MatLUFactorSymbolic(F, A, NULL, NULL, NULL)); 149 } else { 150 PetscCall(MatCholeskyFactorSymbolic(F, A, NULL, NULL)); 151 } 152 153 for (nfact = 0; nfact < 3; nfact++) { 154 Mat AD; 155 156 if (!nfact) { 157 PetscCall(VecSetRandom(x, rand)); 158 if (symm && herm) { PetscCall(VecAbs(x)); } 159 PetscCall(MatDiagonalSet(A, x, ADD_VALUES)); 160 } 161 if (use_lu) { 162 PetscCall(MatLUFactorNumeric(F, A, NULL)); 163 } else { 164 PetscCall(MatCholeskyFactorNumeric(F, A, NULL)); 165 } 166 if (cuda) { 167 PetscCall(MatFactorGetSchurComplement(F, &S, NULL)); 168 PetscCall(MatSetType(S, MATSEQDENSECUDA)); 169 PetscCall(MatCreateVecs(S, &xschur, &bschur)); 170 PetscCall(MatFactorRestoreSchurComplement(F, &S, MAT_FACTOR_SCHUR_UNFACTORED)); 171 } 172 PetscCall(MatFactorCreateSchurComplement(F, &S, NULL)); 173 if (!cuda) { PetscCall(MatCreateVecs(S, &xschur, &bschur)); } 174 PetscCall(VecDuplicate(xschur, &uschur)); 175 if (nfact == 1 && (!cuda || (herm && symm))) { PetscCall(MatFactorInvertSchurComplement(F)); } 176 for (nsolve = 0; nsolve < 2; nsolve++) { 177 PetscCall(VecSetRandom(x, rand)); 178 PetscCall(VecCopy(x, u)); 179 180 if (nsolve) { 181 PetscCall(MatMult(A, x, b)); 182 PetscCall(MatSolve(F, b, x)); 183 } else { 184 PetscCall(MatMultTranspose(A, x, b)); 185 PetscCall(MatSolveTranspose(F, b, x)); 186 } 187 /* Check the error */ 188 PetscCall(VecAXPY(u, -1.0, x)); /* u <- (-1.0)x + u */ 189 PetscCall(VecNorm(u, NORM_2, &norm)); 190 if (norm > tol) { 191 PetscReal resi; 192 if (nsolve) { 193 PetscCall(MatMult(A, x, u)); /* u = A*x */ 194 } else { 195 PetscCall(MatMultTranspose(A, x, u)); /* u = A*x */ 196 } 197 PetscCall(VecAXPY(u, -1.0, b)); /* u <- (-1.0)b + u */ 198 PetscCall(VecNorm(u, NORM_2, &resi)); 199 if (nsolve) { 200 PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatSolve error: Norm of error %g, residual %g\n", nfact, nsolve, (double)norm, (double)resi)); 201 } else { 202 PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatSolveTranspose error: Norm of error %g, residual %f\n", nfact, nsolve, (double)norm, (double)resi)); 203 } 204 } 205 PetscCall(VecSetRandom(xschur, rand)); 206 PetscCall(VecCopy(xschur, uschur)); 207 if (nsolve) { 208 PetscCall(MatMult(S, xschur, bschur)); 209 PetscCall(MatFactorSolveSchurComplement(F, bschur, xschur)); 210 } else { 211 PetscCall(MatMultTranspose(S, xschur, bschur)); 212 PetscCall(MatFactorSolveSchurComplementTranspose(F, bschur, xschur)); 213 } 214 /* Check the error */ 215 PetscCall(VecAXPY(uschur, -1.0, xschur)); /* u <- (-1.0)x + u */ 216 PetscCall(VecNorm(uschur, NORM_2, &norm)); 217 if (norm > tol) { 218 PetscReal resi; 219 if (nsolve) { 220 PetscCall(MatMult(S, xschur, uschur)); /* u = A*x */ 221 } else { 222 PetscCall(MatMultTranspose(S, xschur, uschur)); /* u = A*x */ 223 } 224 PetscCall(VecAXPY(uschur, -1.0, bschur)); /* u <- (-1.0)b + u */ 225 PetscCall(VecNorm(uschur, NORM_2, &resi)); 226 if (nsolve) { 227 PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatFactorSolveSchurComplement error: Norm of error %g, residual %g\n", nfact, nsolve, (double)norm, (double)resi)); 228 } else { 229 PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatFactorSolveSchurComplementTranspose error: Norm of error %g, residual %f\n", nfact, nsolve, (double)norm, (double)resi)); 230 } 231 } 232 } 233 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &AD)); 234 if (!nfact) { 235 PetscCall(MatMatMult(AD, C, MAT_INITIAL_MATRIX, 2.0, &RHS)); 236 } else { 237 PetscCall(MatMatMult(AD, C, MAT_REUSE_MATRIX, 2.0, &RHS)); 238 } 239 PetscCall(MatDestroy(&AD)); 240 for (nsolve = 0; nsolve < 2; nsolve++) { 241 PetscCall(MatMatSolve(F, RHS, X)); 242 243 /* Check the error */ 244 PetscCall(MatAXPY(X, -1.0, C, SAME_NONZERO_PATTERN)); 245 PetscCall(MatNorm(X, NORM_FROBENIUS, &norm)); 246 if (norm > tol) { PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatMatSolve: Norm of error %g\n", nfact, nsolve, (double)norm)); } 247 } 248 if (isolver == 0) { 249 Mat spRHS, spRHST, RHST; 250 251 PetscCall(MatTranspose(RHS, MAT_INITIAL_MATRIX, &RHST)); 252 PetscCall(MatConvert(RHST, MATSEQAIJ, MAT_INITIAL_MATRIX, &spRHST)); 253 PetscCall(MatCreateTranspose(spRHST, &spRHS)); 254 for (nsolve = 0; nsolve < 2; nsolve++) { 255 PetscCall(MatMatSolve(F, spRHS, X)); 256 257 /* Check the error */ 258 PetscCall(MatAXPY(X, -1.0, C, SAME_NONZERO_PATTERN)); 259 PetscCall(MatNorm(X, NORM_FROBENIUS, &norm)); 260 if (norm > tol) { PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") sparse MatMatSolve: Norm of error %g\n", nfact, nsolve, (double)norm)); } 261 } 262 PetscCall(MatDestroy(&spRHST)); 263 PetscCall(MatDestroy(&spRHS)); 264 PetscCall(MatDestroy(&RHST)); 265 } 266 PetscCall(MatDestroy(&S)); 267 PetscCall(VecDestroy(&xschur)); 268 PetscCall(VecDestroy(&bschur)); 269 PetscCall(VecDestroy(&uschur)); 270 } 271 /* Free data structures */ 272 PetscCall(MatDestroy(&A)); 273 PetscCall(MatDestroy(&C)); 274 PetscCall(MatDestroy(&F)); 275 PetscCall(MatDestroy(&X)); 276 PetscCall(MatDestroy(&RHS)); 277 PetscCall(PetscRandomDestroy(&rand)); 278 PetscCall(VecDestroy(&x)); 279 PetscCall(VecDestroy(&b)); 280 PetscCall(VecDestroy(&u)); 281 PetscCall(PetscFinalize()); 282 return 0; 283 } 284 285 /*TEST 286 287 testset: 288 requires: mkl_pardiso double !complex 289 args: -solver 1 290 291 test: 292 suffix: mkl_pardiso 293 test: 294 requires: cuda 295 suffix: mkl_pardiso_cuda 296 args: -cuda_solve 297 output_file: output/ex192_mkl_pardiso.out 298 test: 299 suffix: mkl_pardiso_1 300 args: -symmetric_solve 301 output_file: output/ex192_mkl_pardiso_1.out 302 test: 303 requires: cuda 304 suffix: mkl_pardiso_cuda_1 305 args: -symmetric_solve -cuda_solve 306 output_file: output/ex192_mkl_pardiso_1.out 307 test: 308 suffix: mkl_pardiso_3 309 args: -symmetric_solve -hermitian_solve 310 output_file: output/ex192_mkl_pardiso_3.out 311 test: 312 requires: cuda defined(PETSC_HAVE_CUSOLVERDNDPOTRI) 313 suffix: mkl_pardiso_cuda_3 314 args: -symmetric_solve -hermitian_solve -cuda_solve 315 output_file: output/ex192_mkl_pardiso_3.out 316 317 testset: 318 requires: mumps double !complex 319 args: -solver 0 320 321 test: 322 suffix: mumps 323 test: 324 requires: cuda 325 suffix: mumps_cuda 326 args: -cuda_solve 327 output_file: output/ex192_mumps.out 328 test: 329 suffix: mumps_2 330 args: -symmetric_solve 331 output_file: output/ex192_mumps_2.out 332 test: 333 requires: cuda 334 suffix: mumps_cuda_2 335 args: -symmetric_solve -cuda_solve 336 output_file: output/ex192_mumps_2.out 337 test: 338 suffix: mumps_3 339 args: -symmetric_solve -hermitian_solve 340 output_file: output/ex192_mumps_3.out 341 test: 342 requires: cuda defined(PETSC_HAVE_CUSOLVERDNDPOTRI) 343 suffix: mumps_cuda_3 344 args: -symmetric_solve -hermitian_solve -cuda_solve 345 output_file: output/ex192_mumps_3.out 346 347 TEST*/ 348