1 static char help[] = "Tests MatSolve() and MatMatSolve() with MUMPS or MKL_PARDISO sequential solvers in Schur complement mode.\n\ 2 Example: mpiexec -n 1 ./ex192 -f <matrix binary file> -nrhs 4 -symmetric_solve -hermitian_solve -schur_ratio 0.3\n\n"; 3 4 #include <petscmat.h> 5 6 int main(int argc, char **args) 7 { 8 Mat A, RHS, C, F, X, S; 9 Vec u, x, b; 10 Vec xschur, bschur, uschur; 11 IS is_schur; 12 PetscMPIInt size; 13 PetscInt isolver = 0, size_schur, m, n, nfact, nsolve, nrhs; 14 PetscReal norm, tol = PETSC_SQRT_MACHINE_EPSILON; 15 PetscRandom rand; 16 PetscBool data_provided, herm, symm, use_lu, cuda = PETSC_FALSE; 17 PetscBool isdata_provided; 18 PetscReal sratio = 5.1 / 12.; 19 PetscViewer fd; /* viewer */ 20 char solver[256]; 21 char file[PETSC_MAX_PATH_LEN]; /* input Mat file name */ 22 char isfile[PETSC_MAX_PATH_LEN]; /* input IS file name */ 23 24 PetscFunctionBeginUser; 25 PetscCall(PetscInitialize(&argc, &args, NULL, help)); 26 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 27 PetscCheck(size == 1, PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "This is a uniprocessor test"); 28 /* Determine which type of solver we want to test for */ 29 herm = PETSC_FALSE; 30 symm = PETSC_FALSE; 31 PetscCall(PetscOptionsGetBool(NULL, NULL, "-symmetric_solve", &symm, NULL)); 32 PetscCall(PetscOptionsGetBool(NULL, NULL, "-hermitian_solve", &herm, NULL)); 33 if (herm) symm = PETSC_TRUE; 34 PetscCall(PetscOptionsGetBool(NULL, NULL, "-cuda_solve", &cuda, NULL)); 35 PetscCall(PetscOptionsGetReal(NULL, NULL, "-tol", &tol, NULL)); 36 37 /* Determine file from which we read the matrix A */ 38 PetscCall(PetscOptionsGetString(NULL, NULL, "-f", file, sizeof(file), &data_provided)); 39 if (!data_provided) { /* get matrices from PETSc distribution */ 40 PetscCall(PetscStrncpy(file, "${PETSC_DIR}/share/petsc/datafiles/matrices/", sizeof(file))); 41 if (symm) { 42 #if defined(PETSC_USE_COMPLEX) 43 PetscCall(PetscStrlcat(file, "hpd-complex-", sizeof(file))); 44 #else 45 PetscCall(PetscStrlcat(file, "spd-real-", sizeof(file))); 46 #endif 47 } else { 48 #if defined(PETSC_USE_COMPLEX) 49 PetscCall(PetscStrlcat(file, "nh-complex-", sizeof(file))); 50 #else 51 PetscCall(PetscStrlcat(file, "ns-real-", sizeof(file))); 52 #endif 53 } 54 #if defined(PETSC_USE_64BIT_INDICES) 55 PetscCall(PetscStrlcat(file, "int64-", sizeof(file))); 56 #else 57 PetscCall(PetscStrlcat(file, "int32-", sizeof(file))); 58 #endif 59 #if defined(PETSC_USE_REAL_SINGLE) 60 PetscCall(PetscStrlcat(file, "float32", sizeof(file))); 61 #else 62 PetscCall(PetscStrlcat(file, "float64", sizeof(file))); 63 #endif 64 } 65 66 /* Load matrix A */ 67 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, file, FILE_MODE_READ, &fd)); 68 PetscCall(MatCreate(PETSC_COMM_WORLD, &A)); 69 PetscCall(MatLoad(A, fd)); 70 PetscCall(MatGetSize(A, &m, &n)); 71 PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "This example is not intended for rectangular matrices (%" PetscInt_FMT ", %" PetscInt_FMT ")", m, n); 72 73 PetscCall(PetscOptionsGetString(NULL, NULL, "-fis", isfile, sizeof(isfile), &isdata_provided)); 74 if (isdata_provided) { 75 PetscBool samefile; 76 77 PetscCall(PetscStrcmp(isfile, file, &samefile)); 78 if (!samefile) { 79 PetscCall(PetscViewerDestroy(&fd)); 80 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, isfile, FILE_MODE_READ, &fd)); 81 } 82 PetscCall(ISCreate(PETSC_COMM_SELF, &is_schur)); 83 PetscCall(ISLoad(is_schur, fd)); 84 } else { 85 PetscCall(PetscOptionsGetReal(NULL, NULL, "-schur_ratio", &sratio, NULL)); 86 PetscCheck(sratio >= 0. && sratio <= 1., PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Invalid ratio for schur degrees of freedom %g", (double)sratio); 87 size_schur = (PetscInt)(sratio * m); 88 PetscCall(ISCreateStride(PETSC_COMM_SELF, size_schur, m - size_schur, 1, &is_schur)); 89 } 90 PetscCall(ISGetSize(is_schur, &size_schur)); 91 PetscCall(PetscViewerDestroy(&fd)); 92 93 /* Create dense matrix C and X; C holds true solution with identical columns */ 94 nrhs = 2; 95 PetscCall(PetscOptionsGetInt(NULL, NULL, "-nrhs", &nrhs, NULL)); 96 PetscCall(MatCreate(PETSC_COMM_WORLD, &C)); 97 PetscCall(MatSetSizes(C, m, PETSC_DECIDE, PETSC_DECIDE, nrhs)); 98 PetscCall(MatSetType(C, MATDENSE)); 99 PetscCall(MatSetFromOptions(C)); 100 PetscCall(MatSetUp(C)); 101 102 PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rand)); 103 PetscCall(PetscRandomSetFromOptions(rand)); 104 PetscCall(MatSetRandom(C, rand)); 105 PetscCall(MatDuplicate(C, MAT_DO_NOT_COPY_VALUES, &X)); 106 107 /* Create vectors */ 108 PetscCall(VecCreate(PETSC_COMM_WORLD, &x)); 109 PetscCall(VecSetSizes(x, n, PETSC_DECIDE)); 110 PetscCall(VecSetFromOptions(x)); 111 PetscCall(VecDuplicate(x, &b)); 112 PetscCall(VecDuplicate(x, &u)); /* save the true solution */ 113 114 PetscCall(PetscOptionsGetInt(NULL, NULL, "-solver", &isolver, NULL)); 115 switch (isolver) { 116 #if defined(PETSC_HAVE_MUMPS) 117 case 0: 118 PetscCall(PetscStrncpy(solver, MATSOLVERMUMPS, sizeof(solver))); 119 break; 120 #endif 121 #if defined(PETSC_HAVE_MKL_PARDISO) 122 case 1: 123 PetscCall(PetscStrncpy(solver, MATSOLVERMKL_PARDISO, sizeof(solver))); 124 break; 125 #endif 126 default: 127 PetscCall(PetscStrncpy(solver, MATSOLVERPETSC, sizeof(solver))); 128 break; 129 } 130 131 #if defined(PETSC_USE_COMPLEX) 132 if (isolver == 0 && symm && !data_provided) { /* MUMPS (5.0.0) does not have support for Hermitian matrices, so make them symmetric */ 133 PetscScalar im = PetscSqrtScalar((PetscScalar)-1.); 134 PetscScalar val = -1.0; 135 val = val + im; 136 PetscCall(MatSetValue(A, 1, 0, val, INSERT_VALUES)); 137 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 138 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 139 } 140 #endif 141 142 PetscCall(PetscPrintf(PETSC_COMM_SELF, "Solving with %s: nrhs %" PetscInt_FMT ", sym %d, herm %d, size schur %" PetscInt_FMT ", size mat %" PetscInt_FMT "\n", solver, nrhs, symm, herm, size_schur, m)); 143 144 /* Test LU/Cholesky Factorization */ 145 use_lu = PETSC_FALSE; 146 if (!symm) use_lu = PETSC_TRUE; 147 #if defined(PETSC_USE_COMPLEX) 148 if (isolver == 1) use_lu = PETSC_TRUE; 149 #endif 150 if (cuda && symm && !herm) use_lu = PETSC_TRUE; 151 152 if (herm && !use_lu) { /* test also conversion routines inside the solver packages */ 153 PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE)); 154 PetscCall(MatConvert(A, MATSEQSBAIJ, MAT_INPLACE_MATRIX, &A)); 155 } 156 157 if (use_lu) { 158 PetscCall(MatGetFactor(A, solver, MAT_FACTOR_LU, &F)); 159 } else { 160 if (herm) { 161 PetscCall(MatSetOption(A, MAT_SPD, PETSC_TRUE)); 162 } else { 163 PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE)); 164 PetscCall(MatSetOption(A, MAT_SPD, PETSC_FALSE)); 165 } 166 PetscCall(MatGetFactor(A, solver, MAT_FACTOR_CHOLESKY, &F)); 167 } 168 169 /* Set Schur complement indices */ 170 PetscCall(MatFactorSetSchurIS(F, is_schur)); 171 PetscCall(ISDestroy(&is_schur)); 172 173 if (use_lu) { 174 PetscCall(MatLUFactorSymbolic(F, A, NULL, NULL, NULL)); 175 } else { 176 PetscCall(MatCholeskyFactorSymbolic(F, A, NULL, NULL)); 177 } 178 179 for (nfact = 0; nfact < 3; nfact++) { 180 Mat AD; 181 182 if (nfact == 1) { 183 PetscCall(VecSetRandom(x, rand)); 184 if (symm && herm) PetscCall(VecAbs(x)); 185 PetscCall(MatDiagonalSet(A, x, ADD_VALUES)); 186 } 187 if (use_lu) { 188 PetscCall(MatLUFactorNumeric(F, A, NULL)); 189 } else { 190 PetscCall(MatCholeskyFactorNumeric(F, A, NULL)); 191 } 192 193 if (cuda) { 194 PetscCall(MatFactorGetSchurComplement(F, &S, NULL)); 195 PetscCall(MatSetType(S, MATSEQDENSECUDA)); 196 PetscCall(MatCreateVecs(S, &xschur, &bschur)); 197 PetscCall(MatFactorRestoreSchurComplement(F, &S, MAT_FACTOR_SCHUR_UNFACTORED)); 198 } 199 PetscCall(MatFactorCreateSchurComplement(F, &S, NULL)); 200 if (!cuda) PetscCall(MatCreateVecs(S, &xschur, &bschur)); 201 PetscCall(VecDuplicate(xschur, &uschur)); 202 if (nfact == 1 && (!cuda || (herm && symm))) PetscCall(MatFactorInvertSchurComplement(F)); 203 for (nsolve = 0; nsolve < 2; nsolve++) { 204 PetscCall(VecSetRandom(x, rand)); 205 PetscCall(VecCopy(x, u)); 206 207 if (nsolve) { 208 PetscCall(MatMult(A, x, b)); 209 PetscCall(MatSolve(F, b, x)); 210 } else { 211 PetscCall(MatMultTranspose(A, x, b)); 212 PetscCall(MatSolveTranspose(F, b, x)); 213 } 214 /* Check the error */ 215 PetscCall(VecAXPY(u, -1.0, x)); /* u <- (-1.0)x + u */ 216 PetscCall(VecNorm(u, NORM_2, &norm)); 217 if (norm > tol) { 218 PetscReal resi; 219 if (nsolve) { 220 PetscCall(MatMult(A, x, u)); /* u = A*x */ 221 } else { 222 PetscCall(MatMultTranspose(A, x, u)); /* u = A*x */ 223 } 224 PetscCall(VecAXPY(u, -1.0, b)); /* u <- (-1.0)b + u */ 225 PetscCall(VecNorm(u, NORM_2, &resi)); 226 if (nsolve) { 227 PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatSolve error: Norm of error %g, residual %g\n", nfact, nsolve, (double)norm, (double)resi)); 228 } else { 229 PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatSolveTranspose error: Norm of error %g, residual %f\n", nfact, nsolve, (double)norm, (double)resi)); 230 } 231 } 232 PetscCall(VecSetRandom(xschur, rand)); 233 PetscCall(VecCopy(xschur, uschur)); 234 if (nsolve) { 235 PetscCall(MatMult(S, xschur, bschur)); 236 PetscCall(MatFactorSolveSchurComplement(F, bschur, xschur)); 237 } else { 238 PetscCall(MatMultTranspose(S, xschur, bschur)); 239 PetscCall(MatFactorSolveSchurComplementTranspose(F, bschur, xschur)); 240 } 241 /* Check the error */ 242 PetscCall(VecAXPY(uschur, -1.0, xschur)); /* u <- (-1.0)x + u */ 243 PetscCall(VecNorm(uschur, NORM_2, &norm)); 244 if (norm > tol) { 245 PetscReal resi; 246 if (nsolve) { 247 PetscCall(MatMult(S, xschur, uschur)); /* u = A*x */ 248 } else { 249 PetscCall(MatMultTranspose(S, xschur, uschur)); /* u = A*x */ 250 } 251 PetscCall(VecAXPY(uschur, -1.0, bschur)); /* u <- (-1.0)b + u */ 252 PetscCall(VecNorm(uschur, NORM_2, &resi)); 253 if (nsolve) { 254 PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatFactorSolveSchurComplement error: Norm of error %g, residual %g\n", nfact, nsolve, (double)norm, (double)resi)); 255 } else { 256 PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatFactorSolveSchurComplementTranspose error: Norm of error %g, residual %f\n", nfact, nsolve, (double)norm, (double)resi)); 257 } 258 } 259 } 260 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &AD)); 261 if (!nfact) { 262 PetscCall(MatMatMult(AD, C, MAT_INITIAL_MATRIX, 2.0, &RHS)); 263 } else { 264 PetscCall(MatMatMult(AD, C, MAT_REUSE_MATRIX, 2.0, &RHS)); 265 } 266 PetscCall(MatDestroy(&AD)); 267 for (nsolve = 0; nsolve < 2; nsolve++) { 268 PetscCall(MatMatSolve(F, RHS, X)); 269 270 /* Check the error */ 271 PetscCall(MatAXPY(X, -1.0, C, SAME_NONZERO_PATTERN)); 272 PetscCall(MatNorm(X, NORM_FROBENIUS, &norm)); 273 if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatMatSolve: Norm of error %g\n", nfact, nsolve, (double)norm)); 274 #if PetscDefined(HAVE_MUMPS) 275 PetscCall(MatMumpsSetIcntl(F, 26, 1)); 276 PetscCall(MatMatSolve(F, RHS, X)); 277 PetscCall(MatMumpsSetIcntl(F, 26, 2)); 278 PetscCall(MatMatSolve(F, RHS, X)); 279 PetscCall(MatMumpsSetIcntl(F, 26, -1)); 280 281 /* Check the error */ 282 PetscCall(MatAXPY(X, -1.0, C, SAME_NONZERO_PATTERN)); 283 PetscCall(MatNorm(X, NORM_FROBENIUS, &norm)); 284 if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatMatSolve: Norm of error %g\n", nfact, nsolve, (double)norm)); 285 #endif 286 } 287 if (isolver == 0) { 288 Mat spRHS, spRHST, RHST; 289 290 PetscCall(MatTranspose(RHS, MAT_INITIAL_MATRIX, &RHST)); 291 PetscCall(MatConvert(RHST, MATSEQAIJ, MAT_INITIAL_MATRIX, &spRHST)); 292 PetscCall(MatCreateTranspose(spRHST, &spRHS)); 293 for (nsolve = 0; nsolve < 2; nsolve++) { 294 PetscCall(MatMatSolve(F, spRHS, X)); 295 296 /* Check the error */ 297 PetscCall(MatAXPY(X, -1.0, C, SAME_NONZERO_PATTERN)); 298 PetscCall(MatNorm(X, NORM_FROBENIUS, &norm)); 299 if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") sparse MatMatSolve: Norm of error %g\n", nfact, nsolve, (double)norm)); 300 } 301 PetscCall(MatDestroy(&spRHST)); 302 PetscCall(MatDestroy(&spRHS)); 303 PetscCall(MatDestroy(&RHST)); 304 } 305 PetscCall(MatDestroy(&S)); 306 PetscCall(VecDestroy(&xschur)); 307 PetscCall(VecDestroy(&bschur)); 308 PetscCall(VecDestroy(&uschur)); 309 } 310 /* Free data structures */ 311 PetscCall(MatDestroy(&A)); 312 PetscCall(MatDestroy(&C)); 313 PetscCall(MatDestroy(&F)); 314 PetscCall(MatDestroy(&X)); 315 PetscCall(MatDestroy(&RHS)); 316 PetscCall(PetscRandomDestroy(&rand)); 317 PetscCall(VecDestroy(&x)); 318 PetscCall(VecDestroy(&b)); 319 PetscCall(VecDestroy(&u)); 320 PetscCall(PetscFinalize()); 321 return 0; 322 } 323 324 /*TEST 325 326 testset: 327 requires: mkl_pardiso double !complex 328 args: -solver 1 329 330 test: 331 suffix: mkl_pardiso 332 test: 333 requires: cuda 334 suffix: mkl_pardiso_cuda 335 args: -cuda_solve 336 output_file: output/ex192_mkl_pardiso.out 337 test: 338 suffix: mkl_pardiso_1 339 args: -symmetric_solve 340 output_file: output/ex192_mkl_pardiso_1.out 341 test: 342 requires: cuda 343 suffix: mkl_pardiso_cuda_1 344 args: -symmetric_solve -cuda_solve 345 output_file: output/ex192_mkl_pardiso_1.out 346 test: 347 suffix: mkl_pardiso_3 348 args: -symmetric_solve -hermitian_solve 349 output_file: output/ex192_mkl_pardiso_3.out 350 test: 351 requires: cuda defined(PETSC_HAVE_CUSOLVERDNDPOTRI) 352 suffix: mkl_pardiso_cuda_3 353 args: -symmetric_solve -hermitian_solve -cuda_solve 354 output_file: output/ex192_mkl_pardiso_3.out 355 356 testset: 357 requires: mumps double !complex 358 args: -solver 0 359 360 test: 361 suffix: mumps 362 test: 363 requires: cuda 364 suffix: mumps_cuda 365 args: -cuda_solve 366 output_file: output/ex192_mumps.out 367 test: 368 suffix: mumps_2 369 args: -symmetric_solve 370 output_file: output/ex192_mumps_2.out 371 test: 372 requires: cuda 373 suffix: mumps_cuda_2 374 args: -symmetric_solve -cuda_solve 375 output_file: output/ex192_mumps_2.out 376 test: 377 suffix: mumps_3 378 args: -symmetric_solve -hermitian_solve 379 output_file: output/ex192_mumps_3.out 380 test: 381 requires: cuda defined(PETSC_HAVE_CUSOLVERDNDPOTRI) 382 suffix: mumps_cuda_3 383 args: -symmetric_solve -hermitian_solve -cuda_solve 384 output_file: output/ex192_mumps_3.out 385 386 testset: 387 requires: mumps double !complex defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 388 args: -solver 0 -pc_precision single -tol 3.4e-4 389 390 test: 391 suffix: mumps_s 392 output_file: output/ex192_mumps.out 393 394 test: 395 requires: cuda 396 suffix: mumps_cuda_s 397 args: -cuda_solve 398 output_file: output/ex192_mumps.out 399 test: 400 suffix: mumps_2_s 401 args: -symmetric_solve 402 output_file: output/ex192_mumps_2.out 403 test: 404 requires: cuda 405 suffix: mumps_cuda_2_s 406 args: -symmetric_solve -cuda_solve 407 output_file: output/ex192_mumps_2.out 408 test: 409 suffix: mumps_3_s 410 args: -symmetric_solve -hermitian_solve 411 output_file: output/ex192_mumps_3.out 412 test: 413 requires: cuda defined(PETSC_HAVE_CUSOLVERDNDPOTRI) 414 suffix: mumps_cuda_3_s 415 args: -symmetric_solve -hermitian_solve -cuda_solve 416 output_file: output/ex192_mumps_3.out 417 418 TEST*/ 419