static char help[] = "Tests MatSolve() and MatMatSolve() with MUMPS or MKL_PARDISO sequential solvers in Schur complement mode.\n\ Example: mpiexec -n 1 ./ex192 -f -nrhs 4 -symmetric_solve -hermitian_solve -schur_ratio 0.3\n\n"; #include int main(int argc, char **args) { Mat A, RHS, C, F, X, S; Vec u, x, b; Vec xschur, bschur, uschur; IS is_schur; PetscMPIInt size; PetscInt isolver = 0, size_schur, m, n, nfact, nsolve, nrhs; PetscReal norm, tol = PETSC_SQRT_MACHINE_EPSILON; PetscRandom rand; PetscBool data_provided, herm, symm, use_lu, cuda = PETSC_FALSE; PetscBool isdata_provided; PetscReal sratio = 5.1 / 12.; PetscViewer fd; /* viewer */ char solver[256]; char file[PETSC_MAX_PATH_LEN]; /* input Mat file name */ char isfile[PETSC_MAX_PATH_LEN]; /* input IS file name */ PetscFunctionBeginUser; PetscCall(PetscInitialize(&argc, &args, NULL, help)); PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); PetscCheck(size == 1, PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "This is a uniprocessor test"); /* Determine which type of solver we want to test for */ herm = PETSC_FALSE; symm = PETSC_FALSE; PetscCall(PetscOptionsGetBool(NULL, NULL, "-symmetric_solve", &symm, NULL)); PetscCall(PetscOptionsGetBool(NULL, NULL, "-hermitian_solve", &herm, NULL)); if (herm) symm = PETSC_TRUE; PetscCall(PetscOptionsGetBool(NULL, NULL, "-cuda_solve", &cuda, NULL)); PetscCall(PetscOptionsGetReal(NULL, NULL, "-tol", &tol, NULL)); /* Determine file from which we read the matrix A */ PetscCall(PetscOptionsGetString(NULL, NULL, "-f", file, sizeof(file), &data_provided)); if (!data_provided) { /* get matrices from PETSc distribution */ PetscCall(PetscStrncpy(file, "${PETSC_DIR}/share/petsc/datafiles/matrices/", sizeof(file))); if (symm) { #if defined(PETSC_USE_COMPLEX) PetscCall(PetscStrlcat(file, "hpd-complex-", sizeof(file))); #else PetscCall(PetscStrlcat(file, "spd-real-", sizeof(file))); #endif } else { #if defined(PETSC_USE_COMPLEX) PetscCall(PetscStrlcat(file, "nh-complex-", sizeof(file))); #else PetscCall(PetscStrlcat(file, "ns-real-", sizeof(file))); #endif } #if defined(PETSC_USE_64BIT_INDICES) PetscCall(PetscStrlcat(file, "int64-", sizeof(file))); #else PetscCall(PetscStrlcat(file, "int32-", sizeof(file))); #endif #if defined(PETSC_USE_REAL_SINGLE) PetscCall(PetscStrlcat(file, "float32", sizeof(file))); #else PetscCall(PetscStrlcat(file, "float64", sizeof(file))); #endif } /* Load matrix A */ PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, file, FILE_MODE_READ, &fd)); PetscCall(MatCreate(PETSC_COMM_WORLD, &A)); PetscCall(MatLoad(A, fd)); PetscCall(MatGetSize(A, &m, &n)); PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "This example is not intended for rectangular matrices (%" PetscInt_FMT ", %" PetscInt_FMT ")", m, n); PetscCall(PetscOptionsGetString(NULL, NULL, "-fis", isfile, sizeof(isfile), &isdata_provided)); if (isdata_provided) { PetscBool samefile; PetscCall(PetscStrcmp(isfile, file, &samefile)); if (!samefile) { PetscCall(PetscViewerDestroy(&fd)); PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, isfile, FILE_MODE_READ, &fd)); } PetscCall(ISCreate(PETSC_COMM_SELF, &is_schur)); PetscCall(ISLoad(is_schur, fd)); } else { PetscCall(PetscOptionsGetReal(NULL, NULL, "-schur_ratio", &sratio, NULL)); PetscCheck(sratio >= 0. && sratio <= 1., PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Invalid ratio for schur degrees of freedom %g", (double)sratio); size_schur = (PetscInt)(sratio * m); PetscCall(ISCreateStride(PETSC_COMM_SELF, size_schur, m - size_schur, 1, &is_schur)); } PetscCall(ISGetSize(is_schur, &size_schur)); PetscCall(PetscViewerDestroy(&fd)); /* Create dense matrix C and X; C holds true solution with identical columns */ nrhs = 2; PetscCall(PetscOptionsGetInt(NULL, NULL, "-nrhs", &nrhs, NULL)); PetscCall(MatCreate(PETSC_COMM_WORLD, &C)); PetscCall(MatSetSizes(C, m, PETSC_DECIDE, PETSC_DECIDE, nrhs)); PetscCall(MatSetType(C, MATDENSE)); PetscCall(MatSetFromOptions(C)); PetscCall(MatSetUp(C)); PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rand)); PetscCall(PetscRandomSetFromOptions(rand)); PetscCall(MatSetRandom(C, rand)); PetscCall(MatDuplicate(C, MAT_DO_NOT_COPY_VALUES, &X)); /* Create vectors */ PetscCall(VecCreate(PETSC_COMM_WORLD, &x)); PetscCall(VecSetSizes(x, n, PETSC_DECIDE)); PetscCall(VecSetFromOptions(x)); PetscCall(VecDuplicate(x, &b)); PetscCall(VecDuplicate(x, &u)); /* save the true solution */ PetscCall(PetscOptionsGetInt(NULL, NULL, "-solver", &isolver, NULL)); switch (isolver) { #if defined(PETSC_HAVE_MUMPS) case 0: PetscCall(PetscStrncpy(solver, MATSOLVERMUMPS, sizeof(solver))); break; #endif #if defined(PETSC_HAVE_MKL_PARDISO) case 1: PetscCall(PetscStrncpy(solver, MATSOLVERMKL_PARDISO, sizeof(solver))); break; #endif default: PetscCall(PetscStrncpy(solver, MATSOLVERPETSC, sizeof(solver))); break; } #if defined(PETSC_USE_COMPLEX) if (isolver == 0 && symm && !data_provided) { /* MUMPS (5.0.0) does not have support for Hermitian matrices, so make them symmetric */ PetscScalar im = PetscSqrtScalar((PetscScalar)-1.); PetscScalar val = -1.0; val = val + im; PetscCall(MatSetValue(A, 1, 0, val, INSERT_VALUES)); PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); } #endif PetscCall(PetscPrintf(PETSC_COMM_SELF, "Solving with %s: nrhs %" PetscInt_FMT ", sym %d, herm %d, size schur %" PetscInt_FMT ", size mat %" PetscInt_FMT "\n", solver, nrhs, symm, herm, size_schur, m)); /* Test LU/Cholesky Factorization */ use_lu = PETSC_FALSE; if (!symm) use_lu = PETSC_TRUE; if (PetscDefined(USE_COMPLEX) && isolver == 1) use_lu = PETSC_TRUE; if (cuda && symm && !herm) use_lu = PETSC_TRUE; if (herm && !use_lu) { /* test also conversion routines inside the solver packages */ PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE)); PetscCall(MatConvert(A, MATSEQSBAIJ, MAT_INPLACE_MATRIX, &A)); } if (use_lu) { PetscCall(MatGetFactor(A, solver, MAT_FACTOR_LU, &F)); } else { if (herm) { PetscCall(MatSetOption(A, MAT_SPD, PETSC_TRUE)); } else { PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE)); PetscCall(MatSetOption(A, MAT_SPD, PETSC_FALSE)); } PetscCall(MatGetFactor(A, solver, MAT_FACTOR_CHOLESKY, &F)); } /* Set Schur complement indices */ PetscCall(MatFactorSetSchurIS(F, is_schur)); PetscCall(ISDestroy(&is_schur)); if (use_lu) { PetscCall(MatLUFactorSymbolic(F, A, NULL, NULL, NULL)); } else { PetscCall(MatCholeskyFactorSymbolic(F, A, NULL, NULL)); } for (nfact = 0; nfact < 3; nfact++) { Mat AD; if (nfact == 1) { PetscCall(VecSetRandom(x, rand)); if (symm && herm) PetscCall(VecAbs(x)); PetscCall(MatDiagonalSet(A, x, ADD_VALUES)); } if (use_lu) { PetscCall(MatLUFactorNumeric(F, A, NULL)); } else { PetscCall(MatCholeskyFactorNumeric(F, A, NULL)); } if (cuda) { PetscCall(MatFactorGetSchurComplement(F, &S, NULL)); PetscCall(MatSetType(S, MATSEQDENSECUDA)); PetscCall(MatCreateVecs(S, &xschur, &bschur)); PetscCall(MatFactorRestoreSchurComplement(F, &S, MAT_FACTOR_SCHUR_UNFACTORED)); } PetscCall(MatFactorCreateSchurComplement(F, &S, NULL)); if (!cuda) PetscCall(MatCreateVecs(S, &xschur, &bschur)); PetscCall(VecDuplicate(xschur, &uschur)); if (nfact == 1 && (!cuda || (herm && symm))) PetscCall(MatFactorInvertSchurComplement(F)); for (nsolve = 0; nsolve < 2; nsolve++) { PetscCall(VecSetRandom(x, rand)); PetscCall(VecCopy(x, u)); if (nsolve) { PetscCall(MatMult(A, x, b)); PetscCall(MatSolve(F, b, x)); } else { PetscCall(MatMultTranspose(A, x, b)); PetscCall(MatSolveTranspose(F, b, x)); } /* Check the error */ PetscCall(VecAXPY(u, -1.0, x)); /* u <- (-1.0)x + u */ PetscCall(VecNorm(u, NORM_2, &norm)); if (norm > tol) { PetscReal resi; if (nsolve) { PetscCall(MatMult(A, x, u)); /* u = A*x */ } else { PetscCall(MatMultTranspose(A, x, u)); /* u = A*x */ } PetscCall(VecAXPY(u, -1.0, b)); /* u <- (-1.0)b + u */ PetscCall(VecNorm(u, NORM_2, &resi)); if (nsolve) { PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatSolve error: Norm of error %g, residual %g\n", nfact, nsolve, (double)norm, (double)resi)); } else { PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatSolveTranspose error: Norm of error %g, residual %f\n", nfact, nsolve, (double)norm, (double)resi)); } } PetscCall(VecSetRandom(xschur, rand)); PetscCall(VecCopy(xschur, uschur)); if (nsolve) { PetscCall(MatMult(S, xschur, bschur)); PetscCall(MatFactorSolveSchurComplement(F, bschur, xschur)); } else { PetscCall(MatMultTranspose(S, xschur, bschur)); PetscCall(MatFactorSolveSchurComplementTranspose(F, bschur, xschur)); } /* Check the error */ PetscCall(VecAXPY(uschur, -1.0, xschur)); /* u <- (-1.0)x + u */ PetscCall(VecNorm(uschur, NORM_2, &norm)); if (norm > tol) { PetscReal resi; if (nsolve) { PetscCall(MatMult(S, xschur, uschur)); /* u = A*x */ } else { PetscCall(MatMultTranspose(S, xschur, uschur)); /* u = A*x */ } PetscCall(VecAXPY(uschur, -1.0, bschur)); /* u <- (-1.0)b + u */ PetscCall(VecNorm(uschur, NORM_2, &resi)); if (nsolve) { PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatFactorSolveSchurComplement error: Norm of error %g, residual %g\n", nfact, nsolve, (double)norm, (double)resi)); } else { PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatFactorSolveSchurComplementTranspose error: Norm of error %g, residual %f\n", nfact, nsolve, (double)norm, (double)resi)); } } } PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &AD)); if (!nfact) { PetscCall(MatMatMult(AD, C, MAT_INITIAL_MATRIX, 2.0, &RHS)); } else { PetscCall(MatMatMult(AD, C, MAT_REUSE_MATRIX, 2.0, &RHS)); } PetscCall(MatDestroy(&AD)); for (nsolve = 0; nsolve < 2; nsolve++) { PetscCall(MatMatSolve(F, RHS, X)); /* Check the error */ PetscCall(MatAXPY(X, -1.0, C, SAME_NONZERO_PATTERN)); PetscCall(MatNorm(X, NORM_FROBENIUS, &norm)); if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatMatSolve: Norm of error %g\n", nfact, nsolve, (double)norm)); #if PetscDefined(HAVE_MUMPS) PetscCall(MatMumpsSetIcntl(F, 26, 1)); PetscCall(MatMatSolve(F, RHS, X)); PetscCall(MatMumpsSetIcntl(F, 26, 2)); PetscCall(MatMatSolve(F, RHS, X)); PetscCall(MatMumpsSetIcntl(F, 26, -1)); /* Check the error */ PetscCall(MatAXPY(X, -1.0, C, SAME_NONZERO_PATTERN)); PetscCall(MatNorm(X, NORM_FROBENIUS, &norm)); if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatMatSolve: Norm of error %g\n", nfact, nsolve, (double)norm)); #endif } if (isolver == 0) { Mat spRHS, spRHST, RHST; PetscCall(MatTranspose(RHS, MAT_INITIAL_MATRIX, &RHST)); PetscCall(MatConvert(RHST, MATSEQAIJ, MAT_INITIAL_MATRIX, &spRHST)); PetscCall(MatCreateTranspose(spRHST, &spRHS)); for (nsolve = 0; nsolve < 2; nsolve++) { PetscCall(MatMatSolve(F, spRHS, X)); /* Check the error */ PetscCall(MatAXPY(X, -1.0, C, SAME_NONZERO_PATTERN)); PetscCall(MatNorm(X, NORM_FROBENIUS, &norm)); if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") sparse MatMatSolve: Norm of error %g\n", nfact, nsolve, (double)norm)); } PetscCall(MatDestroy(&spRHST)); PetscCall(MatDestroy(&spRHS)); PetscCall(MatDestroy(&RHST)); } PetscCall(MatDestroy(&S)); PetscCall(VecDestroy(&xschur)); PetscCall(VecDestroy(&bschur)); PetscCall(VecDestroy(&uschur)); } /* Free data structures */ PetscCall(MatDestroy(&A)); PetscCall(MatDestroy(&C)); PetscCall(MatDestroy(&F)); PetscCall(MatDestroy(&X)); PetscCall(MatDestroy(&RHS)); PetscCall(PetscRandomDestroy(&rand)); PetscCall(VecDestroy(&x)); PetscCall(VecDestroy(&b)); PetscCall(VecDestroy(&u)); PetscCall(PetscFinalize()); return 0; } /*TEST testset: requires: mkl_pardiso double !complex args: -solver 1 test: suffix: mkl_pardiso test: requires: cuda suffix: mkl_pardiso_cuda args: -cuda_solve output_file: output/ex192_mkl_pardiso.out test: suffix: mkl_pardiso_1 args: -symmetric_solve output_file: output/ex192_mkl_pardiso_1.out test: requires: cuda suffix: mkl_pardiso_cuda_1 args: -symmetric_solve -cuda_solve output_file: output/ex192_mkl_pardiso_1.out test: suffix: mkl_pardiso_3 args: -symmetric_solve -hermitian_solve output_file: output/ex192_mkl_pardiso_3.out test: requires: cuda defined(PETSC_HAVE_CUSOLVERDNDPOTRI) suffix: mkl_pardiso_cuda_3 args: -symmetric_solve -hermitian_solve -cuda_solve output_file: output/ex192_mkl_pardiso_3.out testset: requires: mumps double !complex args: -solver 0 test: suffix: mumps test: requires: cuda suffix: mumps_cuda args: -cuda_solve output_file: output/ex192_mumps.out test: suffix: mumps_2 args: -symmetric_solve output_file: output/ex192_mumps_2.out test: requires: cuda suffix: mumps_cuda_2 args: -symmetric_solve -cuda_solve output_file: output/ex192_mumps_2.out test: suffix: mumps_3 args: -symmetric_solve -hermitian_solve output_file: output/ex192_mumps_3.out test: requires: cuda defined(PETSC_HAVE_CUSOLVERDNDPOTRI) suffix: mumps_cuda_3 args: -symmetric_solve -hermitian_solve -cuda_solve output_file: output/ex192_mumps_3.out testset: requires: mumps double !complex defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) args: -solver 0 -pc_precision single -tol 3.4e-4 test: suffix: mumps_s output_file: output/ex192_mumps.out test: requires: cuda suffix: mumps_cuda_s args: -cuda_solve output_file: output/ex192_mumps.out test: suffix: mumps_2_s args: -symmetric_solve output_file: output/ex192_mumps_2.out test: requires: cuda suffix: mumps_cuda_2_s args: -symmetric_solve -cuda_solve output_file: output/ex192_mumps_2.out test: suffix: mumps_3_s args: -symmetric_solve -hermitian_solve output_file: output/ex192_mumps_3.out test: requires: cuda defined(PETSC_HAVE_CUSOLVERDNDPOTRI) suffix: mumps_cuda_3_s args: -symmetric_solve -hermitian_solve -cuda_solve output_file: output/ex192_mumps_3.out TEST*/