static char help[] = "Tests LU, Cholesky, and QR factorization and MatMatSolve() for a sequential dense matrix. \n\ For MATSEQDENSE matrix, the factorization is just a thin wrapper to LAPACK. \n\ For MATSEQDENSECUDA, it uses cusolverDn routines \n\n"; #include static PetscErrorCode createMatsAndVecs(PetscInt m, PetscInt n, PetscInt nrhs, PetscBool full, Mat *_mat, Mat *_RHS, Mat *_SOLU, Vec *_x, Vec *_y, Vec *_b) { PetscRandom rand; Mat mat,RHS,SOLU; PetscInt rstart, rend; PetscInt cstart, cend; PetscScalar value = 1.0; Vec x, y, b; PetscFunctionBegin; /* create multiple vectors RHS and SOLU */ PetscCall(MatCreate(PETSC_COMM_WORLD,&RHS)); PetscCall(MatSetSizes(RHS,PETSC_DECIDE,PETSC_DECIDE,m,nrhs)); PetscCall(MatSetType(RHS,MATDENSE)); PetscCall(MatSetOptionsPrefix(RHS,"rhs_")); PetscCall(MatSetFromOptions(RHS)); PetscCall(MatSeqDenseSetPreallocation(RHS,NULL)); PetscCall(PetscRandomCreate(PETSC_COMM_WORLD,&rand)); PetscCall(PetscRandomSetFromOptions(rand)); PetscCall(MatSetRandom(RHS,rand)); if (m == n) { PetscCall(MatDuplicate(RHS,MAT_DO_NOT_COPY_VALUES,&SOLU)); } else { PetscCall(MatCreate(PETSC_COMM_WORLD,&SOLU)); PetscCall(MatSetSizes(SOLU,PETSC_DECIDE,PETSC_DECIDE,n,nrhs)); PetscCall(MatSetType(SOLU,MATDENSE)); PetscCall(MatSeqDenseSetPreallocation(SOLU,NULL)); } PetscCall(MatSetRandom(SOLU,rand)); /* create matrix */ PetscCall(MatCreate(PETSC_COMM_WORLD,&mat)); PetscCall(MatSetSizes(mat,PETSC_DECIDE,PETSC_DECIDE,m,n)); PetscCall(MatSetType(mat,MATDENSE)); PetscCall(MatSetFromOptions(mat)); PetscCall(MatSetUp(mat)); PetscCall(MatGetOwnershipRange(mat,&rstart,&rend)); PetscCall(MatGetOwnershipRangeColumn(mat,&cstart,&cend)); if (!full) { for (PetscInt i=rstart; i tol) { PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for in-place Cholesky %g\n",(double)norm)); } PetscCall(MatMatSolve(F,RHS,SOLU)); PetscCall(MatMatMult(mat,SOLU,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&RHS2)); PetscCall(MatAXPY(RHS,-1.0,RHS2,SAME_NONZERO_PATTERN)); PetscCall(MatNorm(RHS,NORM_FROBENIUS,&norm)); if (norm > tol) { PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Error: Norm of residual for in-place Cholesky (MatMatSolve) %g\n",(double)norm)); } PetscCall(MatDestroy(&F)); PetscCall(MatDestroy(&RHS2)); } /* out-of-place Cholesky */ PetscCall(MatGetFactor(mat,solver,MAT_FACTOR_CHOLESKY,&F)); if (!ldl) PetscCall(MatSetOption(F,MAT_SPD,PETSC_TRUE)); PetscCall(MatCholeskyFactorSymbolic(F,mat,perm,0)); PetscCall(MatCholeskyFactorNumeric(F,mat,0)); PetscCall(MatSolve(F,b,y)); PetscCall(VecAXPY(y,-1.0,x)); PetscCall(VecNorm(y,NORM_2,&norm)); if (norm > tol) { PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for out-of-place Cholesky %g\n",(double)norm)); } PetscCall(MatDestroy(&F)); /* LU factorization - perms and factinfo are ignored by LAPACK */ i = n-1; PetscCall(MatZeroRows(mat,1,&i,-1.0,NULL,NULL)); PetscCall(MatMult(mat,x,b)); /* in-place LU */ if (inplace) { Mat RHS2; PetscCall(MatDuplicate(mat,MAT_COPY_VALUES,&F)); PetscCall(MatLUFactor(F,perm,perm,0)); PetscCall(MatSolve(F,b,y)); PetscCall(VecAXPY(y,-1.0,x)); PetscCall(VecNorm(y,NORM_2,&norm)); if (norm > tol) { PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for in-place LU %g\n",(double)norm)); } PetscCall(MatMatSolve(F,RHS,SOLU)); PetscCall(MatMatMult(mat,SOLU,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&RHS2)); PetscCall(MatAXPY(RHS,-1.0,RHS2,SAME_NONZERO_PATTERN)); PetscCall(MatNorm(RHS,NORM_FROBENIUS,&norm)); if (norm > tol) { PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Error: Norm of residual for in-place LU (MatMatSolve) %g\n",(double)norm)); } PetscCall(MatDestroy(&F)); PetscCall(MatDestroy(&RHS2)); } /* out-of-place LU */ PetscCall(MatGetFactor(mat,solver,MAT_FACTOR_LU,&F)); PetscCall(MatLUFactorSymbolic(F,mat,perm,perm,0)); PetscCall(MatLUFactorNumeric(F,mat,0)); PetscCall(MatSolve(F,b,y)); PetscCall(VecAXPY(y,-1.0,x)); PetscCall(VecNorm(y,NORM_2,&norm)); if (norm > tol) { PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for out-of-place LU %g\n",(double)norm)); } /* free space */ PetscCall(ISDestroy(&perm)); PetscCall(MatDestroy(&F)); PetscCall(MatDestroy(&mat)); PetscCall(MatDestroy(&RHS)); PetscCall(MatDestroy(&SOLU)); PetscCall(VecDestroy(&x)); PetscCall(VecDestroy(&b)); PetscCall(VecDestroy(&y)); PetscCall(VecDestroy(&ytmp)); if (qr) { /* setup rectanglar */ PetscCall(createMatsAndVecs(m, n, nrhs, full, &mat, &RHS, &SOLU, &x, &y, &b)); PetscCall(VecDuplicate(y,&ytmp)); /* QR factorization - perms and factinfo are ignored by LAPACK */ PetscCall(MatMult(mat,x,b)); /* in-place QR */ if (inplace) { Mat SOLU2; PetscCall(MatDuplicate(mat,MAT_COPY_VALUES,&F)); PetscCall(MatQRFactor(F,NULL,0)); PetscCall(MatSolve(F,b,y)); PetscCall(VecAXPY(y,-1.0,x)); PetscCall(VecNorm(y,NORM_2,&norm)); if (norm > tol) { PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for in-place QR %g\n",(double)norm)); } PetscCall(MatMatMult(mat,SOLU,MAT_REUSE_MATRIX,PETSC_DEFAULT,&RHS)); PetscCall(MatDuplicate(SOLU, MAT_DO_NOT_COPY_VALUES, &SOLU2)); PetscCall(MatMatSolve(F,RHS,SOLU2)); PetscCall(MatAXPY(SOLU2,-1.0,SOLU,SAME_NONZERO_PATTERN)); PetscCall(MatNorm(SOLU2,NORM_FROBENIUS,&norm)); if (norm > tol) { PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Error: Norm of error for in-place QR (MatMatSolve) %g\n",(double)norm)); } PetscCall(MatDestroy(&F)); PetscCall(MatDestroy(&SOLU2)); } /* out-of-place QR */ PetscCall(MatGetFactor(mat,solver,MAT_FACTOR_QR,&F)); PetscCall(MatQRFactorSymbolic(F,mat,NULL,NULL)); PetscCall(MatQRFactorNumeric(F,mat,NULL)); PetscCall(MatSolve(F,b,y)); PetscCall(VecAXPY(y,-1.0,x)); PetscCall(VecNorm(y,NORM_2,&norm)); if (norm > tol) { PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for out-of-place QR %g\n",(double)norm)); } if (m == n) { /* out-of-place MatSolveTranspose */ PetscCall(MatMultTranspose(mat,x,b)); PetscCall(MatSolveTranspose(F,b,y)); PetscCall(VecAXPY(y,-1.0,x)); PetscCall(VecNorm(y,NORM_2,&norm)); if (norm > tol) { PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for out-of-place QR %g\n",(double)norm)); } } /* free space */ PetscCall(MatDestroy(&F)); PetscCall(MatDestroy(&mat)); PetscCall(MatDestroy(&RHS)); PetscCall(MatDestroy(&SOLU)); PetscCall(VecDestroy(&x)); PetscCall(VecDestroy(&b)); PetscCall(VecDestroy(&y)); PetscCall(VecDestroy(&ytmp)); } PetscCall(PetscFinalize()); return 0; } /*TEST test: test: requires: cuda suffix: seqdensecuda args: -mat_type seqdensecuda -rhs_mat_type seqdensecuda -ldl 0 -solver_type {{petsc cuda}} output_file: output/ex1_1.out test: requires: cuda suffix: seqdensecuda_2 args: -ldl 0 -solver_type cuda output_file: output/ex1_1.out test: requires: cuda suffix: seqdensecuda_seqaijcusparse args: -mat_type seqaijcusparse -rhs_mat_type seqdensecuda -qr 0 output_file: output/ex1_2.out test: requires: cuda viennacl suffix: seqdensecuda_seqaijviennacl args: -mat_type seqaijviennacl -rhs_mat_type seqdensecuda -qr 0 output_file: output/ex1_2.out test: suffix: 4 args: -m 10 -n 10 output_file: output/ex1_1.out TEST*/