1 2 static char help[] = "Tests LU, Cholesky, and QR factorization and MatMatSolve() for a sequential dense matrix. \n\ 3 For MATSEQDENSE matrix, the factorization is just a thin wrapper to LAPACK. \n\ 4 For MATSEQDENSECUDA, it uses cusolverDn routines \n\n"; 5 6 #include <petscmat.h> 7 8 static PetscErrorCode createMatsAndVecs(PetscInt m, PetscInt n, PetscInt nrhs, PetscBool full, Mat *_mat, Mat *_RHS, Mat *_SOLU, Vec *_x, Vec *_y, Vec *_b) 9 { 10 PetscRandom rand; 11 Mat mat,RHS,SOLU; 12 PetscInt rstart, rend; 13 PetscInt cstart, cend; 14 PetscScalar value = 1.0; 15 Vec x, y, b; 16 17 PetscFunctionBegin; 18 /* create multiple vectors RHS and SOLU */ 19 PetscCall(MatCreate(PETSC_COMM_WORLD,&RHS)); 20 PetscCall(MatSetSizes(RHS,PETSC_DECIDE,PETSC_DECIDE,m,nrhs)); 21 PetscCall(MatSetType(RHS,MATDENSE)); 22 PetscCall(MatSetOptionsPrefix(RHS,"rhs_")); 23 PetscCall(MatSetFromOptions(RHS)); 24 PetscCall(MatSeqDenseSetPreallocation(RHS,NULL)); 25 26 PetscCall(PetscRandomCreate(PETSC_COMM_WORLD,&rand)); 27 PetscCall(PetscRandomSetFromOptions(rand)); 28 PetscCall(MatSetRandom(RHS,rand)); 29 30 if (m == n) { 31 PetscCall(MatDuplicate(RHS,MAT_DO_NOT_COPY_VALUES,&SOLU)); 32 } else { 33 PetscCall(MatCreate(PETSC_COMM_WORLD,&SOLU)); 34 PetscCall(MatSetSizes(SOLU,PETSC_DECIDE,PETSC_DECIDE,n,nrhs)); 35 PetscCall(MatSetType(SOLU,MATDENSE)); 36 PetscCall(MatSeqDenseSetPreallocation(SOLU,NULL)); 37 } 38 PetscCall(MatSetRandom(SOLU,rand)); 39 40 /* create matrix */ 41 PetscCall(MatCreate(PETSC_COMM_WORLD,&mat)); 42 PetscCall(MatSetSizes(mat,PETSC_DECIDE,PETSC_DECIDE,m,n)); 43 PetscCall(MatSetType(mat,MATDENSE)); 44 PetscCall(MatSetFromOptions(mat)); 45 PetscCall(MatSetUp(mat)); 46 PetscCall(MatGetOwnershipRange(mat,&rstart,&rend)); 47 PetscCall(MatGetOwnershipRangeColumn(mat,&cstart,&cend)); 48 if (!full) { 49 for (PetscInt i=rstart; i<rend; i++) { 50 if (m == n) { 51 value = (PetscReal)i+1; 52 PetscCall(MatSetValues(mat,1,&i,1,&i,&value,INSERT_VALUES)); 53 } else { 54 for (PetscInt j = cstart; j < cend; j++) { 55 value = ((PetscScalar)i+1.)/(PetscSqr(i - j) + 1.); 56 PetscCall(MatSetValues(mat,1,&i,1,&j,&value,INSERT_VALUES)); 57 } 58 } 59 } 60 PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY)); 61 PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY)); 62 } else { 63 PetscCall(MatSetRandom(mat,rand)); 64 if (m == n) { 65 Mat T; 66 67 PetscCall(MatMatTransposeMult(mat,mat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T)); 68 PetscCall(MatDestroy(&mat)); 69 mat = T; 70 } 71 } 72 73 /* create single vectors */ 74 PetscCall(MatCreateVecs(mat,&x,&b)); 75 PetscCall(VecDuplicate(x,&y)); 76 PetscCall(VecSet(x,value)); 77 PetscCall(PetscRandomDestroy(&rand)); 78 *_mat = mat; 79 *_RHS = RHS; 80 *_SOLU = SOLU; 81 *_x = x; 82 *_y = y; 83 *_b = b; 84 PetscFunctionReturn(0); 85 } 86 87 int main(int argc,char **argv) 88 { 89 Mat mat,F,RHS,SOLU; 90 MatInfo info; 91 PetscErrorCode ierr; 92 PetscInt m = 15, n = 10,i,j,nrhs=2; 93 Vec x,y,b,ytmp; 94 IS perm; 95 PetscReal norm,tol=PETSC_SMALL; 96 PetscMPIInt size; 97 char solver[64]; 98 PetscBool inplace,full = PETSC_FALSE, ldl = PETSC_TRUE, qr = PETSC_TRUE; 99 100 PetscCall(PetscInitialize(&argc,&argv,(char*) 0,help)); 101 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD,&size)); 102 PetscCheck(size == 1,PETSC_COMM_WORLD,PETSC_ERR_WRONG_MPI_SIZE,"This is a uniprocessor example only!"); 103 PetscCall(PetscStrcpy(solver,"petsc")); 104 PetscCall(PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL)); 105 PetscCall(PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL)); 106 PetscCall(PetscOptionsGetInt(NULL,NULL,"-nrhs",&nrhs,NULL)); 107 PetscCall(PetscOptionsGetBool(NULL,NULL,"-ldl",&ldl,NULL)); 108 PetscCall(PetscOptionsGetBool(NULL,NULL,"-qr",&qr,NULL)); 109 PetscCall(PetscOptionsGetBool(NULL,NULL,"-full",&full,NULL)); 110 PetscCall(PetscOptionsGetString(NULL,NULL,"-solver_type",solver,sizeof(solver),NULL)); 111 112 PetscCall(createMatsAndVecs(n, n, nrhs, full, &mat, &RHS, &SOLU, &x, &y, &b)); 113 PetscCall(VecDuplicate(y,&ytmp)); 114 115 /* Only SeqDense* support in-place factorizations and NULL permutations */ 116 PetscCall(PetscObjectBaseTypeCompare((PetscObject)mat,MATSEQDENSE,&inplace)); 117 PetscCall(MatGetLocalSize(mat,&i,NULL)); 118 PetscCall(MatGetOwnershipRange(mat,&j,NULL)); 119 PetscCall(ISCreateStride(PETSC_COMM_WORLD,i,j,1,&perm)); 120 121 PetscCall(MatGetInfo(mat,MAT_LOCAL,&info)); 122 ierr = PetscPrintf(PETSC_COMM_WORLD,"matrix nonzeros = %" PetscInt_FMT ", allocated nonzeros = %" PetscInt_FMT "\n", 123 (PetscInt)info.nz_used,(PetscInt)info.nz_allocated);PetscCall(ierr); 124 PetscCall(MatMult(mat,x,b)); 125 126 /* Cholesky factorization - perm and factinfo are ignored by LAPACK */ 127 /* in-place Cholesky */ 128 if (inplace) { 129 Mat RHS2; 130 131 PetscCall(MatDuplicate(mat,MAT_COPY_VALUES,&F)); 132 if (!ldl) PetscCall(MatSetOption(F,MAT_SPD,PETSC_TRUE)); 133 PetscCall(MatCholeskyFactor(F,perm,0)); 134 PetscCall(MatSolve(F,b,y)); 135 PetscCall(VecAXPY(y,-1.0,x)); 136 PetscCall(VecNorm(y,NORM_2,&norm)); 137 if (norm > tol) { 138 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for in-place Cholesky %g\n",(double)norm)); 139 } 140 141 PetscCall(MatMatSolve(F,RHS,SOLU)); 142 PetscCall(MatMatMult(mat,SOLU,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&RHS2)); 143 PetscCall(MatAXPY(RHS,-1.0,RHS2,SAME_NONZERO_PATTERN)); 144 PetscCall(MatNorm(RHS,NORM_FROBENIUS,&norm)); 145 if (norm > tol) { 146 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Error: Norm of residual for in-place Cholesky (MatMatSolve) %g\n",(double)norm)); 147 } 148 PetscCall(MatDestroy(&F)); 149 PetscCall(MatDestroy(&RHS2)); 150 } 151 152 /* out-of-place Cholesky */ 153 PetscCall(MatGetFactor(mat,solver,MAT_FACTOR_CHOLESKY,&F)); 154 if (!ldl) PetscCall(MatSetOption(F,MAT_SPD,PETSC_TRUE)); 155 PetscCall(MatCholeskyFactorSymbolic(F,mat,perm,0)); 156 PetscCall(MatCholeskyFactorNumeric(F,mat,0)); 157 PetscCall(MatSolve(F,b,y)); 158 PetscCall(VecAXPY(y,-1.0,x)); 159 PetscCall(VecNorm(y,NORM_2,&norm)); 160 if (norm > tol) { 161 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for out-of-place Cholesky %g\n",(double)norm)); 162 } 163 PetscCall(MatDestroy(&F)); 164 165 /* LU factorization - perms and factinfo are ignored by LAPACK */ 166 i = n-1; 167 PetscCall(MatZeroRows(mat,1,&i,-1.0,NULL,NULL)); 168 PetscCall(MatMult(mat,x,b)); 169 170 /* in-place LU */ 171 if (inplace) { 172 Mat RHS2; 173 174 PetscCall(MatDuplicate(mat,MAT_COPY_VALUES,&F)); 175 PetscCall(MatLUFactor(F,perm,perm,0)); 176 PetscCall(MatSolve(F,b,y)); 177 PetscCall(VecAXPY(y,-1.0,x)); 178 PetscCall(VecNorm(y,NORM_2,&norm)); 179 if (norm > tol) { 180 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for in-place LU %g\n",(double)norm)); 181 } 182 PetscCall(MatMatSolve(F,RHS,SOLU)); 183 PetscCall(MatMatMult(mat,SOLU,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&RHS2)); 184 PetscCall(MatAXPY(RHS,-1.0,RHS2,SAME_NONZERO_PATTERN)); 185 PetscCall(MatNorm(RHS,NORM_FROBENIUS,&norm)); 186 if (norm > tol) { 187 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Error: Norm of residual for in-place LU (MatMatSolve) %g\n",(double)norm)); 188 } 189 PetscCall(MatDestroy(&F)); 190 PetscCall(MatDestroy(&RHS2)); 191 } 192 193 /* out-of-place LU */ 194 PetscCall(MatGetFactor(mat,solver,MAT_FACTOR_LU,&F)); 195 PetscCall(MatLUFactorSymbolic(F,mat,perm,perm,0)); 196 PetscCall(MatLUFactorNumeric(F,mat,0)); 197 PetscCall(MatSolve(F,b,y)); 198 PetscCall(VecAXPY(y,-1.0,x)); 199 PetscCall(VecNorm(y,NORM_2,&norm)); 200 if (norm > tol) { 201 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for out-of-place LU %g\n",(double)norm)); 202 } 203 204 /* free space */ 205 PetscCall(ISDestroy(&perm)); 206 PetscCall(MatDestroy(&F)); 207 PetscCall(MatDestroy(&mat)); 208 PetscCall(MatDestroy(&RHS)); 209 PetscCall(MatDestroy(&SOLU)); 210 PetscCall(VecDestroy(&x)); 211 PetscCall(VecDestroy(&b)); 212 PetscCall(VecDestroy(&y)); 213 PetscCall(VecDestroy(&ytmp)); 214 215 if (qr) { 216 /* setup rectanglar */ 217 PetscCall(createMatsAndVecs(m, n, nrhs, full, &mat, &RHS, &SOLU, &x, &y, &b)); 218 PetscCall(VecDuplicate(y,&ytmp)); 219 220 /* QR factorization - perms and factinfo are ignored by LAPACK */ 221 PetscCall(MatMult(mat,x,b)); 222 223 /* in-place QR */ 224 if (inplace) { 225 Mat SOLU2; 226 227 PetscCall(MatDuplicate(mat,MAT_COPY_VALUES,&F)); 228 PetscCall(MatQRFactor(F,NULL,0)); 229 PetscCall(MatSolve(F,b,y)); 230 PetscCall(VecAXPY(y,-1.0,x)); 231 PetscCall(VecNorm(y,NORM_2,&norm)); 232 if (norm > tol) { 233 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for in-place QR %g\n",(double)norm)); 234 } 235 PetscCall(MatMatMult(mat,SOLU,MAT_REUSE_MATRIX,PETSC_DEFAULT,&RHS)); 236 PetscCall(MatDuplicate(SOLU, MAT_DO_NOT_COPY_VALUES, &SOLU2)); 237 PetscCall(MatMatSolve(F,RHS,SOLU2)); 238 PetscCall(MatAXPY(SOLU2,-1.0,SOLU,SAME_NONZERO_PATTERN)); 239 PetscCall(MatNorm(SOLU2,NORM_FROBENIUS,&norm)); 240 if (norm > tol) { 241 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Error: Norm of error for in-place QR (MatMatSolve) %g\n",(double)norm)); 242 } 243 PetscCall(MatDestroy(&F)); 244 PetscCall(MatDestroy(&SOLU2)); 245 } 246 247 /* out-of-place QR */ 248 PetscCall(MatGetFactor(mat,solver,MAT_FACTOR_QR,&F)); 249 PetscCall(MatQRFactorSymbolic(F,mat,NULL,NULL)); 250 PetscCall(MatQRFactorNumeric(F,mat,NULL)); 251 PetscCall(MatSolve(F,b,y)); 252 PetscCall(VecAXPY(y,-1.0,x)); 253 PetscCall(VecNorm(y,NORM_2,&norm)); 254 if (norm > tol) { 255 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for out-of-place QR %g\n",(double)norm)); 256 } 257 258 if (m == n) { 259 /* out-of-place MatSolveTranspose */ 260 PetscCall(MatMultTranspose(mat,x,b)); 261 PetscCall(MatSolveTranspose(F,b,y)); 262 PetscCall(VecAXPY(y,-1.0,x)); 263 PetscCall(VecNorm(y,NORM_2,&norm)); 264 if (norm > tol) { 265 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for out-of-place QR %g\n",(double)norm)); 266 } 267 } 268 269 /* free space */ 270 PetscCall(MatDestroy(&F)); 271 PetscCall(MatDestroy(&mat)); 272 PetscCall(MatDestroy(&RHS)); 273 PetscCall(MatDestroy(&SOLU)); 274 PetscCall(VecDestroy(&x)); 275 PetscCall(VecDestroy(&b)); 276 PetscCall(VecDestroy(&y)); 277 PetscCall(VecDestroy(&ytmp)); 278 } 279 PetscCall(PetscFinalize()); 280 return 0; 281 } 282 283 /*TEST 284 285 test: 286 287 test: 288 requires: cuda 289 suffix: seqdensecuda 290 args: -mat_type seqdensecuda -rhs_mat_type seqdensecuda -ldl 0 -solver_type {{petsc cuda}} 291 output_file: output/ex1_1.out 292 293 test: 294 requires: cuda 295 suffix: seqdensecuda_2 296 args: -ldl 0 -solver_type cuda 297 output_file: output/ex1_1.out 298 299 test: 300 requires: cuda 301 suffix: seqdensecuda_seqaijcusparse 302 args: -mat_type seqaijcusparse -rhs_mat_type seqdensecuda -qr 0 303 output_file: output/ex1_2.out 304 305 test: 306 requires: cuda viennacl 307 suffix: seqdensecuda_seqaijviennacl 308 args: -mat_type seqaijviennacl -rhs_mat_type seqdensecuda -qr 0 309 output_file: output/ex1_2.out 310 311 test: 312 suffix: 4 313 args: -m 10 -n 10 314 output_file: output/ex1_1.out 315 316 TEST*/ 317