1 2 static char help[] = "Tests LU, Cholesky, and QR factorization and MatMatSolve() for a sequential dense matrix. \n\ 3 For MATSEQDENSE matrix, the factorization is just a thin wrapper to LAPACK. \n\ 4 For MATSEQDENSECUDA, it uses cusolverDn routines \n\n"; 5 6 #include <petscmat.h> 7 8 static PetscErrorCode createMatsAndVecs(PetscInt m, PetscInt n, PetscInt nrhs, PetscBool full, Mat *_mat, Mat *_RHS, Mat *_SOLU, Vec *_x, Vec *_y, Vec *_b) 9 { 10 PetscRandom rand; 11 Mat mat,RHS,SOLU; 12 PetscInt rstart, rend; 13 PetscInt cstart, cend; 14 PetscScalar value = 1.0; 15 Vec x, y, b; 16 17 PetscFunctionBegin; 18 /* create multiple vectors RHS and SOLU */ 19 PetscCall(MatCreate(PETSC_COMM_WORLD,&RHS)); 20 PetscCall(MatSetSizes(RHS,PETSC_DECIDE,PETSC_DECIDE,m,nrhs)); 21 PetscCall(MatSetType(RHS,MATDENSE)); 22 PetscCall(MatSetOptionsPrefix(RHS,"rhs_")); 23 PetscCall(MatSetFromOptions(RHS)); 24 PetscCall(MatSeqDenseSetPreallocation(RHS,NULL)); 25 26 PetscCall(PetscRandomCreate(PETSC_COMM_WORLD,&rand)); 27 PetscCall(PetscRandomSetFromOptions(rand)); 28 PetscCall(MatSetRandom(RHS,rand)); 29 30 if (m == n) { 31 PetscCall(MatDuplicate(RHS,MAT_DO_NOT_COPY_VALUES,&SOLU)); 32 } else { 33 PetscCall(MatCreate(PETSC_COMM_WORLD,&SOLU)); 34 PetscCall(MatSetSizes(SOLU,PETSC_DECIDE,PETSC_DECIDE,n,nrhs)); 35 PetscCall(MatSetType(SOLU,MATDENSE)); 36 PetscCall(MatSeqDenseSetPreallocation(SOLU,NULL)); 37 } 38 PetscCall(MatSetRandom(SOLU,rand)); 39 40 /* create matrix */ 41 PetscCall(MatCreate(PETSC_COMM_WORLD,&mat)); 42 PetscCall(MatSetSizes(mat,PETSC_DECIDE,PETSC_DECIDE,m,n)); 43 PetscCall(MatSetType(mat,MATDENSE)); 44 PetscCall(MatSetFromOptions(mat)); 45 PetscCall(MatSetUp(mat)); 46 PetscCall(MatGetOwnershipRange(mat,&rstart,&rend)); 47 PetscCall(MatGetOwnershipRangeColumn(mat,&cstart,&cend)); 48 if (!full) { 49 for (PetscInt i=rstart; i<rend; i++) { 50 if (m == n) { 51 value = (PetscReal)i+1; 52 PetscCall(MatSetValues(mat,1,&i,1,&i,&value,INSERT_VALUES)); 53 } else { 54 for (PetscInt j = cstart; j < cend; j++) { 55 value = ((PetscScalar)i+1.)/(PetscSqr(i - j) + 1.); 56 PetscCall(MatSetValues(mat,1,&i,1,&j,&value,INSERT_VALUES)); 57 } 58 } 59 } 60 PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY)); 61 PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY)); 62 } else { 63 PetscCall(MatSetRandom(mat,rand)); 64 if (m == n) { 65 Mat T; 66 67 PetscCall(MatMatTransposeMult(mat,mat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T)); 68 PetscCall(MatDestroy(&mat)); 69 mat = T; 70 } 71 } 72 73 /* create single vectors */ 74 PetscCall(MatCreateVecs(mat,&x,&b)); 75 PetscCall(VecDuplicate(x,&y)); 76 PetscCall(VecSet(x,value)); 77 PetscCall(PetscRandomDestroy(&rand)); 78 *_mat = mat; 79 *_RHS = RHS; 80 *_SOLU = SOLU; 81 *_x = x; 82 *_y = y; 83 *_b = b; 84 PetscFunctionReturn(0); 85 } 86 87 int main(int argc,char **argv) 88 { 89 Mat mat,F,RHS,SOLU; 90 MatInfo info; 91 PetscInt m = 15, n = 10,i,j,nrhs=2; 92 Vec x,y,b,ytmp; 93 IS perm; 94 PetscReal norm,tol=PETSC_SMALL; 95 PetscMPIInt size; 96 char solver[64]; 97 PetscBool inplace,full = PETSC_FALSE, ldl = PETSC_TRUE, qr = PETSC_TRUE; 98 99 PetscFunctionBeginUser; 100 PetscCall(PetscInitialize(&argc,&argv,(char*) 0,help)); 101 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD,&size)); 102 PetscCheck(size == 1,PETSC_COMM_WORLD,PETSC_ERR_WRONG_MPI_SIZE,"This is a uniprocessor example only!"); 103 PetscCall(PetscStrcpy(solver,"petsc")); 104 PetscCall(PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL)); 105 PetscCall(PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL)); 106 PetscCall(PetscOptionsGetInt(NULL,NULL,"-nrhs",&nrhs,NULL)); 107 PetscCall(PetscOptionsGetBool(NULL,NULL,"-ldl",&ldl,NULL)); 108 PetscCall(PetscOptionsGetBool(NULL,NULL,"-qr",&qr,NULL)); 109 PetscCall(PetscOptionsGetBool(NULL,NULL,"-full",&full,NULL)); 110 PetscCall(PetscOptionsGetString(NULL,NULL,"-solver_type",solver,sizeof(solver),NULL)); 111 112 PetscCall(createMatsAndVecs(n, n, nrhs, full, &mat, &RHS, &SOLU, &x, &y, &b)); 113 PetscCall(VecDuplicate(y,&ytmp)); 114 115 /* Only SeqDense* support in-place factorizations and NULL permutations */ 116 PetscCall(PetscObjectBaseTypeCompare((PetscObject)mat,MATSEQDENSE,&inplace)); 117 PetscCall(MatGetLocalSize(mat,&i,NULL)); 118 PetscCall(MatGetOwnershipRange(mat,&j,NULL)); 119 PetscCall(ISCreateStride(PETSC_COMM_WORLD,i,j,1,&perm)); 120 121 PetscCall(MatGetInfo(mat,MAT_LOCAL,&info)); 122 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"matrix nonzeros = %" PetscInt_FMT ", allocated nonzeros = %" PetscInt_FMT "\n",(PetscInt)info.nz_used,(PetscInt)info.nz_allocated)); 123 PetscCall(MatMult(mat,x,b)); 124 125 /* Cholesky factorization - perm and factinfo are ignored by LAPACK */ 126 /* in-place Cholesky */ 127 if (inplace) { 128 Mat RHS2; 129 130 PetscCall(MatDuplicate(mat,MAT_COPY_VALUES,&F)); 131 if (!ldl) PetscCall(MatSetOption(F,MAT_SPD,PETSC_TRUE)); 132 PetscCall(MatCholeskyFactor(F,perm,0)); 133 PetscCall(MatSolve(F,b,y)); 134 PetscCall(VecAXPY(y,-1.0,x)); 135 PetscCall(VecNorm(y,NORM_2,&norm)); 136 if (norm > tol) { 137 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for in-place Cholesky %g\n",(double)norm)); 138 } 139 140 PetscCall(MatMatSolve(F,RHS,SOLU)); 141 PetscCall(MatMatMult(mat,SOLU,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&RHS2)); 142 PetscCall(MatAXPY(RHS,-1.0,RHS2,SAME_NONZERO_PATTERN)); 143 PetscCall(MatNorm(RHS,NORM_FROBENIUS,&norm)); 144 if (norm > tol) { 145 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Error: Norm of residual for in-place Cholesky (MatMatSolve) %g\n",(double)norm)); 146 } 147 PetscCall(MatDestroy(&F)); 148 PetscCall(MatDestroy(&RHS2)); 149 } 150 151 /* out-of-place Cholesky */ 152 PetscCall(MatGetFactor(mat,solver,MAT_FACTOR_CHOLESKY,&F)); 153 if (!ldl) PetscCall(MatSetOption(F,MAT_SPD,PETSC_TRUE)); 154 PetscCall(MatCholeskyFactorSymbolic(F,mat,perm,0)); 155 PetscCall(MatCholeskyFactorNumeric(F,mat,0)); 156 PetscCall(MatSolve(F,b,y)); 157 PetscCall(VecAXPY(y,-1.0,x)); 158 PetscCall(VecNorm(y,NORM_2,&norm)); 159 if (norm > tol) { 160 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for out-of-place Cholesky %g\n",(double)norm)); 161 } 162 PetscCall(MatDestroy(&F)); 163 164 /* LU factorization - perms and factinfo are ignored by LAPACK */ 165 i = n-1; 166 PetscCall(MatZeroRows(mat,1,&i,-1.0,NULL,NULL)); 167 PetscCall(MatMult(mat,x,b)); 168 169 /* in-place LU */ 170 if (inplace) { 171 Mat RHS2; 172 173 PetscCall(MatDuplicate(mat,MAT_COPY_VALUES,&F)); 174 PetscCall(MatLUFactor(F,perm,perm,0)); 175 PetscCall(MatSolve(F,b,y)); 176 PetscCall(VecAXPY(y,-1.0,x)); 177 PetscCall(VecNorm(y,NORM_2,&norm)); 178 if (norm > tol) { 179 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for in-place LU %g\n",(double)norm)); 180 } 181 PetscCall(MatMatSolve(F,RHS,SOLU)); 182 PetscCall(MatMatMult(mat,SOLU,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&RHS2)); 183 PetscCall(MatAXPY(RHS,-1.0,RHS2,SAME_NONZERO_PATTERN)); 184 PetscCall(MatNorm(RHS,NORM_FROBENIUS,&norm)); 185 if (norm > tol) { 186 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Error: Norm of residual for in-place LU (MatMatSolve) %g\n",(double)norm)); 187 } 188 PetscCall(MatDestroy(&F)); 189 PetscCall(MatDestroy(&RHS2)); 190 } 191 192 /* out-of-place LU */ 193 PetscCall(MatGetFactor(mat,solver,MAT_FACTOR_LU,&F)); 194 PetscCall(MatLUFactorSymbolic(F,mat,perm,perm,0)); 195 PetscCall(MatLUFactorNumeric(F,mat,0)); 196 PetscCall(MatSolve(F,b,y)); 197 PetscCall(VecAXPY(y,-1.0,x)); 198 PetscCall(VecNorm(y,NORM_2,&norm)); 199 if (norm > tol) { 200 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for out-of-place LU %g\n",(double)norm)); 201 } 202 203 /* free space */ 204 PetscCall(ISDestroy(&perm)); 205 PetscCall(MatDestroy(&F)); 206 PetscCall(MatDestroy(&mat)); 207 PetscCall(MatDestroy(&RHS)); 208 PetscCall(MatDestroy(&SOLU)); 209 PetscCall(VecDestroy(&x)); 210 PetscCall(VecDestroy(&b)); 211 PetscCall(VecDestroy(&y)); 212 PetscCall(VecDestroy(&ytmp)); 213 214 if (qr) { 215 /* setup rectanglar */ 216 PetscCall(createMatsAndVecs(m, n, nrhs, full, &mat, &RHS, &SOLU, &x, &y, &b)); 217 PetscCall(VecDuplicate(y,&ytmp)); 218 219 /* QR factorization - perms and factinfo are ignored by LAPACK */ 220 PetscCall(MatMult(mat,x,b)); 221 222 /* in-place QR */ 223 if (inplace) { 224 Mat SOLU2; 225 226 PetscCall(MatDuplicate(mat,MAT_COPY_VALUES,&F)); 227 PetscCall(MatQRFactor(F,NULL,0)); 228 PetscCall(MatSolve(F,b,y)); 229 PetscCall(VecAXPY(y,-1.0,x)); 230 PetscCall(VecNorm(y,NORM_2,&norm)); 231 if (norm > tol) { 232 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for in-place QR %g\n",(double)norm)); 233 } 234 PetscCall(MatMatMult(mat,SOLU,MAT_REUSE_MATRIX,PETSC_DEFAULT,&RHS)); 235 PetscCall(MatDuplicate(SOLU, MAT_DO_NOT_COPY_VALUES, &SOLU2)); 236 PetscCall(MatMatSolve(F,RHS,SOLU2)); 237 PetscCall(MatAXPY(SOLU2,-1.0,SOLU,SAME_NONZERO_PATTERN)); 238 PetscCall(MatNorm(SOLU2,NORM_FROBENIUS,&norm)); 239 if (norm > tol) { 240 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Error: Norm of error for in-place QR (MatMatSolve) %g\n",(double)norm)); 241 } 242 PetscCall(MatDestroy(&F)); 243 PetscCall(MatDestroy(&SOLU2)); 244 } 245 246 /* out-of-place QR */ 247 PetscCall(MatGetFactor(mat,solver,MAT_FACTOR_QR,&F)); 248 PetscCall(MatQRFactorSymbolic(F,mat,NULL,NULL)); 249 PetscCall(MatQRFactorNumeric(F,mat,NULL)); 250 PetscCall(MatSolve(F,b,y)); 251 PetscCall(VecAXPY(y,-1.0,x)); 252 PetscCall(VecNorm(y,NORM_2,&norm)); 253 if (norm > tol) { 254 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for out-of-place QR %g\n",(double)norm)); 255 } 256 257 if (m == n) { 258 /* out-of-place MatSolveTranspose */ 259 PetscCall(MatMultTranspose(mat,x,b)); 260 PetscCall(MatSolveTranspose(F,b,y)); 261 PetscCall(VecAXPY(y,-1.0,x)); 262 PetscCall(VecNorm(y,NORM_2,&norm)); 263 if (norm > tol) { 264 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for out-of-place QR %g\n",(double)norm)); 265 } 266 } 267 268 /* free space */ 269 PetscCall(MatDestroy(&F)); 270 PetscCall(MatDestroy(&mat)); 271 PetscCall(MatDestroy(&RHS)); 272 PetscCall(MatDestroy(&SOLU)); 273 PetscCall(VecDestroy(&x)); 274 PetscCall(VecDestroy(&b)); 275 PetscCall(VecDestroy(&y)); 276 PetscCall(VecDestroy(&ytmp)); 277 } 278 PetscCall(PetscFinalize()); 279 return 0; 280 } 281 282 /*TEST 283 284 test: 285 286 test: 287 requires: cuda 288 suffix: seqdensecuda 289 args: -mat_type seqdensecuda -rhs_mat_type seqdensecuda -ldl 0 -solver_type {{petsc cuda}} 290 output_file: output/ex1_1.out 291 292 test: 293 requires: cuda 294 suffix: seqdensecuda_2 295 args: -ldl 0 -solver_type cuda 296 output_file: output/ex1_1.out 297 298 test: 299 requires: cuda 300 suffix: seqdensecuda_seqaijcusparse 301 args: -mat_type seqaijcusparse -rhs_mat_type seqdensecuda -qr 0 302 output_file: output/ex1_2.out 303 304 test: 305 requires: cuda viennacl 306 suffix: seqdensecuda_seqaijviennacl 307 args: -mat_type seqaijviennacl -rhs_mat_type seqdensecuda -qr 0 308 output_file: output/ex1_2.out 309 310 test: 311 suffix: 4 312 args: -m 10 -n 10 313 output_file: output/ex1_1.out 314 315 TEST*/ 316