1 2 static char help[] = "Tests LU, Cholesky, and QR factorization and MatMatSolve() for a sequential dense matrix. \n\ 3 For MATSEQDENSE matrix, the factorization is just a thin wrapper to LAPACK. \n\ 4 For MATSEQDENSECUDA, it uses cusolverDn routines \n\n"; 5 6 #include <petscmat.h> 7 8 static PetscErrorCode createMatsAndVecs(PetscInt m, PetscInt n, PetscInt nrhs, PetscBool full, Mat *_mat, Mat *_RHS, Mat *_SOLU, Vec *_x, Vec *_y, Vec *_b) 9 { 10 PetscRandom rand; 11 Mat mat,RHS,SOLU; 12 PetscInt rstart, rend; 13 PetscInt cstart, cend; 14 PetscScalar value = 1.0; 15 Vec x, y, b; 16 17 PetscFunctionBegin; 18 /* create multiple vectors RHS and SOLU */ 19 PetscCall(MatCreate(PETSC_COMM_WORLD,&RHS)); 20 PetscCall(MatSetSizes(RHS,PETSC_DECIDE,PETSC_DECIDE,m,nrhs)); 21 PetscCall(MatSetType(RHS,MATDENSE)); 22 PetscCall(MatSetOptionsPrefix(RHS,"rhs_")); 23 PetscCall(MatSetFromOptions(RHS)); 24 PetscCall(MatSeqDenseSetPreallocation(RHS,NULL)); 25 26 PetscCall(PetscRandomCreate(PETSC_COMM_WORLD,&rand)); 27 PetscCall(PetscRandomSetFromOptions(rand)); 28 PetscCall(MatSetRandom(RHS,rand)); 29 30 if (m == n) { 31 PetscCall(MatDuplicate(RHS,MAT_DO_NOT_COPY_VALUES,&SOLU)); 32 } else { 33 PetscCall(MatCreate(PETSC_COMM_WORLD,&SOLU)); 34 PetscCall(MatSetSizes(SOLU,PETSC_DECIDE,PETSC_DECIDE,n,nrhs)); 35 PetscCall(MatSetType(SOLU,MATDENSE)); 36 PetscCall(MatSeqDenseSetPreallocation(SOLU,NULL)); 37 } 38 PetscCall(MatSetRandom(SOLU,rand)); 39 40 /* create matrix */ 41 PetscCall(MatCreate(PETSC_COMM_WORLD,&mat)); 42 PetscCall(MatSetSizes(mat,PETSC_DECIDE,PETSC_DECIDE,m,n)); 43 PetscCall(MatSetType(mat,MATDENSE)); 44 PetscCall(MatSetFromOptions(mat)); 45 PetscCall(MatSetUp(mat)); 46 PetscCall(MatGetOwnershipRange(mat,&rstart,&rend)); 47 PetscCall(MatGetOwnershipRangeColumn(mat,&cstart,&cend)); 48 if (!full) { 49 for (PetscInt i=rstart; i<rend; i++) { 50 if (m == n) { 51 value = (PetscReal)i+1; 52 PetscCall(MatSetValues(mat,1,&i,1,&i,&value,INSERT_VALUES)); 53 } else { 54 for (PetscInt j = cstart; j < cend; j++) { 55 value = ((PetscScalar)i+1.)/(PetscSqr(i - j) + 1.); 56 PetscCall(MatSetValues(mat,1,&i,1,&j,&value,INSERT_VALUES)); 57 } 58 } 59 } 60 PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY)); 61 PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY)); 62 } else { 63 PetscCall(MatSetRandom(mat,rand)); 64 if (m == n) { 65 Mat T; 66 67 PetscCall(MatMatTransposeMult(mat,mat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T)); 68 PetscCall(MatDestroy(&mat)); 69 mat = T; 70 } 71 } 72 73 /* create single vectors */ 74 PetscCall(MatCreateVecs(mat,&x,&b)); 75 PetscCall(VecDuplicate(x,&y)); 76 PetscCall(VecSet(x,value)); 77 PetscCall(PetscRandomDestroy(&rand)); 78 *_mat = mat; 79 *_RHS = RHS; 80 *_SOLU = SOLU; 81 *_x = x; 82 *_y = y; 83 *_b = b; 84 PetscFunctionReturn(0); 85 } 86 87 int main(int argc,char **argv) 88 { 89 Mat mat,F,RHS,SOLU; 90 MatInfo info; 91 PetscInt m = 15, n = 10,i,j,nrhs=2; 92 Vec x,y,b,ytmp; 93 IS perm; 94 PetscReal norm,tol=PETSC_SMALL; 95 PetscMPIInt size; 96 char solver[64]; 97 PetscBool inplace,full = PETSC_FALSE, ldl = PETSC_TRUE, qr = PETSC_TRUE; 98 99 PetscCall(PetscInitialize(&argc,&argv,(char*) 0,help)); 100 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD,&size)); 101 PetscCheck(size == 1,PETSC_COMM_WORLD,PETSC_ERR_WRONG_MPI_SIZE,"This is a uniprocessor example only!"); 102 PetscCall(PetscStrcpy(solver,"petsc")); 103 PetscCall(PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL)); 104 PetscCall(PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL)); 105 PetscCall(PetscOptionsGetInt(NULL,NULL,"-nrhs",&nrhs,NULL)); 106 PetscCall(PetscOptionsGetBool(NULL,NULL,"-ldl",&ldl,NULL)); 107 PetscCall(PetscOptionsGetBool(NULL,NULL,"-qr",&qr,NULL)); 108 PetscCall(PetscOptionsGetBool(NULL,NULL,"-full",&full,NULL)); 109 PetscCall(PetscOptionsGetString(NULL,NULL,"-solver_type",solver,sizeof(solver),NULL)); 110 111 PetscCall(createMatsAndVecs(n, n, nrhs, full, &mat, &RHS, &SOLU, &x, &y, &b)); 112 PetscCall(VecDuplicate(y,&ytmp)); 113 114 /* Only SeqDense* support in-place factorizations and NULL permutations */ 115 PetscCall(PetscObjectBaseTypeCompare((PetscObject)mat,MATSEQDENSE,&inplace)); 116 PetscCall(MatGetLocalSize(mat,&i,NULL)); 117 PetscCall(MatGetOwnershipRange(mat,&j,NULL)); 118 PetscCall(ISCreateStride(PETSC_COMM_WORLD,i,j,1,&perm)); 119 120 PetscCall(MatGetInfo(mat,MAT_LOCAL,&info)); 121 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"matrix nonzeros = %" PetscInt_FMT ", allocated nonzeros = %" PetscInt_FMT "\n",(PetscInt)info.nz_used,(PetscInt)info.nz_allocated)); 122 PetscCall(MatMult(mat,x,b)); 123 124 /* Cholesky factorization - perm and factinfo are ignored by LAPACK */ 125 /* in-place Cholesky */ 126 if (inplace) { 127 Mat RHS2; 128 129 PetscCall(MatDuplicate(mat,MAT_COPY_VALUES,&F)); 130 if (!ldl) PetscCall(MatSetOption(F,MAT_SPD,PETSC_TRUE)); 131 PetscCall(MatCholeskyFactor(F,perm,0)); 132 PetscCall(MatSolve(F,b,y)); 133 PetscCall(VecAXPY(y,-1.0,x)); 134 PetscCall(VecNorm(y,NORM_2,&norm)); 135 if (norm > tol) { 136 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for in-place Cholesky %g\n",(double)norm)); 137 } 138 139 PetscCall(MatMatSolve(F,RHS,SOLU)); 140 PetscCall(MatMatMult(mat,SOLU,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&RHS2)); 141 PetscCall(MatAXPY(RHS,-1.0,RHS2,SAME_NONZERO_PATTERN)); 142 PetscCall(MatNorm(RHS,NORM_FROBENIUS,&norm)); 143 if (norm > tol) { 144 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Error: Norm of residual for in-place Cholesky (MatMatSolve) %g\n",(double)norm)); 145 } 146 PetscCall(MatDestroy(&F)); 147 PetscCall(MatDestroy(&RHS2)); 148 } 149 150 /* out-of-place Cholesky */ 151 PetscCall(MatGetFactor(mat,solver,MAT_FACTOR_CHOLESKY,&F)); 152 if (!ldl) PetscCall(MatSetOption(F,MAT_SPD,PETSC_TRUE)); 153 PetscCall(MatCholeskyFactorSymbolic(F,mat,perm,0)); 154 PetscCall(MatCholeskyFactorNumeric(F,mat,0)); 155 PetscCall(MatSolve(F,b,y)); 156 PetscCall(VecAXPY(y,-1.0,x)); 157 PetscCall(VecNorm(y,NORM_2,&norm)); 158 if (norm > tol) { 159 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for out-of-place Cholesky %g\n",(double)norm)); 160 } 161 PetscCall(MatDestroy(&F)); 162 163 /* LU factorization - perms and factinfo are ignored by LAPACK */ 164 i = n-1; 165 PetscCall(MatZeroRows(mat,1,&i,-1.0,NULL,NULL)); 166 PetscCall(MatMult(mat,x,b)); 167 168 /* in-place LU */ 169 if (inplace) { 170 Mat RHS2; 171 172 PetscCall(MatDuplicate(mat,MAT_COPY_VALUES,&F)); 173 PetscCall(MatLUFactor(F,perm,perm,0)); 174 PetscCall(MatSolve(F,b,y)); 175 PetscCall(VecAXPY(y,-1.0,x)); 176 PetscCall(VecNorm(y,NORM_2,&norm)); 177 if (norm > tol) { 178 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for in-place LU %g\n",(double)norm)); 179 } 180 PetscCall(MatMatSolve(F,RHS,SOLU)); 181 PetscCall(MatMatMult(mat,SOLU,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&RHS2)); 182 PetscCall(MatAXPY(RHS,-1.0,RHS2,SAME_NONZERO_PATTERN)); 183 PetscCall(MatNorm(RHS,NORM_FROBENIUS,&norm)); 184 if (norm > tol) { 185 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Error: Norm of residual for in-place LU (MatMatSolve) %g\n",(double)norm)); 186 } 187 PetscCall(MatDestroy(&F)); 188 PetscCall(MatDestroy(&RHS2)); 189 } 190 191 /* out-of-place LU */ 192 PetscCall(MatGetFactor(mat,solver,MAT_FACTOR_LU,&F)); 193 PetscCall(MatLUFactorSymbolic(F,mat,perm,perm,0)); 194 PetscCall(MatLUFactorNumeric(F,mat,0)); 195 PetscCall(MatSolve(F,b,y)); 196 PetscCall(VecAXPY(y,-1.0,x)); 197 PetscCall(VecNorm(y,NORM_2,&norm)); 198 if (norm > tol) { 199 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for out-of-place LU %g\n",(double)norm)); 200 } 201 202 /* free space */ 203 PetscCall(ISDestroy(&perm)); 204 PetscCall(MatDestroy(&F)); 205 PetscCall(MatDestroy(&mat)); 206 PetscCall(MatDestroy(&RHS)); 207 PetscCall(MatDestroy(&SOLU)); 208 PetscCall(VecDestroy(&x)); 209 PetscCall(VecDestroy(&b)); 210 PetscCall(VecDestroy(&y)); 211 PetscCall(VecDestroy(&ytmp)); 212 213 if (qr) { 214 /* setup rectanglar */ 215 PetscCall(createMatsAndVecs(m, n, nrhs, full, &mat, &RHS, &SOLU, &x, &y, &b)); 216 PetscCall(VecDuplicate(y,&ytmp)); 217 218 /* QR factorization - perms and factinfo are ignored by LAPACK */ 219 PetscCall(MatMult(mat,x,b)); 220 221 /* in-place QR */ 222 if (inplace) { 223 Mat SOLU2; 224 225 PetscCall(MatDuplicate(mat,MAT_COPY_VALUES,&F)); 226 PetscCall(MatQRFactor(F,NULL,0)); 227 PetscCall(MatSolve(F,b,y)); 228 PetscCall(VecAXPY(y,-1.0,x)); 229 PetscCall(VecNorm(y,NORM_2,&norm)); 230 if (norm > tol) { 231 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for in-place QR %g\n",(double)norm)); 232 } 233 PetscCall(MatMatMult(mat,SOLU,MAT_REUSE_MATRIX,PETSC_DEFAULT,&RHS)); 234 PetscCall(MatDuplicate(SOLU, MAT_DO_NOT_COPY_VALUES, &SOLU2)); 235 PetscCall(MatMatSolve(F,RHS,SOLU2)); 236 PetscCall(MatAXPY(SOLU2,-1.0,SOLU,SAME_NONZERO_PATTERN)); 237 PetscCall(MatNorm(SOLU2,NORM_FROBENIUS,&norm)); 238 if (norm > tol) { 239 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Error: Norm of error for in-place QR (MatMatSolve) %g\n",(double)norm)); 240 } 241 PetscCall(MatDestroy(&F)); 242 PetscCall(MatDestroy(&SOLU2)); 243 } 244 245 /* out-of-place QR */ 246 PetscCall(MatGetFactor(mat,solver,MAT_FACTOR_QR,&F)); 247 PetscCall(MatQRFactorSymbolic(F,mat,NULL,NULL)); 248 PetscCall(MatQRFactorNumeric(F,mat,NULL)); 249 PetscCall(MatSolve(F,b,y)); 250 PetscCall(VecAXPY(y,-1.0,x)); 251 PetscCall(VecNorm(y,NORM_2,&norm)); 252 if (norm > tol) { 253 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for out-of-place QR %g\n",(double)norm)); 254 } 255 256 if (m == n) { 257 /* out-of-place MatSolveTranspose */ 258 PetscCall(MatMultTranspose(mat,x,b)); 259 PetscCall(MatSolveTranspose(F,b,y)); 260 PetscCall(VecAXPY(y,-1.0,x)); 261 PetscCall(VecNorm(y,NORM_2,&norm)); 262 if (norm > tol) { 263 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for out-of-place QR %g\n",(double)norm)); 264 } 265 } 266 267 /* free space */ 268 PetscCall(MatDestroy(&F)); 269 PetscCall(MatDestroy(&mat)); 270 PetscCall(MatDestroy(&RHS)); 271 PetscCall(MatDestroy(&SOLU)); 272 PetscCall(VecDestroy(&x)); 273 PetscCall(VecDestroy(&b)); 274 PetscCall(VecDestroy(&y)); 275 PetscCall(VecDestroy(&ytmp)); 276 } 277 PetscCall(PetscFinalize()); 278 return 0; 279 } 280 281 /*TEST 282 283 test: 284 285 test: 286 requires: cuda 287 suffix: seqdensecuda 288 args: -mat_type seqdensecuda -rhs_mat_type seqdensecuda -ldl 0 -solver_type {{petsc cuda}} 289 output_file: output/ex1_1.out 290 291 test: 292 requires: cuda 293 suffix: seqdensecuda_2 294 args: -ldl 0 -solver_type cuda 295 output_file: output/ex1_1.out 296 297 test: 298 requires: cuda 299 suffix: seqdensecuda_seqaijcusparse 300 args: -mat_type seqaijcusparse -rhs_mat_type seqdensecuda -qr 0 301 output_file: output/ex1_2.out 302 303 test: 304 requires: cuda viennacl 305 suffix: seqdensecuda_seqaijviennacl 306 args: -mat_type seqaijviennacl -rhs_mat_type seqdensecuda -qr 0 307 output_file: output/ex1_2.out 308 309 test: 310 suffix: 4 311 args: -m 10 -n 10 312 output_file: output/ex1_1.out 313 314 TEST*/ 315