1 2 static char help[] = "Tests LU, Cholesky, and QR factorization and MatMatSolve() for a sequential dense matrix. \n\ 3 For MATSEQDENSE matrix, the factorization is just a thin wrapper to LAPACK. \n\ 4 For MATSEQDENSECUDA, it uses cusolverDn routines \n\n"; 5 6 #include <petscmat.h> 7 8 static PetscErrorCode createMatsAndVecs(PetscInt m, PetscInt n, PetscInt nrhs, PetscBool full, Mat *_mat, Mat *_RHS, Mat *_SOLU, Vec *_x, Vec *_y, Vec *_b) 9 { 10 PetscRandom rand; 11 Mat mat,RHS,SOLU; 12 PetscInt rstart, rend; 13 PetscInt cstart, cend; 14 PetscScalar value = 1.0; 15 Vec x, y, b; 16 17 PetscFunctionBegin; 18 /* create multiple vectors RHS and SOLU */ 19 CHKERRQ(MatCreate(PETSC_COMM_WORLD,&RHS)); 20 CHKERRQ(MatSetSizes(RHS,PETSC_DECIDE,PETSC_DECIDE,m,nrhs)); 21 CHKERRQ(MatSetType(RHS,MATDENSE)); 22 CHKERRQ(MatSetOptionsPrefix(RHS,"rhs_")); 23 CHKERRQ(MatSetFromOptions(RHS)); 24 CHKERRQ(MatSeqDenseSetPreallocation(RHS,NULL)); 25 26 CHKERRQ(PetscRandomCreate(PETSC_COMM_WORLD,&rand)); 27 CHKERRQ(PetscRandomSetFromOptions(rand)); 28 CHKERRQ(MatSetRandom(RHS,rand)); 29 30 if (m == n) { 31 CHKERRQ(MatDuplicate(RHS,MAT_DO_NOT_COPY_VALUES,&SOLU)); 32 } else { 33 CHKERRQ(MatCreate(PETSC_COMM_WORLD,&SOLU)); 34 CHKERRQ(MatSetSizes(SOLU,PETSC_DECIDE,PETSC_DECIDE,n,nrhs)); 35 CHKERRQ(MatSetType(SOLU,MATDENSE)); 36 CHKERRQ(MatSeqDenseSetPreallocation(SOLU,NULL)); 37 } 38 CHKERRQ(MatSetRandom(SOLU,rand)); 39 40 /* create matrix */ 41 CHKERRQ(MatCreate(PETSC_COMM_WORLD,&mat)); 42 CHKERRQ(MatSetSizes(mat,PETSC_DECIDE,PETSC_DECIDE,m,n)); 43 CHKERRQ(MatSetType(mat,MATDENSE)); 44 CHKERRQ(MatSetFromOptions(mat)); 45 CHKERRQ(MatSetUp(mat)); 46 CHKERRQ(MatGetOwnershipRange(mat,&rstart,&rend)); 47 CHKERRQ(MatGetOwnershipRangeColumn(mat,&cstart,&cend)); 48 if (!full) { 49 for (PetscInt i=rstart; i<rend; i++) { 50 if (m == n) { 51 value = (PetscReal)i+1; 52 CHKERRQ(MatSetValues(mat,1,&i,1,&i,&value,INSERT_VALUES)); 53 } else { 54 for (PetscInt j = cstart; j < cend; j++) { 55 value = ((PetscScalar)i+1.)/(PetscSqr(i - j) + 1.); 56 CHKERRQ(MatSetValues(mat,1,&i,1,&j,&value,INSERT_VALUES)); 57 } 58 } 59 } 60 CHKERRQ(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY)); 61 CHKERRQ(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY)); 62 } else { 63 CHKERRQ(MatSetRandom(mat,rand)); 64 if (m == n) { 65 Mat T; 66 67 CHKERRQ(MatMatTransposeMult(mat,mat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T)); 68 CHKERRQ(MatDestroy(&mat)); 69 mat = T; 70 } 71 } 72 73 /* create single vectors */ 74 CHKERRQ(MatCreateVecs(mat,&x,&b)); 75 CHKERRQ(VecDuplicate(x,&y)); 76 CHKERRQ(VecSet(x,value)); 77 CHKERRQ(PetscRandomDestroy(&rand)); 78 *_mat = mat; 79 *_RHS = RHS; 80 *_SOLU = SOLU; 81 *_x = x; 82 *_y = y; 83 *_b = b; 84 PetscFunctionReturn(0); 85 } 86 87 int main(int argc,char **argv) 88 { 89 Mat mat,F,RHS,SOLU; 90 MatInfo info; 91 PetscErrorCode ierr; 92 PetscInt m = 15, n = 10,i,j,nrhs=2; 93 Vec x,y,b,ytmp; 94 IS perm; 95 PetscReal norm,tol=PETSC_SMALL; 96 PetscMPIInt size; 97 char solver[64]; 98 PetscBool inplace,full = PETSC_FALSE, ldl = PETSC_TRUE, qr = PETSC_TRUE; 99 100 ierr = PetscInitialize(&argc,&argv,(char*) 0,help);if (ierr) return ierr; 101 CHKERRMPI(MPI_Comm_size(PETSC_COMM_WORLD,&size)); 102 PetscCheckFalse(size != 1,PETSC_COMM_WORLD,PETSC_ERR_SUP,"This is a uniprocessor example only!"); 103 CHKERRQ(PetscStrcpy(solver,"petsc")); 104 CHKERRQ(PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL)); 105 CHKERRQ(PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL)); 106 CHKERRQ(PetscOptionsGetInt(NULL,NULL,"-nrhs",&nrhs,NULL)); 107 CHKERRQ(PetscOptionsGetBool(NULL,NULL,"-ldl",&ldl,NULL)); 108 CHKERRQ(PetscOptionsGetBool(NULL,NULL,"-qr",&qr,NULL)); 109 CHKERRQ(PetscOptionsGetBool(NULL,NULL,"-full",&full,NULL)); 110 CHKERRQ(PetscOptionsGetString(NULL,NULL,"-solver_type",solver,sizeof(solver),NULL)); 111 112 CHKERRQ(createMatsAndVecs(n, n, nrhs, full, &mat, &RHS, &SOLU, &x, &y, &b)); 113 CHKERRQ(VecDuplicate(y,&ytmp)); 114 115 /* Only SeqDense* support in-place factorizations and NULL permutations */ 116 CHKERRQ(PetscObjectBaseTypeCompare((PetscObject)mat,MATSEQDENSE,&inplace)); 117 CHKERRQ(MatGetLocalSize(mat,&i,NULL)); 118 CHKERRQ(MatGetOwnershipRange(mat,&j,NULL)); 119 CHKERRQ(ISCreateStride(PETSC_COMM_WORLD,i,j,1,&perm)); 120 121 CHKERRQ(MatGetInfo(mat,MAT_LOCAL,&info)); 122 ierr = PetscPrintf(PETSC_COMM_WORLD,"matrix nonzeros = %" PetscInt_FMT ", allocated nonzeros = %" PetscInt_FMT "\n", 123 (PetscInt)info.nz_used,(PetscInt)info.nz_allocated);CHKERRQ(ierr); 124 CHKERRQ(MatMult(mat,x,b)); 125 126 /* Cholesky factorization - perm and factinfo are ignored by LAPACK */ 127 /* in-place Cholesky */ 128 if (inplace) { 129 Mat RHS2; 130 131 CHKERRQ(MatDuplicate(mat,MAT_COPY_VALUES,&F)); 132 if (!ldl) CHKERRQ(MatSetOption(F,MAT_SPD,PETSC_TRUE)); 133 CHKERRQ(MatCholeskyFactor(F,perm,0)); 134 CHKERRQ(MatSolve(F,b,y)); 135 CHKERRQ(VecAXPY(y,-1.0,x)); 136 CHKERRQ(VecNorm(y,NORM_2,&norm)); 137 if (norm > tol) { 138 CHKERRQ(PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for in-place Cholesky %g\n",(double)norm)); 139 } 140 141 CHKERRQ(MatMatSolve(F,RHS,SOLU)); 142 CHKERRQ(MatMatMult(mat,SOLU,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&RHS2)); 143 CHKERRQ(MatAXPY(RHS,-1.0,RHS2,SAME_NONZERO_PATTERN)); 144 CHKERRQ(MatNorm(RHS,NORM_FROBENIUS,&norm)); 145 if (norm > tol) { 146 CHKERRQ(PetscPrintf(PETSC_COMM_WORLD,"Error: Norm of residual for in-place Cholesky (MatMatSolve) %g\n",(double)norm)); 147 } 148 CHKERRQ(MatDestroy(&F)); 149 CHKERRQ(MatDestroy(&RHS2)); 150 } 151 152 /* out-of-place Cholesky */ 153 CHKERRQ(MatGetFactor(mat,solver,MAT_FACTOR_CHOLESKY,&F)); 154 if (!ldl) CHKERRQ(MatSetOption(F,MAT_SPD,PETSC_TRUE)); 155 CHKERRQ(MatCholeskyFactorSymbolic(F,mat,perm,0)); 156 CHKERRQ(MatCholeskyFactorNumeric(F,mat,0)); 157 CHKERRQ(MatSolve(F,b,y)); 158 CHKERRQ(VecAXPY(y,-1.0,x)); 159 CHKERRQ(VecNorm(y,NORM_2,&norm)); 160 if (norm > tol) { 161 CHKERRQ(PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for out-of-place Cholesky %g\n",(double)norm)); 162 } 163 CHKERRQ(MatDestroy(&F)); 164 165 /* LU factorization - perms and factinfo are ignored by LAPACK */ 166 i = n-1; 167 CHKERRQ(MatZeroRows(mat,1,&i,-1.0,NULL,NULL)); 168 CHKERRQ(MatMult(mat,x,b)); 169 170 /* in-place LU */ 171 if (inplace) { 172 Mat RHS2; 173 174 CHKERRQ(MatDuplicate(mat,MAT_COPY_VALUES,&F)); 175 CHKERRQ(MatLUFactor(F,perm,perm,0)); 176 CHKERRQ(MatSolve(F,b,y)); 177 CHKERRQ(VecAXPY(y,-1.0,x)); 178 CHKERRQ(VecNorm(y,NORM_2,&norm)); 179 if (norm > tol) { 180 CHKERRQ(PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for in-place LU %g\n",(double)norm)); 181 } 182 CHKERRQ(MatMatSolve(F,RHS,SOLU)); 183 CHKERRQ(MatMatMult(mat,SOLU,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&RHS2)); 184 CHKERRQ(MatAXPY(RHS,-1.0,RHS2,SAME_NONZERO_PATTERN)); 185 CHKERRQ(MatNorm(RHS,NORM_FROBENIUS,&norm)); 186 if (norm > tol) { 187 CHKERRQ(PetscPrintf(PETSC_COMM_WORLD,"Error: Norm of residual for in-place LU (MatMatSolve) %g\n",(double)norm)); 188 } 189 CHKERRQ(MatDestroy(&F)); 190 CHKERRQ(MatDestroy(&RHS2)); 191 } 192 193 /* out-of-place LU */ 194 CHKERRQ(MatGetFactor(mat,solver,MAT_FACTOR_LU,&F)); 195 CHKERRQ(MatLUFactorSymbolic(F,mat,perm,perm,0)); 196 CHKERRQ(MatLUFactorNumeric(F,mat,0)); 197 CHKERRQ(MatSolve(F,b,y)); 198 CHKERRQ(VecAXPY(y,-1.0,x)); 199 CHKERRQ(VecNorm(y,NORM_2,&norm)); 200 if (norm > tol) { 201 CHKERRQ(PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for out-of-place LU %g\n",(double)norm)); 202 } 203 204 /* free space */ 205 CHKERRQ(ISDestroy(&perm)); 206 CHKERRQ(MatDestroy(&F)); 207 CHKERRQ(MatDestroy(&mat)); 208 CHKERRQ(MatDestroy(&RHS)); 209 CHKERRQ(MatDestroy(&SOLU)); 210 CHKERRQ(VecDestroy(&x)); 211 CHKERRQ(VecDestroy(&b)); 212 CHKERRQ(VecDestroy(&y)); 213 CHKERRQ(VecDestroy(&ytmp)); 214 215 if (qr) { 216 /* setup rectanglar */ 217 CHKERRQ(createMatsAndVecs(m, n, nrhs, full, &mat, &RHS, &SOLU, &x, &y, &b)); 218 CHKERRQ(VecDuplicate(y,&ytmp)); 219 220 /* QR factorization - perms and factinfo are ignored by LAPACK */ 221 CHKERRQ(MatMult(mat,x,b)); 222 223 /* in-place QR */ 224 if (inplace) { 225 Mat SOLU2; 226 227 CHKERRQ(MatDuplicate(mat,MAT_COPY_VALUES,&F)); 228 CHKERRQ(MatQRFactor(F,NULL,0)); 229 CHKERRQ(MatSolve(F,b,y)); 230 CHKERRQ(VecAXPY(y,-1.0,x)); 231 CHKERRQ(VecNorm(y,NORM_2,&norm)); 232 if (norm > tol) { 233 CHKERRQ(PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for in-place QR %g\n",(double)norm)); 234 } 235 CHKERRQ(MatMatMult(mat,SOLU,MAT_REUSE_MATRIX,PETSC_DEFAULT,&RHS)); 236 CHKERRQ(MatDuplicate(SOLU, MAT_DO_NOT_COPY_VALUES, &SOLU2)); 237 CHKERRQ(MatMatSolve(F,RHS,SOLU2)); 238 CHKERRQ(MatAXPY(SOLU2,-1.0,SOLU,SAME_NONZERO_PATTERN)); 239 CHKERRQ(MatNorm(SOLU2,NORM_FROBENIUS,&norm)); 240 if (norm > tol) { 241 CHKERRQ(PetscPrintf(PETSC_COMM_WORLD,"Error: Norm of error for in-place QR (MatMatSolve) %g\n",(double)norm)); 242 } 243 CHKERRQ(MatDestroy(&F)); 244 CHKERRQ(MatDestroy(&SOLU2)); 245 } 246 247 /* out-of-place QR */ 248 CHKERRQ(MatGetFactor(mat,solver,MAT_FACTOR_QR,&F)); 249 CHKERRQ(MatQRFactorSymbolic(F,mat,NULL,NULL)); 250 CHKERRQ(MatQRFactorNumeric(F,mat,NULL)); 251 CHKERRQ(MatSolve(F,b,y)); 252 CHKERRQ(VecAXPY(y,-1.0,x)); 253 CHKERRQ(VecNorm(y,NORM_2,&norm)); 254 if (norm > tol) { 255 CHKERRQ(PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for out-of-place QR %g\n",(double)norm)); 256 } 257 258 if (m == n) { 259 /* out-of-place MatSolveTranspose */ 260 CHKERRQ(MatMultTranspose(mat,x,b)); 261 CHKERRQ(MatSolveTranspose(F,b,y)); 262 CHKERRQ(VecAXPY(y,-1.0,x)); 263 CHKERRQ(VecNorm(y,NORM_2,&norm)); 264 if (norm > tol) { 265 CHKERRQ(PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for out-of-place QR %g\n",(double)norm)); 266 } 267 } 268 269 /* free space */ 270 CHKERRQ(MatDestroy(&F)); 271 CHKERRQ(MatDestroy(&mat)); 272 CHKERRQ(MatDestroy(&RHS)); 273 CHKERRQ(MatDestroy(&SOLU)); 274 CHKERRQ(VecDestroy(&x)); 275 CHKERRQ(VecDestroy(&b)); 276 CHKERRQ(VecDestroy(&y)); 277 CHKERRQ(VecDestroy(&ytmp)); 278 } 279 ierr = PetscFinalize(); 280 return ierr; 281 } 282 283 /*TEST 284 285 test: 286 287 test: 288 requires: cuda 289 suffix: seqdensecuda 290 args: -mat_type seqdensecuda -rhs_mat_type seqdensecuda -ldl 0 -solver_type {{petsc cuda}} 291 output_file: output/ex1_1.out 292 293 test: 294 requires: cuda 295 suffix: seqdensecuda_2 296 args: -ldl 0 -solver_type cuda 297 output_file: output/ex1_1.out 298 299 test: 300 requires: cuda 301 suffix: seqdensecuda_seqaijcusparse 302 args: -mat_type seqaijcusparse -rhs_mat_type seqdensecuda -qr 0 303 output_file: output/ex1_2.out 304 305 test: 306 requires: cuda viennacl 307 suffix: seqdensecuda_seqaijviennacl 308 args: -mat_type seqaijviennacl -rhs_mat_type seqdensecuda -qr 0 309 output_file: output/ex1_2.out 310 311 test: 312 suffix: 4 313 args: -m 10 -n 10 314 output_file: output/ex1_1.out 315 316 TEST*/ 317