1 2 static char help[] = "Tests LU, Cholesky, and QR factorization and MatMatSolve() for a sequential dense matrix. \n\ 3 For MATSEQDENSE matrix, the factorization is just a thin wrapper to LAPACK. \n\ 4 For MATSEQDENSECUDA, it uses cusolverDn routines \n\n"; 5 6 #include <petscmat.h> 7 8 static PetscErrorCode createMatsAndVecs(PetscInt m, PetscInt n, PetscInt nrhs, PetscBool full, Mat *_mat, Mat *_RHS, Mat *_SOLU, Vec *_x, Vec *_y, Vec *_b) 9 { 10 PetscRandom rand; 11 Mat mat,RHS,SOLU; 12 PetscInt rstart, rend; 13 PetscInt cstart, cend; 14 PetscScalar value = 1.0; 15 Vec x, y, b; 16 PetscErrorCode ierr; 17 18 PetscFunctionBegin; 19 /* create multiple vectors RHS and SOLU */ 20 ierr = MatCreate(PETSC_COMM_WORLD,&RHS);CHKERRQ(ierr); 21 ierr = MatSetSizes(RHS,PETSC_DECIDE,PETSC_DECIDE,m,nrhs);CHKERRQ(ierr); 22 ierr = MatSetType(RHS,MATDENSE);CHKERRQ(ierr); 23 ierr = MatSetOptionsPrefix(RHS,"rhs_");CHKERRQ(ierr); 24 ierr = MatSetFromOptions(RHS);CHKERRQ(ierr); 25 ierr = MatSeqDenseSetPreallocation(RHS,NULL);CHKERRQ(ierr); 26 27 ierr = PetscRandomCreate(PETSC_COMM_WORLD,&rand);CHKERRQ(ierr); 28 ierr = PetscRandomSetFromOptions(rand);CHKERRQ(ierr); 29 ierr = MatSetRandom(RHS,rand);CHKERRQ(ierr); 30 31 if (m == n) { 32 ierr = MatDuplicate(RHS,MAT_DO_NOT_COPY_VALUES,&SOLU);CHKERRQ(ierr); 33 } else { 34 ierr = MatCreate(PETSC_COMM_WORLD,&SOLU);CHKERRQ(ierr); 35 ierr = MatSetSizes(SOLU,PETSC_DECIDE,PETSC_DECIDE,n,nrhs);CHKERRQ(ierr); 36 ierr = MatSetType(SOLU,MATDENSE);CHKERRQ(ierr); 37 ierr = MatSeqDenseSetPreallocation(SOLU,NULL);CHKERRQ(ierr); 38 } 39 ierr = MatSetRandom(SOLU,rand);CHKERRQ(ierr); 40 41 /* create matrix */ 42 ierr = MatCreate(PETSC_COMM_WORLD,&mat);CHKERRQ(ierr); 43 ierr = MatSetSizes(mat,PETSC_DECIDE,PETSC_DECIDE,m,n);CHKERRQ(ierr); 44 ierr = MatSetType(mat,MATDENSE);CHKERRQ(ierr); 45 ierr = MatSetFromOptions(mat);CHKERRQ(ierr); 46 ierr = MatSetUp(mat);CHKERRQ(ierr); 47 ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr); 48 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 49 if (!full) { 50 for (PetscInt i=rstart; i<rend; i++) { 51 if (m == n) { 52 value = (PetscReal)i+1; 53 ierr = MatSetValues(mat,1,&i,1,&i,&value,INSERT_VALUES);CHKERRQ(ierr); 54 } else { 55 for (PetscInt j = cstart; j < cend; j++) { 56 value = ((PetscScalar)i+1.)/(PetscSqr(i - j) + 1.); 57 ierr = MatSetValues(mat,1,&i,1,&j,&value,INSERT_VALUES);CHKERRQ(ierr); 58 } 59 } 60 } 61 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 62 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 63 } else { 64 ierr = MatSetRandom(mat,rand);CHKERRQ(ierr); 65 if (m == n) { 66 Mat T; 67 68 ierr = MatMatTransposeMult(mat,mat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T);CHKERRQ(ierr); 69 ierr = MatDestroy(&mat);CHKERRQ(ierr); 70 mat = T; 71 } 72 } 73 74 /* create single vectors */ 75 ierr = MatCreateVecs(mat,&x,&b);CHKERRQ(ierr); 76 ierr = VecDuplicate(x,&y);CHKERRQ(ierr); 77 ierr = VecSet(x,value);CHKERRQ(ierr); 78 ierr = PetscRandomDestroy(&rand);CHKERRQ(ierr); 79 *_mat = mat; 80 *_RHS = RHS; 81 *_SOLU = SOLU; 82 *_x = x; 83 *_y = y; 84 *_b = b; 85 PetscFunctionReturn(0); 86 } 87 88 int main(int argc,char **argv) 89 { 90 Mat mat,F,RHS,SOLU; 91 MatInfo info; 92 PetscErrorCode ierr; 93 PetscInt m = 15, n = 10,i,j,nrhs=2; 94 Vec x,y,b,ytmp; 95 IS perm; 96 PetscReal norm,tol=PETSC_SMALL; 97 PetscMPIInt size; 98 char solver[64]; 99 PetscBool inplace,full = PETSC_FALSE, ldl = PETSC_TRUE, qr = PETSC_TRUE; 100 101 ierr = PetscInitialize(&argc,&argv,(char*) 0,help);if (ierr) return ierr; 102 ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRMPI(ierr); 103 if (size != 1) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This is a uniprocessor example only!"); 104 ierr = PetscStrcpy(solver,"petsc");CHKERRQ(ierr); 105 ierr = PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL);CHKERRQ(ierr); 106 ierr = PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);CHKERRQ(ierr); 107 ierr = PetscOptionsGetInt(NULL,NULL,"-nrhs",&nrhs,NULL);CHKERRQ(ierr); 108 ierr = PetscOptionsGetBool(NULL,NULL,"-ldl",&ldl,NULL);CHKERRQ(ierr); 109 ierr = PetscOptionsGetBool(NULL,NULL,"-qr",&qr,NULL);CHKERRQ(ierr); 110 ierr = PetscOptionsGetBool(NULL,NULL,"-full",&full,NULL);CHKERRQ(ierr); 111 ierr = PetscOptionsGetString(NULL,NULL,"-solver_type",solver,sizeof(solver),NULL);CHKERRQ(ierr); 112 113 ierr = createMatsAndVecs(n, n, nrhs, full, &mat, &RHS, &SOLU, &x, &y, &b);CHKERRQ(ierr); 114 ierr = VecDuplicate(y,&ytmp);CHKERRQ(ierr); 115 116 /* Only SeqDense* support in-place factorizations and NULL permutations */ 117 ierr = PetscObjectBaseTypeCompare((PetscObject)mat,MATSEQDENSE,&inplace);CHKERRQ(ierr); 118 ierr = MatGetLocalSize(mat,&i,NULL);CHKERRQ(ierr); 119 ierr = MatGetOwnershipRange(mat,&j,NULL);CHKERRQ(ierr); 120 ierr = ISCreateStride(PETSC_COMM_WORLD,i,j,1,&perm);CHKERRQ(ierr); 121 122 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 123 ierr = PetscPrintf(PETSC_COMM_WORLD,"matrix nonzeros = %D, allocated nonzeros = %D\n", 124 (PetscInt)info.nz_used,(PetscInt)info.nz_allocated);CHKERRQ(ierr); 125 ierr = MatMult(mat,x,b);CHKERRQ(ierr); 126 127 /* Cholesky factorization - perm and factinfo are ignored by LAPACK */ 128 /* in-place Cholesky */ 129 if (inplace) { 130 Mat RHS2; 131 132 ierr = MatDuplicate(mat,MAT_COPY_VALUES,&F);CHKERRQ(ierr); 133 if (!ldl) { ierr = MatSetOption(F,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); } 134 ierr = MatCholeskyFactor(F,perm,0);CHKERRQ(ierr); 135 ierr = MatSolve(F,b,y);CHKERRQ(ierr); 136 ierr = VecAXPY(y,-1.0,x);CHKERRQ(ierr); 137 ierr = VecNorm(y,NORM_2,&norm);CHKERRQ(ierr); 138 if (norm > tol) { 139 ierr = PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for in-place Cholesky %g\n",(double)norm);CHKERRQ(ierr); 140 } 141 142 ierr = MatMatSolve(F,RHS,SOLU);CHKERRQ(ierr); 143 ierr = MatMatMult(mat,SOLU,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&RHS2);CHKERRQ(ierr); 144 ierr = MatAXPY(RHS,-1.0,RHS2,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 145 ierr = MatNorm(RHS,NORM_FROBENIUS,&norm);CHKERRQ(ierr); 146 if (norm > tol) { 147 ierr = PetscPrintf(PETSC_COMM_WORLD,"Error: Norm of residual for in-place Cholesky (MatMatSolve) %g\n",(double)norm);CHKERRQ(ierr); 148 } 149 ierr = MatDestroy(&F);CHKERRQ(ierr); 150 ierr = MatDestroy(&RHS2);CHKERRQ(ierr); 151 } 152 153 /* out-of-place Cholesky */ 154 ierr = MatGetFactor(mat,solver,MAT_FACTOR_CHOLESKY,&F);CHKERRQ(ierr); 155 if (!ldl) { ierr = MatSetOption(F,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); } 156 ierr = MatCholeskyFactorSymbolic(F,mat,perm,0);CHKERRQ(ierr); 157 ierr = MatCholeskyFactorNumeric(F,mat,0);CHKERRQ(ierr); 158 ierr = MatSolve(F,b,y);CHKERRQ(ierr); 159 ierr = VecAXPY(y,-1.0,x);CHKERRQ(ierr); 160 ierr = VecNorm(y,NORM_2,&norm);CHKERRQ(ierr); 161 if (norm > tol) { 162 ierr = PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for out-of-place Cholesky %g\n",(double)norm);CHKERRQ(ierr); 163 } 164 ierr = MatDestroy(&F);CHKERRQ(ierr); 165 166 /* LU factorization - perms and factinfo are ignored by LAPACK */ 167 i = n-1; 168 ierr = MatZeroRows(mat,1,&i,-1.0,NULL,NULL);CHKERRQ(ierr); 169 ierr = MatMult(mat,x,b);CHKERRQ(ierr); 170 171 /* in-place LU */ 172 if (inplace) { 173 Mat RHS2; 174 175 ierr = MatDuplicate(mat,MAT_COPY_VALUES,&F);CHKERRQ(ierr); 176 ierr = MatLUFactor(F,perm,perm,0);CHKERRQ(ierr); 177 ierr = MatSolve(F,b,y);CHKERRQ(ierr); 178 ierr = VecAXPY(y,-1.0,x);CHKERRQ(ierr); 179 ierr = VecNorm(y,NORM_2,&norm);CHKERRQ(ierr); 180 if (norm > tol) { 181 ierr = PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for in-place LU %g\n",(double)norm);CHKERRQ(ierr); 182 } 183 ierr = MatMatSolve(F,RHS,SOLU);CHKERRQ(ierr); 184 ierr = MatMatMult(mat,SOLU,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&RHS2);CHKERRQ(ierr); 185 ierr = MatAXPY(RHS,-1.0,RHS2,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 186 ierr = MatNorm(RHS,NORM_FROBENIUS,&norm);CHKERRQ(ierr); 187 if (norm > tol) { 188 ierr = PetscPrintf(PETSC_COMM_WORLD,"Error: Norm of residual for in-place LU (MatMatSolve) %g\n",(double)norm);CHKERRQ(ierr); 189 } 190 ierr = MatDestroy(&F);CHKERRQ(ierr); 191 ierr = MatDestroy(&RHS2);CHKERRQ(ierr); 192 } 193 194 /* out-of-place LU */ 195 ierr = MatGetFactor(mat,solver,MAT_FACTOR_LU,&F);CHKERRQ(ierr); 196 ierr = MatLUFactorSymbolic(F,mat,perm,perm,0);CHKERRQ(ierr); 197 ierr = MatLUFactorNumeric(F,mat,0);CHKERRQ(ierr); 198 ierr = MatSolve(F,b,y);CHKERRQ(ierr); 199 ierr = VecAXPY(y,-1.0,x);CHKERRQ(ierr); 200 ierr = VecNorm(y,NORM_2,&norm);CHKERRQ(ierr); 201 if (norm > tol) { 202 ierr = PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for out-of-place LU %g\n",(double)norm);CHKERRQ(ierr); 203 } 204 205 /* free space */ 206 ierr = ISDestroy(&perm);CHKERRQ(ierr); 207 ierr = MatDestroy(&F);CHKERRQ(ierr); 208 ierr = MatDestroy(&mat);CHKERRQ(ierr); 209 ierr = MatDestroy(&RHS);CHKERRQ(ierr); 210 ierr = MatDestroy(&SOLU);CHKERRQ(ierr); 211 ierr = VecDestroy(&x);CHKERRQ(ierr); 212 ierr = VecDestroy(&b);CHKERRQ(ierr); 213 ierr = VecDestroy(&y);CHKERRQ(ierr); 214 ierr = VecDestroy(&ytmp);CHKERRQ(ierr); 215 216 if (qr) { 217 /* setup rectanglar */ 218 ierr = createMatsAndVecs(m, n, nrhs, full, &mat, &RHS, &SOLU, &x, &y, &b);CHKERRQ(ierr); 219 ierr = VecDuplicate(y,&ytmp);CHKERRQ(ierr); 220 221 /* QR factorization - perms and factinfo are ignored by LAPACK */ 222 ierr = MatMult(mat,x,b);CHKERRQ(ierr); 223 224 /* in-place QR */ 225 if (inplace) { 226 Mat SOLU2; 227 228 ierr = MatDuplicate(mat,MAT_COPY_VALUES,&F);CHKERRQ(ierr); 229 ierr = MatQRFactor(F,NULL,0);CHKERRQ(ierr); 230 ierr = MatSolve(F,b,y);CHKERRQ(ierr); 231 ierr = VecAXPY(y,-1.0,x);CHKERRQ(ierr); 232 ierr = VecNorm(y,NORM_2,&norm);CHKERRQ(ierr); 233 if (norm > tol) { 234 ierr = PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for in-place QR %g\n",(double)norm);CHKERRQ(ierr); 235 } 236 ierr = MatMatMult(mat,SOLU,MAT_REUSE_MATRIX,PETSC_DEFAULT,&RHS);CHKERRQ(ierr); 237 ierr = MatDuplicate(SOLU, MAT_DO_NOT_COPY_VALUES, &SOLU2);CHKERRQ(ierr); 238 ierr = MatMatSolve(F,RHS,SOLU2);CHKERRQ(ierr); 239 ierr = MatAXPY(SOLU2,-1.0,SOLU,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 240 ierr = MatNorm(SOLU2,NORM_FROBENIUS,&norm);CHKERRQ(ierr); 241 if (norm > tol) { 242 ierr = PetscPrintf(PETSC_COMM_WORLD,"Error: Norm of error for in-place QR (MatMatSolve) %g\n",(double)norm);CHKERRQ(ierr); 243 } 244 ierr = MatDestroy(&F);CHKERRQ(ierr); 245 ierr = MatDestroy(&SOLU2);CHKERRQ(ierr); 246 } 247 248 /* out-of-place QR */ 249 ierr = MatGetFactor(mat,solver,MAT_FACTOR_QR,&F);CHKERRQ(ierr); 250 ierr = MatQRFactorSymbolic(F,mat,NULL,NULL);CHKERRQ(ierr); 251 ierr = MatQRFactorNumeric(F,mat,NULL);CHKERRQ(ierr); 252 ierr = MatSolve(F,b,y);CHKERRQ(ierr); 253 ierr = VecAXPY(y,-1.0,x);CHKERRQ(ierr); 254 ierr = VecNorm(y,NORM_2,&norm);CHKERRQ(ierr); 255 if (norm > tol) { 256 ierr = PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for out-of-place QR %g\n",(double)norm);CHKERRQ(ierr); 257 } 258 259 if (m == n) { 260 /* out-of-place MatSolveTranspose */ 261 ierr = MatMultTranspose(mat,x,b);CHKERRQ(ierr); 262 ierr = MatSolveTranspose(F,b,y);CHKERRQ(ierr); 263 ierr = VecAXPY(y,-1.0,x);CHKERRQ(ierr); 264 ierr = VecNorm(y,NORM_2,&norm);CHKERRQ(ierr); 265 if (norm > tol) { 266 ierr = PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for out-of-place QR %g\n",(double)norm);CHKERRQ(ierr); 267 } 268 } 269 270 /* free space */ 271 ierr = MatDestroy(&F);CHKERRQ(ierr); 272 ierr = MatDestroy(&mat);CHKERRQ(ierr); 273 ierr = MatDestroy(&RHS);CHKERRQ(ierr); 274 ierr = MatDestroy(&SOLU);CHKERRQ(ierr); 275 ierr = VecDestroy(&x);CHKERRQ(ierr); 276 ierr = VecDestroy(&b);CHKERRQ(ierr); 277 ierr = VecDestroy(&y);CHKERRQ(ierr); 278 ierr = VecDestroy(&ytmp);CHKERRQ(ierr); 279 } 280 ierr = PetscFinalize(); 281 return ierr; 282 } 283 284 /*TEST 285 286 test: 287 288 test: 289 requires: cuda 290 suffix: seqdensecuda 291 args: -mat_type seqdensecuda -rhs_mat_type seqdensecuda -ldl 0 -solver_type {{petsc cuda}} 292 output_file: output/ex1_1.out 293 294 test: 295 requires: cuda 296 suffix: seqdensecuda_2 297 args: -ldl 0 -solver_type cuda 298 output_file: output/ex1_1.out 299 300 test: 301 requires: cuda 302 suffix: seqdensecuda_seqaijcusparse 303 args: -mat_type seqaijcusparse -rhs_mat_type seqdensecuda -qr 0 304 output_file: output/ex1_2.out 305 306 test: 307 requires: cuda viennacl 308 suffix: seqdensecuda_seqaijviennacl 309 args: -mat_type seqaijviennacl -rhs_mat_type seqdensecuda -qr 0 310 output_file: output/ex1_2.out 311 312 test: 313 suffix: 4 314 args: -m 10 -n 10 315 output_file: output/ex1_1.out 316 317 TEST*/ 318