1 #define PETSCMAT_DLL 2 3 /* 4 Provides an interface to the SuperLU_DIST_2.2 sparse solver 5 */ 6 7 #include "src/mat/impls/aij/seq/aij.h" 8 #include "src/mat/impls/aij/mpi/mpiaij.h" 9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get around weird problem with SuperLU on cray */ 10 #include "stdlib.h" 11 #endif 12 13 EXTERN_C_BEGIN 14 #if defined(PETSC_USE_COMPLEX) 15 #include "superlu_zdefs.h" 16 #else 17 #include "superlu_ddefs.h" 18 #endif 19 EXTERN_C_END 20 21 typedef enum {GLOBAL,DISTRIBUTED} SuperLU_MatInputMode; 22 23 typedef struct { 24 int_t nprow,npcol,*row,*col; 25 gridinfo_t grid; 26 superlu_options_t options; 27 SuperMatrix A_sup; 28 ScalePermstruct_t ScalePermstruct; 29 LUstruct_t LUstruct; 30 int StatPrint; 31 int MatInputMode; 32 SOLVEstruct_t SOLVEstruct; 33 fact_t FactPattern; 34 MPI_Comm comm_superlu; 35 #if defined(PETSC_USE_COMPLEX) 36 doublecomplex *val; 37 #else 38 double *val; 39 #endif 40 41 /* Flag to clean up (non-global) SuperLU objects during Destroy */ 42 PetscTruth CleanUpSuperLU_Dist; 43 } Mat_SuperLU_DIST; 44 45 extern PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat,PetscViewer); 46 extern PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat,MatFactorInfo *,Mat *); 47 extern PetscErrorCode MatDestroy_SuperLU_DIST(Mat); 48 extern PetscErrorCode MatView_SuperLU_DIST(Mat,PetscViewer); 49 extern PetscErrorCode MatSolve_SuperLU_DIST(Mat,Vec,Vec); 50 extern PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat,IS,IS,MatFactorInfo *,Mat *); 51 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 52 53 #undef __FUNCT__ 54 #define __FUNCT__ "MatDestroy_SuperLU_DIST" 55 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A) 56 { 57 PetscErrorCode ierr; 58 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 59 60 PetscFunctionBegin; 61 if (lu->CleanUpSuperLU_Dist) { 62 /* Deallocate SuperLU_DIST storage */ 63 if (lu->MatInputMode == GLOBAL) { 64 Destroy_CompCol_Matrix_dist(&lu->A_sup); 65 } else { 66 Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); 67 if ( lu->options.SolveInitialized ) { 68 #if defined(PETSC_USE_COMPLEX) 69 zSolveFinalize(&lu->options, &lu->SOLVEstruct); 70 #else 71 dSolveFinalize(&lu->options, &lu->SOLVEstruct); 72 #endif 73 } 74 } 75 Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct); 76 ScalePermstructFree(&lu->ScalePermstruct); 77 LUstructFree(&lu->LUstruct); 78 79 /* Release the SuperLU_DIST process grid. */ 80 superlu_gridexit(&lu->grid); 81 82 ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr); 83 } 84 85 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 86 PetscFunctionReturn(0); 87 } 88 89 #undef __FUNCT__ 90 #define __FUNCT__ "MatSolve_SuperLU_DIST" 91 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x) 92 { 93 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 94 PetscErrorCode ierr; 95 PetscMPIInt size; 96 PetscInt m=A->rmap->N, N=A->cmap->N; 97 SuperLUStat_t stat; 98 double berr[1]; 99 PetscScalar *bptr; 100 PetscInt info, nrhs=1; 101 Vec x_seq; 102 IS iden; 103 VecScatter scat; 104 105 PetscFunctionBegin; 106 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 107 if (size > 1) { 108 if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */ 109 ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr); 110 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr); 111 ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr); 112 ierr = ISDestroy(iden);CHKERRQ(ierr); 113 114 ierr = VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 115 ierr = VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 116 ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr); 117 } else { /* distributed mat input */ 118 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 119 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 120 } 121 } else { /* size == 1 */ 122 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 123 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 124 } 125 126 if (lu->options.Fact != FACTORED) SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED"); 127 128 PStatInit(&stat); /* Initialize the statistics variables. */ 129 if (lu->MatInputMode == GLOBAL) { 130 #if defined(PETSC_USE_COMPLEX) 131 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs, 132 &lu->grid, &lu->LUstruct, berr, &stat, &info); 133 #else 134 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs, 135 &lu->grid, &lu->LUstruct, berr, &stat, &info); 136 #endif 137 } else { /* distributed mat input */ 138 #if defined(PETSC_USE_COMPLEX) 139 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap->N, nrhs, &lu->grid, 140 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 141 if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info); 142 #else 143 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap->N, nrhs, &lu->grid, 144 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 145 if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info); 146 #endif 147 } 148 if (lu->options.PrintStat) { 149 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 150 } 151 PStatFree(&stat); 152 153 if (size > 1) { 154 if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */ 155 ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr); 156 ierr = VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 157 ierr = VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 158 ierr = VecScatterDestroy(scat);CHKERRQ(ierr); 159 ierr = VecDestroy(x_seq);CHKERRQ(ierr); 160 } else { 161 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 162 } 163 } else { 164 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 165 } 166 PetscFunctionReturn(0); 167 } 168 169 #undef __FUNCT__ 170 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST" 171 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F) 172 { 173 Mat *tseq,A_seq = PETSC_NULL; 174 Mat_SeqAIJ *aa,*bb; 175 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr; 176 PetscErrorCode ierr; 177 PetscInt M=A->rmap->N,N=A->cmap->N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray, 178 m=A->rmap->n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj; 179 PetscMPIInt size,rank; 180 SuperLUStat_t stat; 181 double *berr=0; 182 IS isrow; 183 PetscLogDouble time0,time,time_min,time_max; 184 Mat F_diag=PETSC_NULL; 185 #if defined(PETSC_USE_COMPLEX) 186 doublecomplex *av, *bv; 187 #else 188 double *av, *bv; 189 #endif 190 191 PetscFunctionBegin; 192 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 193 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 194 195 if (lu->options.PrintStat) { /* collect time for mat conversion */ 196 ierr = MPI_Barrier(((PetscObject)A)->comm);CHKERRQ(ierr); 197 ierr = PetscGetTime(&time0);CHKERRQ(ierr); 198 } 199 200 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 201 if (size > 1) { /* convert mpi A to seq mat A */ 202 ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr); 203 ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr); 204 ierr = ISDestroy(isrow);CHKERRQ(ierr); 205 206 A_seq = *tseq; 207 ierr = PetscFree(tseq);CHKERRQ(ierr); 208 aa = (Mat_SeqAIJ*)A_seq->data; 209 } else { 210 aa = (Mat_SeqAIJ*)A->data; 211 } 212 213 /* Convert Petsc NR matrix to SuperLU_DIST NC. 214 Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */ 215 if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */ 216 if (lu->FactPattern == SamePattern_SameRowPerm){ 217 Destroy_CompCol_Matrix_dist(&lu->A_sup); 218 /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */ 219 lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */ 220 } else { 221 Destroy_CompCol_Matrix_dist(&lu->A_sup); 222 Destroy_LU(N, &lu->grid, &lu->LUstruct); 223 lu->options.Fact = SamePattern; 224 } 225 } 226 #if defined(PETSC_USE_COMPLEX) 227 zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row); 228 #else 229 dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row); 230 #endif 231 232 /* Create compressed column matrix A_sup. */ 233 #if defined(PETSC_USE_COMPLEX) 234 zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE); 235 #else 236 dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE); 237 #endif 238 } else { /* distributed mat input */ 239 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 240 aa=(Mat_SeqAIJ*)(mat->A)->data; 241 bb=(Mat_SeqAIJ*)(mat->B)->data; 242 ai=aa->i; aj=aa->j; 243 bi=bb->i; bj=bb->j; 244 #if defined(PETSC_USE_COMPLEX) 245 av=(doublecomplex*)aa->a; 246 bv=(doublecomplex*)bb->a; 247 #else 248 av=aa->a; 249 bv=bb->a; 250 #endif 251 rstart = A->rmap->rstart; 252 nz = aa->nz + bb->nz; 253 garray = mat->garray; 254 255 if (lu->options.Fact == DOFACT) {/* first numeric factorization */ 256 #if defined(PETSC_USE_COMPLEX) 257 zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 258 #else 259 dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 260 #endif 261 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 262 if (lu->FactPattern == SamePattern_SameRowPerm){ 263 /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */ 264 lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */ 265 } else { 266 Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */ 267 lu->options.Fact = SamePattern; 268 } 269 } 270 nz = 0; irow = rstart; 271 for ( i=0; i<m; i++ ) { 272 lu->row[i] = nz; 273 countA = ai[i+1] - ai[i]; 274 countB = bi[i+1] - bi[i]; 275 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 276 bjj = bj + bi[i]; 277 278 /* B part, smaller col index */ 279 colA_start = rstart + ajj[0]; /* the smallest global col index of A */ 280 jB = 0; 281 for (j=0; j<countB; j++){ 282 jcol = garray[bjj[j]]; 283 if (jcol > colA_start) { 284 jB = j; 285 break; 286 } 287 lu->col[nz] = jcol; 288 lu->val[nz++] = *bv++; 289 if (j==countB-1) jB = countB; 290 } 291 292 /* A part */ 293 for (j=0; j<countA; j++){ 294 lu->col[nz] = rstart + ajj[j]; 295 lu->val[nz++] = *av++; 296 } 297 298 /* B part, larger col index */ 299 for (j=jB; j<countB; j++){ 300 lu->col[nz] = garray[bjj[j]]; 301 lu->val[nz++] = *bv++; 302 } 303 } 304 lu->row[m] = nz; 305 #if defined(PETSC_USE_COMPLEX) 306 zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 307 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE); 308 #else 309 dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 310 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE); 311 #endif 312 } 313 if (lu->options.PrintStat) { 314 ierr = PetscGetTime(&time);CHKERRQ(ierr); 315 time0 = time - time0; 316 } 317 318 /* Factor the matrix. */ 319 PStatInit(&stat); /* Initialize the statistics variables. */ 320 321 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 322 #if defined(PETSC_USE_COMPLEX) 323 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 324 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 325 #else 326 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 327 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 328 #endif 329 } else { /* distributed mat input */ 330 #if defined(PETSC_USE_COMPLEX) 331 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 332 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 333 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo); 334 #else 335 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 336 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 337 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo); 338 #endif 339 } 340 341 if (lu->MatInputMode == GLOBAL && size > 1){ 342 ierr = MatDestroy(A_seq);CHKERRQ(ierr); 343 } 344 345 if (lu->options.PrintStat) { 346 if (size > 1){ 347 ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,((PetscObject)A)->comm); 348 ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,((PetscObject)A)->comm); 349 ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,((PetscObject)A)->comm); 350 time = time/size; /* average time */ 351 if (!rank) { 352 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n %g / %g / %g\n",time_max,time_min,time);CHKERRQ(ierr); 353 } 354 } else { 355 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time: \n %g\n",time0);CHKERRQ(ierr); 356 } 357 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 358 } 359 PStatFree(&stat); 360 if (size > 1){ 361 F_diag = ((Mat_MPIAIJ *)(*F)->data)->A; 362 F_diag->assembled = PETSC_TRUE; 363 } 364 (*F)->assembled = PETSC_TRUE; 365 (*F)->preallocated = PETSC_TRUE; 366 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */ 367 PetscFunctionReturn(0); 368 } 369 370 /* Note the Petsc r and c permutations are ignored */ 371 #undef __FUNCT__ 372 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST" 373 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 374 { 375 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*) (*F)->spptr; 376 PetscInt M=A->rmap->N,N=A->cmap->N; 377 378 PetscFunctionBegin; 379 /* Initialize the SuperLU process grid. */ 380 superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid); 381 382 /* Initialize ScalePermstruct and LUstruct. */ 383 ScalePermstructInit(M, N, &lu->ScalePermstruct); 384 LUstructInit(M, N, &lu->LUstruct); 385 (*F)->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 386 (*F)->ops->solve = MatSolve_SuperLU_DIST; 387 PetscFunctionReturn(0); 388 } 389 390 EXTERN_C_BEGIN 391 #undef __FUNCT__ 392 #define __FUNCT__ "MatGetFactor_mpiaij_superlu_dist" 393 PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat A,MatFactorType ftype,Mat *F) 394 { 395 Mat B; 396 Mat_SuperLU_DIST *lu; 397 PetscErrorCode ierr; 398 PetscInt M=A->rmap->N,N=A->cmap->N,indx; 399 PetscMPIInt size; 400 superlu_options_t options; 401 PetscTruth flg; 402 const char *pctype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","PARMETIS"}; 403 const char *prtype[] = {"LargeDiag","NATURAL"}; 404 const char *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"}; 405 406 PetscFunctionBegin; 407 /* Create the factorization matrix */ 408 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 409 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,M,N);CHKERRQ(ierr); 410 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 411 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL); 412 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 413 414 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 415 B->factor = MAT_FACTOR_LU; 416 417 ierr = PetscNewLog(B,Mat_SuperLU_DIST,&lu);CHKERRQ(ierr); 418 B->spptr = lu; 419 420 /* Set the default input options: 421 options.Fact = DOFACT; 422 options.Equil = YES; 423 options.ParSymbFact = NO; 424 options.ColPerm = MMD_AT_PLUS_A; 425 options.RowPerm = LargeDiag; 426 options.ReplaceTinyPivot = YES; 427 options.IterRefine = DOUBLE; 428 options.Trans = NOTRANS; 429 options.SolveInitialized = NO; 430 options.RefineInitialized = NO; 431 options.PrintStat = YES; 432 */ 433 set_default_options_dist(&options); 434 435 ierr = MPI_Comm_dup(((PetscObject)A)->comm,&(lu->comm_superlu));CHKERRQ(ierr); 436 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 437 /* Default num of process columns and rows */ 438 lu->npcol = PetscMPIIntCast((PetscInt)(0.5 + sqrt((PetscReal)size))); 439 if (!lu->npcol) lu->npcol = 1; 440 while (lu->npcol > 0) { 441 lu->nprow = PetscMPIIntCast(size/lu->npcol); 442 if (size == lu->nprow * lu->npcol) break; 443 lu->npcol --; 444 } 445 446 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr); 447 ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr); 448 ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr); 449 if (size != lu->nprow * lu->npcol) 450 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %d * npcol %d",size,lu->nprow,lu->npcol); 451 452 lu->MatInputMode = DISTRIBUTED; 453 ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr); 454 if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL; 455 456 ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 457 if (!flg) { 458 options.Equil = NO; 459 } 460 461 ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr); 462 if (flg) { 463 switch (indx) { 464 case 0: 465 options.RowPerm = LargeDiag; 466 break; 467 case 1: 468 options.RowPerm = NOROWPERM; 469 break; 470 } 471 } 472 473 ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",pctype,4,pctype[0],&indx,&flg);CHKERRQ(ierr); 474 if (flg) { 475 switch (indx) { 476 case 0: 477 options.ColPerm = MMD_AT_PLUS_A; 478 break; 479 case 1: 480 options.ColPerm = NATURAL; 481 break; 482 case 2: 483 options.ColPerm = MMD_ATA; 484 break; 485 case 3: 486 options.ColPerm = PARMETIS; 487 break; 488 } 489 } 490 491 ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 492 if (!flg) { 493 options.ReplaceTinyPivot = NO; 494 } 495 496 options.ParSymbFact = NO; 497 ierr = PetscOptionsTruth("-mat_superlu_dist_parsymbfact","Parallel symbolic factorization","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 498 if (flg){ 499 #ifdef PETSC_HAVE_PARMETIS 500 options.ParSymbFact = YES; 501 options.ColPerm = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */ 502 #else 503 printf("parsymbfact needs PARMETIS"); 504 #endif 505 } 506 507 lu->FactPattern = SamePattern; 508 ierr = PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[0],&indx,&flg);CHKERRQ(ierr); 509 if (flg) { 510 switch (indx) { 511 case 0: 512 lu->FactPattern = SamePattern; 513 break; 514 case 1: 515 lu->FactPattern = SamePattern_SameRowPerm; 516 break; 517 } 518 } 519 520 options.IterRefine = NOREFINE; 521 ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 522 if (flg) { 523 options.IterRefine = DOUBLE; 524 } 525 526 if (PetscLogPrintInfo) { 527 options.PrintStat = YES; 528 } else { 529 options.PrintStat = NO; 530 } 531 ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None", 532 (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr); 533 PetscOptionsEnd(); 534 535 lu->options = options; 536 lu->options.Fact = DOFACT; 537 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 538 *F = B; 539 PetscFunctionReturn(0); 540 } 541 EXTERN_C_END 542 543 #undef __FUNCT__ 544 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST" 545 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer) 546 { 547 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->spptr; 548 superlu_options_t options; 549 PetscErrorCode ierr; 550 551 PetscFunctionBegin; 552 /* check if matrix is superlu_dist type */ 553 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0); 554 555 options = lu->options; 556 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr); 557 ierr = PetscViewerASCIIPrintf(viewer," Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 558 ierr = PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr); 559 ierr = PetscViewerASCIIPrintf(viewer," Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr); 560 ierr = PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr); 561 ierr = PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr); 562 ierr = PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 563 ierr = PetscViewerASCIIPrintf(viewer," Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr); 564 if (options.ColPerm == NATURAL) { 565 ierr = PetscViewerASCIIPrintf(viewer," Column permutation NATURAL\n");CHKERRQ(ierr); 566 } else if (options.ColPerm == MMD_AT_PLUS_A) { 567 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr); 568 } else if (options.ColPerm == MMD_ATA) { 569 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_ATA\n");CHKERRQ(ierr); 570 } else if (options.ColPerm == PARMETIS) { 571 ierr = PetscViewerASCIIPrintf(viewer," Column permutation PARMETIS\n");CHKERRQ(ierr); 572 } else { 573 SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 574 } 575 576 ierr = PetscViewerASCIIPrintf(viewer," Parallel symbolic factorization %s \n",PetscTruths[options.ParSymbFact != NO]);CHKERRQ(ierr); 577 578 if (lu->FactPattern == SamePattern){ 579 ierr = PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern\n");CHKERRQ(ierr); 580 } else { 581 ierr = PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern_SameRowPerm\n");CHKERRQ(ierr); 582 } 583 PetscFunctionReturn(0); 584 } 585 586 #undef __FUNCT__ 587 #define __FUNCT__ "MatView_SuperLU_DIST" 588 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer) 589 { 590 PetscErrorCode ierr; 591 PetscTruth iascii; 592 PetscViewerFormat format; 593 594 PetscFunctionBegin; 595 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 596 if (iascii) { 597 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 598 if (format == PETSC_VIEWER_ASCII_INFO) { 599 ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr); 600 } 601 } 602 PetscFunctionReturn(0); 603 } 604 605 606 /*MC 607 MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices 608 via the external package SuperLU_DIST. 609 610 If SuperLU_DIST is installed (see the manual for 611 instructions on how to declare the existence of external packages), 612 a matrix type can be constructed which invokes SuperLU_DIST solvers. 613 After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST) then 614 optionally call MatSeqAIJSetPreallocation() or MatMPIAIJSetPreallocation() DO NOT 615 call MatCreateSeqAIJ/MPIAIJ() directly or the preallocation information will be LOST! 616 617 This matrix inherits from MATSEQAIJ when constructed with a single process communicator, 618 and from MATMPIAIJ otherwise. As a result, for single process communicators, 619 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 620 for communicators controlling multiple processes. It is recommended that you call both of 621 the above preallocation routines for simplicity. One can also call MatConvert() for an inplace 622 conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size) 623 without data copy; but this MUST be called AFTER the matrix values are set. 624 625 626 627 Options Database Keys: 628 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions() 629 . -mat_superlu_dist_r <n> - number of rows in processor partition 630 . -mat_superlu_dist_c <n> - number of columns in processor partition 631 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed 632 . -mat_superlu_dist_equil - equilibrate the matrix 633 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation 634 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation 635 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 636 . -mat_superlu_dist_fact <SamePattern> (choose one of) SamePattern SamePattern_SameRowPerm 637 . -mat_superlu_dist_iterrefine - use iterative refinement 638 - -mat_superlu_dist_statprint - print factorization information 639 640 Level: beginner 641 642 .seealso: PCLU 643 M*/ 644 645