1 #define PETSCMAT_DLL 2 3 /* 4 Provides an interface to the SuperLU_DIST_2.2 sparse solver 5 */ 6 7 #include "../src/mat/impls/aij/seq/aij.h" 8 #include "../src/mat/impls/aij/mpi/mpiaij.h" 9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get around weird problem with SuperLU on cray */ 10 #include "stdlib.h" 11 #endif 12 13 #if defined(PETSC_USE_64BIT_INDICES) 14 /* ugly SuperLU_Dist variable telling it to use long long int */ 15 #define _LONGINT 16 #endif 17 18 EXTERN_C_BEGIN 19 #if defined(PETSC_USE_COMPLEX) 20 #include "superlu_zdefs.h" 21 #else 22 #include "superlu_ddefs.h" 23 #endif 24 EXTERN_C_END 25 26 typedef enum {GLOBAL,DISTRIBUTED} SuperLU_MatInputMode; 27 const char *SuperLU_MatInputModes[] = {"GLOBAL","DISTRIBUTED","SuperLU_MatInputMode","PETSC_",0}; 28 29 typedef struct { 30 int_t nprow,npcol,*row,*col; 31 gridinfo_t grid; 32 superlu_options_t options; 33 SuperMatrix A_sup; 34 ScalePermstruct_t ScalePermstruct; 35 LUstruct_t LUstruct; 36 int StatPrint; 37 SuperLU_MatInputMode MatInputMode; 38 SOLVEstruct_t SOLVEstruct; 39 fact_t FactPattern; 40 MPI_Comm comm_superlu; 41 #if defined(PETSC_USE_COMPLEX) 42 doublecomplex *val; 43 #else 44 double *val; 45 #endif 46 47 /* Flag to clean up (non-global) SuperLU objects during Destroy */ 48 PetscTruth CleanUpSuperLU_Dist; 49 } Mat_SuperLU_DIST; 50 51 extern PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat,PetscViewer); 52 extern PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat,Mat,const MatFactorInfo *); 53 extern PetscErrorCode MatDestroy_SuperLU_DIST(Mat); 54 extern PetscErrorCode MatView_SuperLU_DIST(Mat,PetscViewer); 55 extern PetscErrorCode MatSolve_SuperLU_DIST(Mat,Vec,Vec); 56 extern PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat,Mat,IS,IS,const MatFactorInfo *); 57 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 58 59 #undef __FUNCT__ 60 #define __FUNCT__ "MatDestroy_SuperLU_DIST" 61 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A) 62 { 63 PetscErrorCode ierr; 64 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 65 66 PetscFunctionBegin; 67 if (lu->CleanUpSuperLU_Dist) { 68 /* Deallocate SuperLU_DIST storage */ 69 if (lu->MatInputMode == GLOBAL) { 70 Destroy_CompCol_Matrix_dist(&lu->A_sup); 71 } else { 72 Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); 73 if ( lu->options.SolveInitialized ) { 74 #if defined(PETSC_USE_COMPLEX) 75 zSolveFinalize(&lu->options, &lu->SOLVEstruct); 76 #else 77 dSolveFinalize(&lu->options, &lu->SOLVEstruct); 78 #endif 79 } 80 } 81 Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct); 82 ScalePermstructFree(&lu->ScalePermstruct); 83 LUstructFree(&lu->LUstruct); 84 85 /* Release the SuperLU_DIST process grid. */ 86 superlu_gridexit(&lu->grid); 87 88 ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr); 89 } 90 91 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 92 PetscFunctionReturn(0); 93 } 94 95 #undef __FUNCT__ 96 #define __FUNCT__ "MatSolve_SuperLU_DIST" 97 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x) 98 { 99 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 100 PetscErrorCode ierr; 101 PetscMPIInt size; 102 PetscInt m=A->rmap->N, N=A->cmap->N; 103 SuperLUStat_t stat; 104 double berr[1]; 105 PetscScalar *bptr; 106 PetscInt nrhs=1; 107 Vec x_seq; 108 IS iden; 109 VecScatter scat; 110 int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */ 111 112 PetscFunctionBegin; 113 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 114 if (size > 1) { 115 if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */ 116 ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr); 117 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr); 118 ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr); 119 ierr = ISDestroy(iden);CHKERRQ(ierr); 120 121 ierr = VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 122 ierr = VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 123 ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr); 124 } else { /* distributed mat input */ 125 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 126 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 127 } 128 } else { /* size == 1 */ 129 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 130 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 131 } 132 133 if (lu->options.Fact != FACTORED) SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED"); 134 135 PStatInit(&stat); /* Initialize the statistics variables. */ 136 if (lu->MatInputMode == GLOBAL) { 137 #if defined(PETSC_USE_COMPLEX) 138 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs, 139 &lu->grid, &lu->LUstruct, berr, &stat, &info); 140 #else 141 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs, 142 &lu->grid, &lu->LUstruct, berr, &stat, &info); 143 #endif 144 } else { /* distributed mat input */ 145 #if defined(PETSC_USE_COMPLEX) 146 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap->N, nrhs, &lu->grid, 147 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 148 if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info); 149 #else 150 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap->N, nrhs, &lu->grid, 151 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 152 if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info); 153 #endif 154 } 155 if (lu->options.PrintStat) { 156 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 157 } 158 PStatFree(&stat); 159 160 if (size > 1) { 161 if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */ 162 ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr); 163 ierr = VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 164 ierr = VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 165 ierr = VecScatterDestroy(scat);CHKERRQ(ierr); 166 ierr = VecDestroy(x_seq);CHKERRQ(ierr); 167 } else { 168 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 169 } 170 } else { 171 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 172 } 173 PetscFunctionReturn(0); 174 } 175 176 #undef __FUNCT__ 177 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST" 178 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F,Mat A,const MatFactorInfo *info) 179 { 180 Mat *tseq,A_seq = PETSC_NULL; 181 Mat_SeqAIJ *aa,*bb; 182 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(F)->spptr; 183 PetscErrorCode ierr; 184 PetscInt M=A->rmap->N,N=A->cmap->N,i,*ai,*aj,*bi,*bj,nz,rstart,*garray, 185 m=A->rmap->n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj; 186 int sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */ 187 PetscMPIInt size,rank; 188 SuperLUStat_t stat; 189 double *berr=0; 190 IS isrow; 191 PetscLogDouble time0,time,time_min,time_max; 192 Mat F_diag=PETSC_NULL; 193 #if defined(PETSC_USE_COMPLEX) 194 doublecomplex *av, *bv; 195 #else 196 double *av, *bv; 197 #endif 198 199 PetscFunctionBegin; 200 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 201 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 202 203 if (lu->options.PrintStat) { /* collect time for mat conversion */ 204 ierr = MPI_Barrier(((PetscObject)A)->comm);CHKERRQ(ierr); 205 ierr = PetscGetTime(&time0);CHKERRQ(ierr); 206 } 207 208 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 209 if (size > 1) { /* convert mpi A to seq mat A */ 210 ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr); 211 ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr); 212 ierr = ISDestroy(isrow);CHKERRQ(ierr); 213 214 A_seq = *tseq; 215 ierr = PetscFree(tseq);CHKERRQ(ierr); 216 aa = (Mat_SeqAIJ*)A_seq->data; 217 } else { 218 aa = (Mat_SeqAIJ*)A->data; 219 } 220 221 /* Convert Petsc NR matrix to SuperLU_DIST NC. 222 Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */ 223 if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */ 224 if (lu->FactPattern == SamePattern_SameRowPerm){ 225 Destroy_CompCol_Matrix_dist(&lu->A_sup); 226 /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */ 227 lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */ 228 } else { 229 Destroy_CompCol_Matrix_dist(&lu->A_sup); 230 Destroy_LU(N, &lu->grid, &lu->LUstruct); 231 lu->options.Fact = SamePattern; 232 } 233 } 234 #if defined(PETSC_USE_COMPLEX) 235 zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row); 236 #else 237 dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row); 238 #endif 239 240 /* Create compressed column matrix A_sup. */ 241 #if defined(PETSC_USE_COMPLEX) 242 zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE); 243 #else 244 dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE); 245 #endif 246 } else { /* distributed mat input */ 247 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 248 aa=(Mat_SeqAIJ*)(mat->A)->data; 249 bb=(Mat_SeqAIJ*)(mat->B)->data; 250 ai=aa->i; aj=aa->j; 251 bi=bb->i; bj=bb->j; 252 #if defined(PETSC_USE_COMPLEX) 253 av=(doublecomplex*)aa->a; 254 bv=(doublecomplex*)bb->a; 255 #else 256 av=aa->a; 257 bv=bb->a; 258 #endif 259 rstart = A->rmap->rstart; 260 nz = aa->nz + bb->nz; 261 garray = mat->garray; 262 263 if (lu->options.Fact == DOFACT) {/* first numeric factorization */ 264 #if defined(PETSC_USE_COMPLEX) 265 zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 266 #else 267 dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 268 #endif 269 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 270 if (lu->FactPattern == SamePattern_SameRowPerm){ 271 /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */ 272 lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */ 273 } else { 274 Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */ 275 lu->options.Fact = SamePattern; 276 } 277 } 278 nz = 0; irow = rstart; 279 for ( i=0; i<m; i++ ) { 280 lu->row[i] = nz; 281 countA = ai[i+1] - ai[i]; 282 countB = bi[i+1] - bi[i]; 283 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 284 bjj = bj + bi[i]; 285 286 /* B part, smaller col index */ 287 colA_start = rstart + ajj[0]; /* the smallest global col index of A */ 288 jB = 0; 289 for (j=0; j<countB; j++){ 290 jcol = garray[bjj[j]]; 291 if (jcol > colA_start) { 292 jB = j; 293 break; 294 } 295 lu->col[nz] = jcol; 296 lu->val[nz++] = *bv++; 297 if (j==countB-1) jB = countB; 298 } 299 300 /* A part */ 301 for (j=0; j<countA; j++){ 302 lu->col[nz] = rstart + ajj[j]; 303 lu->val[nz++] = *av++; 304 } 305 306 /* B part, larger col index */ 307 for (j=jB; j<countB; j++){ 308 lu->col[nz] = garray[bjj[j]]; 309 lu->val[nz++] = *bv++; 310 } 311 } 312 lu->row[m] = nz; 313 #if defined(PETSC_USE_COMPLEX) 314 zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 315 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE); 316 #else 317 dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 318 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE); 319 #endif 320 } 321 if (lu->options.PrintStat) { 322 ierr = PetscGetTime(&time);CHKERRQ(ierr); 323 time0 = time - time0; 324 } 325 326 /* Factor the matrix. */ 327 PStatInit(&stat); /* Initialize the statistics variables. */ 328 CHKMEMQ; 329 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 330 #if defined(PETSC_USE_COMPLEX) 331 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 332 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 333 #else 334 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 335 &lu->grid, &lu->LUstruct, berr, &stat, &sinfo); 336 #endif 337 } else { /* distributed mat input */ 338 #if defined(PETSC_USE_COMPLEX) 339 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 340 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 341 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo); 342 #else 343 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 344 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo); 345 if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo); 346 #endif 347 } 348 349 if (lu->MatInputMode == GLOBAL && size > 1){ 350 ierr = MatDestroy(A_seq);CHKERRQ(ierr); 351 } 352 353 if (lu->options.PrintStat) { 354 if (size > 1){ 355 ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,((PetscObject)A)->comm); 356 ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,((PetscObject)A)->comm); 357 ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,((PetscObject)A)->comm); 358 time = time/size; /* average time */ 359 if (!rank) { 360 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n %g / %g / %g\n",time_max,time_min,time);CHKERRQ(ierr); 361 } 362 } else { 363 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time: \n %g\n",time0);CHKERRQ(ierr); 364 } 365 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 366 } 367 PStatFree(&stat); 368 if (size > 1){ 369 F_diag = ((Mat_MPIAIJ *)(F)->data)->A; 370 F_diag->assembled = PETSC_TRUE; 371 } 372 (F)->assembled = PETSC_TRUE; 373 (F)->preallocated = PETSC_TRUE; 374 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */ 375 PetscFunctionReturn(0); 376 } 377 378 /* Note the Petsc r and c permutations are ignored */ 379 #undef __FUNCT__ 380 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST" 381 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 382 { 383 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*) (F)->spptr; 384 PetscInt M=A->rmap->N,N=A->cmap->N; 385 386 PetscFunctionBegin; 387 /* Initialize the SuperLU process grid. */ 388 superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid); 389 390 /* Initialize ScalePermstruct and LUstruct. */ 391 ScalePermstructInit(M, N, &lu->ScalePermstruct); 392 LUstructInit(M, N, &lu->LUstruct); 393 (F)->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 394 (F)->ops->solve = MatSolve_SuperLU_DIST; 395 PetscFunctionReturn(0); 396 } 397 398 EXTERN_C_BEGIN 399 #undef __FUNCT__ 400 #define __FUNCT__ "MatFactorGetSolverPackage_aij_superlu_dist" 401 PetscErrorCode MatFactorGetSolverPackage_aij_superlu_dist(Mat A,const MatSolverPackage *type) 402 { 403 PetscFunctionBegin; 404 *type = MAT_SOLVER_SUPERLU_DIST; 405 PetscFunctionReturn(0); 406 } 407 EXTERN_C_END 408 409 #undef __FUNCT__ 410 #define __FUNCT__ "MatGetFactor_aij_superlu_dist" 411 PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A,MatFactorType ftype,Mat *F) 412 { 413 Mat B; 414 Mat_SuperLU_DIST *lu; 415 PetscErrorCode ierr; 416 PetscInt M=A->rmap->N,N=A->cmap->N,indx; 417 PetscMPIInt size; 418 superlu_options_t options; 419 PetscTruth flg; 420 const char *pctype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","PARMETIS"}; 421 const char *prtype[] = {"LargeDiag","NATURAL"}; 422 const char *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"}; 423 424 PetscFunctionBegin; 425 /* Create the factorization matrix */ 426 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 427 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,M,N);CHKERRQ(ierr); 428 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 429 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL); 430 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 431 432 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 433 B->ops->view = MatView_SuperLU_DIST; 434 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_aij_superlu_dist",MatFactorGetSolverPackage_aij_superlu_dist);CHKERRQ(ierr); 435 B->factor = MAT_FACTOR_LU; 436 437 ierr = PetscNewLog(B,Mat_SuperLU_DIST,&lu);CHKERRQ(ierr); 438 B->spptr = lu; 439 440 /* Set the default input options: 441 options.Fact = DOFACT; 442 options.Equil = YES; 443 options.ParSymbFact = NO; 444 options.ColPerm = MMD_AT_PLUS_A; 445 options.RowPerm = LargeDiag; 446 options.ReplaceTinyPivot = YES; 447 options.IterRefine = DOUBLE; 448 options.Trans = NOTRANS; 449 options.SolveInitialized = NO; 450 options.RefineInitialized = NO; 451 options.PrintStat = YES; 452 */ 453 set_default_options_dist(&options); 454 455 ierr = MPI_Comm_dup(((PetscObject)A)->comm,&(lu->comm_superlu));CHKERRQ(ierr); 456 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 457 /* Default num of process columns and rows */ 458 lu->npcol = PetscMPIIntCast((PetscInt)(0.5 + sqrt((PetscReal)size))); 459 if (!lu->npcol) lu->npcol = 1; 460 while (lu->npcol > 0) { 461 lu->nprow = PetscMPIIntCast(size/lu->npcol); 462 if (size == lu->nprow * lu->npcol) break; 463 lu->npcol --; 464 } 465 466 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr); 467 ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr); 468 ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr); 469 if (size != lu->nprow * lu->npcol) 470 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %d * npcol %d",size,lu->nprow,lu->npcol); 471 472 lu->MatInputMode = DISTRIBUTED; 473 ierr = PetscOptionsEnum("-mat_superlu_dist_matinput","Matrix input mode (global or distributed)","None",SuperLU_MatInputModes,(PetscEnum)lu->MatInputMode,(PetscEnum*)&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr); 474 if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL; 475 476 ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 477 if (!flg) { 478 options.Equil = NO; 479 } 480 481 ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr); 482 if (flg) { 483 switch (indx) { 484 case 0: 485 options.RowPerm = LargeDiag; 486 break; 487 case 1: 488 options.RowPerm = NOROWPERM; 489 break; 490 } 491 } 492 493 ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",pctype,4,pctype[0],&indx,&flg);CHKERRQ(ierr); 494 if (flg) { 495 switch (indx) { 496 case 0: 497 options.ColPerm = MMD_AT_PLUS_A; 498 break; 499 case 1: 500 options.ColPerm = NATURAL; 501 break; 502 case 2: 503 options.ColPerm = MMD_ATA; 504 break; 505 case 3: 506 options.ColPerm = PARMETIS; 507 break; 508 } 509 } 510 511 ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 512 if (!flg) { 513 options.ReplaceTinyPivot = NO; 514 } 515 516 options.ParSymbFact = NO; 517 ierr = PetscOptionsTruth("-mat_superlu_dist_parsymbfact","Parallel symbolic factorization","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 518 if (flg){ 519 #ifdef PETSC_HAVE_PARMETIS 520 options.ParSymbFact = YES; 521 options.ColPerm = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */ 522 #else 523 printf("parsymbfact needs PARMETIS"); 524 #endif 525 } 526 527 lu->FactPattern = SamePattern; 528 ierr = PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[0],&indx,&flg);CHKERRQ(ierr); 529 if (flg) { 530 switch (indx) { 531 case 0: 532 lu->FactPattern = SamePattern; 533 break; 534 case 1: 535 lu->FactPattern = SamePattern_SameRowPerm; 536 break; 537 } 538 } 539 540 options.IterRefine = NOREFINE; 541 ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 542 if (flg) { 543 options.IterRefine = DOUBLE; 544 } 545 546 if (PetscLogPrintInfo) { 547 options.PrintStat = YES; 548 } else { 549 options.PrintStat = NO; 550 } 551 ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None", 552 (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr); 553 PetscOptionsEnd(); 554 555 lu->options = options; 556 lu->options.Fact = DOFACT; 557 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 558 *F = B; 559 PetscFunctionReturn(0); 560 } 561 562 EXTERN_C_BEGIN 563 #undef __FUNCT__ 564 #define __FUNCT__ "MatGetFactor_seqaij_superlu_dist" 565 PetscErrorCode MatGetFactor_seqaij_superlu_dist(Mat A,MatFactorType ftype,Mat *F) 566 { 567 PetscErrorCode ierr; 568 569 PetscFunctionBegin; 570 ierr = MatGetFactor_aij_superlu_dist(A,ftype,F);CHKERRQ(ierr); 571 PetscFunctionReturn(0); 572 } 573 EXTERN_C_END 574 575 EXTERN_C_BEGIN 576 #undef __FUNCT__ 577 #define __FUNCT__ "MatGetFactor_mpiaij_superlu_dist" 578 PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat A,MatFactorType ftype,Mat *F) 579 { 580 PetscErrorCode ierr; 581 582 PetscFunctionBegin; 583 ierr = MatGetFactor_aij_superlu_dist(A,ftype,F);CHKERRQ(ierr); 584 PetscFunctionReturn(0); 585 } 586 EXTERN_C_END 587 588 #undef __FUNCT__ 589 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST" 590 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer) 591 { 592 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->spptr; 593 superlu_options_t options; 594 PetscErrorCode ierr; 595 596 PetscFunctionBegin; 597 /* check if matrix is superlu_dist type */ 598 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0); 599 600 options = lu->options; 601 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr); 602 ierr = PetscViewerASCIIPrintf(viewer," Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 603 ierr = PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr); 604 ierr = PetscViewerASCIIPrintf(viewer," Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr); 605 ierr = PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr); 606 ierr = PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr); 607 ierr = PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 608 ierr = PetscViewerASCIIPrintf(viewer," Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr); 609 if (options.ColPerm == NATURAL) { 610 ierr = PetscViewerASCIIPrintf(viewer," Column permutation NATURAL\n");CHKERRQ(ierr); 611 } else if (options.ColPerm == MMD_AT_PLUS_A) { 612 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr); 613 } else if (options.ColPerm == MMD_ATA) { 614 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_ATA\n");CHKERRQ(ierr); 615 } else if (options.ColPerm == PARMETIS) { 616 ierr = PetscViewerASCIIPrintf(viewer," Column permutation PARMETIS\n");CHKERRQ(ierr); 617 } else { 618 SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 619 } 620 621 ierr = PetscViewerASCIIPrintf(viewer," Parallel symbolic factorization %s \n",PetscTruths[options.ParSymbFact != NO]);CHKERRQ(ierr); 622 623 if (lu->FactPattern == SamePattern){ 624 ierr = PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern\n");CHKERRQ(ierr); 625 } else { 626 ierr = PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern_SameRowPerm\n");CHKERRQ(ierr); 627 } 628 PetscFunctionReturn(0); 629 } 630 631 #undef __FUNCT__ 632 #define __FUNCT__ "MatView_SuperLU_DIST" 633 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer) 634 { 635 PetscErrorCode ierr; 636 PetscTruth iascii; 637 PetscViewerFormat format; 638 639 PetscFunctionBegin; 640 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 641 if (iascii) { 642 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 643 if (format == PETSC_VIEWER_ASCII_INFO) { 644 ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr); 645 } 646 } 647 PetscFunctionReturn(0); 648 } 649 650 651 /*MC 652 MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices 653 via the external package SuperLU_DIST. 654 655 If SuperLU_DIST is installed (see the manual for 656 instructions on how to declare the existence of external packages), 657 a matrix type can be constructed which invokes SuperLU_DIST solvers. 658 After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST) then 659 optionally call MatSeqAIJSetPreallocation() or MatMPIAIJSetPreallocation() DO NOT 660 call MatCreateSeqAIJ/MPIAIJ() directly or the preallocation information will be LOST! 661 662 This matrix inherits from MATSEQAIJ when constructed with a single process communicator, 663 and from MATMPIAIJ otherwise. As a result, for single process communicators, 664 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 665 for communicators controlling multiple processes. It is recommended that you call both of 666 the above preallocation routines for simplicity. One can also call MatConvert() for an inplace 667 conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size) 668 without data copy; but this MUST be called AFTER the matrix values are set. 669 670 671 672 Options Database Keys: 673 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions() 674 . -mat_superlu_dist_r <n> - number of rows in processor partition 675 . -mat_superlu_dist_c <n> - number of columns in processor partition 676 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed 677 . -mat_superlu_dist_equil - equilibrate the matrix 678 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation 679 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation 680 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 681 . -mat_superlu_dist_fact <SamePattern> (choose one of) SamePattern SamePattern_SameRowPerm 682 . -mat_superlu_dist_iterrefine - use iterative refinement 683 - -mat_superlu_dist_statprint - print factorization information 684 685 Level: beginner 686 687 .seealso: PCLU 688 M*/ 689 690