1 /* 2 Provides an interface to the SuperLU_DIST_2.0 sparse solver 3 */ 4 5 #include "src/mat/impls/aij/seq/aij.h" 6 #include "src/mat/impls/aij/mpi/mpiaij.h" 7 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */ 8 #include "stdlib.h" 9 #endif 10 11 EXTERN_C_BEGIN 12 #if defined(PETSC_USE_COMPLEX) 13 #include "superlu_zdefs.h" 14 #else 15 #include "superlu_ddefs.h" 16 #endif 17 EXTERN_C_END 18 19 typedef enum { GLOBAL,DISTRIBUTED 20 } SuperLU_MatInputMode; 21 22 typedef struct { 23 int_t nprow,npcol,*row,*col; 24 gridinfo_t grid; 25 superlu_options_t options; 26 SuperMatrix A_sup; 27 ScalePermstruct_t ScalePermstruct; 28 LUstruct_t LUstruct; 29 int StatPrint; 30 int MatInputMode; 31 SOLVEstruct_t SOLVEstruct; 32 MatStructure flg; 33 MPI_Comm comm_superlu; 34 #if defined(PETSC_USE_COMPLEX) 35 doublecomplex *val; 36 #else 37 double *val; 38 #endif 39 40 /* A few function pointers for inheritance */ 41 PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*); 42 PetscErrorCode (*MatView)(Mat,PetscViewer); 43 PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType); 44 PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 45 PetscErrorCode (*MatDestroy)(Mat); 46 47 /* Flag to clean up (non-global) SuperLU objects during Destroy */ 48 PetscTruth CleanUpSuperLU_Dist; 49 } Mat_SuperLU_DIST; 50 51 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*); 52 53 EXTERN_C_BEGIN 54 #undef __FUNCT__ 55 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base" 56 PetscErrorCode MatConvert_SuperLU_DIST_Base(Mat A,const MatType type,Mat *newmat) { 57 PetscErrorCode ierr; 58 Mat B=*newmat; 59 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 60 61 PetscFunctionBegin; 62 if (B != A) { 63 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 64 } 65 /* Reset the original function pointers */ 66 B->ops->duplicate = lu->MatDuplicate; 67 B->ops->view = lu->MatView; 68 B->ops->assemblyend = lu->MatAssemblyEnd; 69 B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic; 70 B->ops->destroy = lu->MatDestroy; 71 72 ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr); 73 ierr = PetscFree(lu);CHKERRQ(ierr); 74 75 *newmat = B; 76 PetscFunctionReturn(0); 77 } 78 EXTERN_C_END 79 80 #undef __FUNCT__ 81 #define __FUNCT__ "MatDestroy_SuperLU_DIST" 82 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A) 83 { 84 PetscErrorCode ierr; 85 int size; 86 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 87 88 PetscFunctionBegin; 89 if (lu->CleanUpSuperLU_Dist) { 90 /* Deallocate SuperLU_DIST storage */ 91 if (lu->MatInputMode == GLOBAL) { 92 Destroy_CompCol_Matrix_dist(&lu->A_sup); 93 } else { 94 Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); 95 if ( lu->options.SolveInitialized ) { 96 #if defined(PETSC_USE_COMPLEX) 97 zSolveFinalize(&lu->options, &lu->SOLVEstruct); 98 #else 99 dSolveFinalize(&lu->options, &lu->SOLVEstruct); 100 #endif 101 } 102 } 103 Destroy_LU(A->N, &lu->grid, &lu->LUstruct); 104 ScalePermstructFree(&lu->ScalePermstruct); 105 LUstructFree(&lu->LUstruct); 106 107 /* Release the SuperLU_DIST process grid. */ 108 superlu_gridexit(&lu->grid); 109 110 ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr); 111 } 112 113 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 114 if (size == 1) { 115 ierr = MatConvert_SuperLU_DIST_Base(A,MATSEQAIJ,&A);CHKERRQ(ierr); 116 } else { 117 ierr = MatConvert_SuperLU_DIST_Base(A,MATMPIAIJ,&A);CHKERRQ(ierr); 118 } 119 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 120 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatSolve_SuperLU_DIST" 126 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x) 127 { 128 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr; 129 PetscErrorCode ierr; 130 int size; 131 int m=A->M, N=A->N; 132 SuperLUStat_t stat; 133 double berr[1]; 134 PetscScalar *bptr; 135 int info, nrhs=1; 136 Vec x_seq; 137 IS iden; 138 VecScatter scat; 139 140 PetscFunctionBegin; 141 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 142 if (size > 1) { 143 if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */ 144 ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr); 145 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr); 146 ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr); 147 ierr = ISDestroy(iden);CHKERRQ(ierr); 148 149 ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 150 ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr); 151 ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr); 152 } else { /* distributed mat input */ 153 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 154 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 155 } 156 } else { /* size == 1 */ 157 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 158 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 159 } 160 161 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/ 162 163 PStatInit(&stat); /* Initialize the statistics variables. */ 164 165 if (lu->MatInputMode == GLOBAL) { 166 #if defined(PETSC_USE_COMPLEX) 167 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs, 168 &lu->grid, &lu->LUstruct, berr, &stat, &info); 169 #else 170 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs, 171 &lu->grid, &lu->LUstruct, berr, &stat, &info); 172 #endif 173 } else { /* distributed mat input */ 174 #if defined(PETSC_USE_COMPLEX) 175 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid, 176 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 177 if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info); 178 #else 179 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid, 180 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 181 if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info); 182 #endif 183 } 184 if (lu->StatPrint) { 185 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 186 } 187 PStatFree(&stat); 188 189 if (size > 1) { 190 if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */ 191 ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr); 192 ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 193 ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr); 194 ierr = VecScatterDestroy(scat);CHKERRQ(ierr); 195 ierr = VecDestroy(x_seq);CHKERRQ(ierr); 196 } else { 197 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 198 } 199 } else { 200 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 201 } 202 PetscFunctionReturn(0); 203 } 204 205 #undef __FUNCT__ 206 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST" 207 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,Mat *F) 208 { 209 Mat *tseq,A_seq = PETSC_NULL; 210 Mat_SeqAIJ *aa,*bb; 211 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr; 212 PetscErrorCode ierr; 213 int M=A->M,N=A->N,info,size,rank,i,*ai,*aj,*bi,*bj,nz,rstart,*garray, 214 m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj; 215 SuperLUStat_t stat; 216 double *berr=0; 217 IS isrow; 218 PetscLogDouble time0,time,time_min,time_max; 219 #if defined(PETSC_USE_COMPLEX) 220 doublecomplex *av, *bv; 221 #else 222 double *av, *bv; 223 #endif 224 225 PetscFunctionBegin; 226 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 227 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 228 229 if (lu->StatPrint) { /* collect time for mat conversion */ 230 ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); 231 ierr = PetscGetTime(&time0);CHKERRQ(ierr); 232 } 233 234 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 235 if (size > 1) { /* convert mpi A to seq mat A */ 236 ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr); 237 ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr); 238 ierr = ISDestroy(isrow);CHKERRQ(ierr); 239 240 A_seq = *tseq; 241 ierr = PetscFree(tseq);CHKERRQ(ierr); 242 aa = (Mat_SeqAIJ*)A_seq->data; 243 } else { 244 aa = (Mat_SeqAIJ*)A->data; 245 } 246 247 /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */ 248 if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */ 249 #if defined(PETSC_USE_COMPLEX) 250 zallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row); 251 #else 252 dallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row); 253 #endif 254 } else { /* successive numeric factorization, sparsity pattern is reused. */ 255 Destroy_CompCol_Matrix_dist(&lu->A_sup); 256 Destroy_LU(N, &lu->grid, &lu->LUstruct); 257 lu->options.Fact = SamePattern; 258 } 259 #if defined(PETSC_USE_COMPLEX) 260 zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row); 261 #else 262 dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row); 263 #endif 264 265 /* Create compressed column matrix A_sup. */ 266 #if defined(PETSC_USE_COMPLEX) 267 zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE); 268 #else 269 dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE); 270 #endif 271 } else { /* distributed mat input */ 272 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 273 aa=(Mat_SeqAIJ*)(mat->A)->data; 274 bb=(Mat_SeqAIJ*)(mat->B)->data; 275 ai=aa->i; aj=aa->j; 276 bi=bb->i; bj=bb->j; 277 #if defined(PETSC_USE_COMPLEX) 278 av=(doublecomplex*)aa->a; 279 bv=(doublecomplex*)bb->a; 280 #else 281 av=aa->a; 282 bv=bb->a; 283 #endif 284 rstart = mat->rstart; 285 nz = aa->nz + bb->nz; 286 garray = mat->garray; 287 rstart = mat->rstart; 288 289 if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */ 290 #if defined(PETSC_USE_COMPLEX) 291 zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 292 #else 293 dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row); 294 #endif 295 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 296 /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); */ /* crash! */ 297 Destroy_LU(N, &lu->grid, &lu->LUstruct); 298 lu->options.Fact = SamePattern; 299 } 300 nz = 0; irow = mat->rstart; 301 for ( i=0; i<m; i++ ) { 302 lu->row[i] = nz; 303 countA = ai[i+1] - ai[i]; 304 countB = bi[i+1] - bi[i]; 305 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 306 bjj = bj + bi[i]; 307 308 /* B part, smaller col index */ 309 colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */ 310 jB = 0; 311 for (j=0; j<countB; j++){ 312 jcol = garray[bjj[j]]; 313 if (jcol > colA_start) { 314 jB = j; 315 break; 316 } 317 lu->col[nz] = jcol; 318 lu->val[nz++] = *bv++; 319 if (j==countB-1) jB = countB; 320 } 321 322 /* A part */ 323 for (j=0; j<countA; j++){ 324 lu->col[nz] = mat->rstart + ajj[j]; 325 lu->val[nz++] = *av++; 326 } 327 328 /* B part, larger col index */ 329 for (j=jB; j<countB; j++){ 330 lu->col[nz] = garray[bjj[j]]; 331 lu->val[nz++] = *bv++; 332 } 333 } 334 lu->row[m] = nz; 335 #if defined(PETSC_USE_COMPLEX) 336 zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 337 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE); 338 #else 339 dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart, 340 lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE); 341 #endif 342 } 343 if (lu->StatPrint) { 344 ierr = PetscGetTime(&time);CHKERRQ(ierr); 345 time0 = time - time0; 346 } 347 348 /* Factor the matrix. */ 349 PStatInit(&stat); /* Initialize the statistics variables. */ 350 351 if (lu->MatInputMode == GLOBAL) { /* global mat input */ 352 #if defined(PETSC_USE_COMPLEX) 353 pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 354 &lu->grid, &lu->LUstruct, berr, &stat, &info); 355 #else 356 pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, 357 &lu->grid, &lu->LUstruct, berr, &stat, &info); 358 #endif 359 } else { /* distributed mat input */ 360 #if defined(PETSC_USE_COMPLEX) 361 pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 362 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 363 if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info); 364 #else 365 pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid, 366 &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info); 367 if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info); 368 #endif 369 } 370 371 if (lu->MatInputMode == GLOBAL && size > 1){ 372 ierr = MatDestroy(A_seq);CHKERRQ(ierr); 373 } 374 375 if (lu->StatPrint) { 376 if (size > 1){ 377 ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm); 378 ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm); 379 ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm); 380 time = time/size; /* average time */ 381 if (!rank) 382 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \ 383 %g / %g / %g\n",time_max,time_min,time); 384 } else { 385 ierr = PetscPrintf(PETSC_COMM_SELF, " Mat conversion(PETSc->SuperLU_DIST) time: \n \ 386 %g\n",time0); 387 } 388 389 PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */ 390 } 391 PStatFree(&stat); 392 (*F)->assembled = PETSC_TRUE; 393 lu->flg = SAME_NONZERO_PATTERN; 394 PetscFunctionReturn(0); 395 } 396 397 /* Note the Petsc r and c permutations are ignored */ 398 #undef __FUNCT__ 399 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST" 400 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 401 { 402 Mat B; 403 Mat_SuperLU_DIST *lu; 404 PetscErrorCode ierr; 405 int M=A->M,N=A->N,size,indx; 406 superlu_options_t options; 407 PetscTruth flg; 408 const char *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","COLAMD"}; 409 const char *prtype[] = {"LargeDiag","NATURAL"}; 410 411 PetscFunctionBegin; 412 /* Create the factorization matrix */ 413 ierr = MatCreate(A->comm,A->m,A->n,M,N,&B);CHKERRQ(ierr); 414 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 415 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL); 416 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 417 418 B->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 419 B->ops->solve = MatSolve_SuperLU_DIST; 420 B->factor = FACTOR_LU; 421 422 lu = (Mat_SuperLU_DIST*)(B->spptr); 423 424 /* Set the input options */ 425 set_default_options_dist(&options); 426 lu->MatInputMode = GLOBAL; 427 ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr); 428 429 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 430 lu->nprow = size/2; /* Default process rows. */ 431 if (!lu->nprow) lu->nprow = 1; 432 lu->npcol = size/lu->nprow; /* Default process columns. */ 433 434 ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr); 435 436 ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr); 437 ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr); 438 if (size != lu->nprow * lu->npcol) SETERRQ(1,"Number of processes should be equal to nprow*npcol"); 439 440 ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr); 441 if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL; 442 443 ierr = PetscOptionsLogical("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 444 if (!flg) { 445 options.Equil = NO; 446 } 447 448 ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr); 449 if (flg) { 450 switch (indx) { 451 case 0: 452 options.RowPerm = LargeDiag; 453 break; 454 case 1: 455 options.RowPerm = NOROWPERM; 456 break; 457 } 458 } 459 460 ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],&indx,&flg);CHKERRQ(ierr); 461 if (flg) { 462 switch (indx) { 463 case 0: 464 options.ColPerm = MMD_AT_PLUS_A; 465 break; 466 case 1: 467 options.ColPerm = NATURAL; 468 break; 469 case 2: 470 options.ColPerm = MMD_ATA; 471 break; 472 case 3: 473 options.ColPerm = COLAMD; 474 break; 475 } 476 } 477 478 ierr = PetscOptionsLogical("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr); 479 if (!flg) { 480 options.ReplaceTinyPivot = NO; 481 } 482 483 options.IterRefine = NOREFINE; 484 ierr = PetscOptionsLogical("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 485 if (flg) { 486 options.IterRefine = DOUBLE; 487 } 488 489 if (PetscLogPrintInfo) { 490 lu->StatPrint = (int)PETSC_TRUE; 491 } else { 492 lu->StatPrint = (int)PETSC_FALSE; 493 } 494 ierr = PetscOptionsLogical("-mat_superlu_dist_statprint","Print factorization information","None", 495 (PetscTruth)lu->StatPrint,(PetscTruth*)&lu->StatPrint,0);CHKERRQ(ierr); 496 PetscOptionsEnd(); 497 498 /* Initialize the SuperLU process grid. */ 499 superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid); 500 501 /* Initialize ScalePermstruct and LUstruct. */ 502 ScalePermstructInit(M, N, &lu->ScalePermstruct); 503 LUstructInit(M, N, &lu->LUstruct); 504 505 lu->options = options; 506 lu->flg = DIFFERENT_NONZERO_PATTERN; 507 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 508 *F = B; 509 PetscFunctionReturn(0); 510 } 511 512 #undef __FUNCT__ 513 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST" 514 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) { 515 PetscErrorCode ierr; 516 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 517 518 PetscFunctionBegin; 519 ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 520 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 521 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 522 PetscFunctionReturn(0); 523 } 524 525 #undef __FUNCT__ 526 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST" 527 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer) 528 { 529 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->spptr; 530 superlu_options_t options; 531 PetscErrorCode ierr; 532 char *colperm; 533 534 PetscFunctionBegin; 535 /* check if matrix is superlu_dist type */ 536 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0); 537 538 options = lu->options; 539 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr); 540 ierr = PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",(options.Equil != NO) ? "true": "false");CHKERRQ(ierr); 541 ierr = PetscViewerASCIIPrintf(viewer," Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr); 542 ierr = PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",(options.ReplaceTinyPivot != NO) ? "true": "false");CHKERRQ(ierr); 543 ierr = PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",(options.IterRefine == DOUBLE) ? "true": "false");CHKERRQ(ierr); 544 ierr = PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 545 ierr = PetscViewerASCIIPrintf(viewer," Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr); 546 if (options.ColPerm == NATURAL) { 547 colperm = "NATURAL"; 548 } else if (options.ColPerm == MMD_AT_PLUS_A) { 549 colperm = "MMD_AT_PLUS_A"; 550 } else if (options.ColPerm == MMD_ATA) { 551 colperm = "MMD_ATA"; 552 } else if (options.ColPerm == COLAMD) { 553 colperm = "COLAMD"; 554 } else { 555 SETERRQ(1,"Unknown column permutation"); 556 } 557 ierr = PetscViewerASCIIPrintf(viewer," Column permutation %s \n",colperm);CHKERRQ(ierr); 558 PetscFunctionReturn(0); 559 } 560 561 #undef __FUNCT__ 562 #define __FUNCT__ "MatView_SuperLU_DIST" 563 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer) 564 { 565 PetscErrorCode ierr; 566 PetscTruth iascii; 567 PetscViewerFormat format; 568 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr); 569 570 PetscFunctionBegin; 571 ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr); 572 573 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 574 if (iascii) { 575 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 576 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 577 ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr); 578 } 579 } 580 PetscFunctionReturn(0); 581 } 582 583 584 EXTERN_C_BEGIN 585 #undef __FUNCT__ 586 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST" 587 PetscErrorCode MatConvert_Base_SuperLU_DIST(Mat A,const MatType type,Mat *newmat) { 588 /* This routine is only called to convert to MATSUPERLU_DIST */ 589 /* from MATSEQAIJ if A has a single process communicator */ 590 /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */ 591 PetscErrorCode ierr; 592 int size; 593 MPI_Comm comm; 594 Mat B=*newmat; 595 Mat_SuperLU_DIST *lu; 596 597 PetscFunctionBegin; 598 if (B != A) { 599 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 600 } 601 602 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 603 ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr); 604 605 lu->MatDuplicate = A->ops->duplicate; 606 lu->MatView = A->ops->view; 607 lu->MatAssemblyEnd = A->ops->assemblyend; 608 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 609 lu->MatDestroy = A->ops->destroy; 610 lu->CleanUpSuperLU_Dist = PETSC_FALSE; 611 612 B->spptr = (void*)lu; 613 B->ops->duplicate = MatDuplicate_SuperLU_DIST; 614 B->ops->view = MatView_SuperLU_DIST; 615 B->ops->assemblyend = MatAssemblyEnd_SuperLU_DIST; 616 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 617 B->ops->destroy = MatDestroy_SuperLU_DIST; 618 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr); 619 if (size == 1) { 620 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C", 621 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 622 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C", 623 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 624 } else { 625 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C", 626 "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr); 627 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C", 628 "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr); 629 } 630 PetscLogInfo(0,"Using SuperLU_DIST for SeqAIJ LU factorization and solves."); 631 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr); 632 *newmat = B; 633 PetscFunctionReturn(0); 634 } 635 EXTERN_C_END 636 637 #undef __FUNCT__ 638 #define __FUNCT__ "MatDuplicate_SuperLU_DIST" 639 PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) { 640 PetscErrorCode ierr; 641 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr; 642 643 PetscFunctionBegin; 644 ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr); 645 ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr); 646 PetscFunctionReturn(0); 647 } 648 649 /*MC 650 MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices 651 via the external package SuperLU_DIST. 652 653 If SuperLU_DIST is installed (see the manual for 654 instructions on how to declare the existence of external packages), 655 a matrix type can be constructed which invokes SuperLU_DIST solvers. 656 After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST). 657 This matrix type is only supported for double precision real. 658 659 This matrix inherits from MATSEQAIJ when constructed with a single process communicator, 660 and from MATMPIAIJ otherwise. As a result, for single process communicators, 661 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 662 for communicators controlling multiple processes. It is recommended that you call both of 663 the above preallocation routines for simplicity. One can also call MatConvert for an inplace 664 conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size) 665 without data copy. 666 667 Options Database Keys: 668 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions() 669 . -mat_superlu_dist_r <n> - number of rows in processor partition 670 . -mat_superlu_dist_c <n> - number of columns in processor partition 671 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed 672 . -mat_superlu_dist_equil - equilibrate the matrix 673 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation 674 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,COLAMD,NATURAL> - column permutation 675 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 676 . -mat_superlu_dist_iterrefine - use iterative refinement 677 - -mat_superlu_dist_statprint - print factorization information 678 679 Level: beginner 680 681 .seealso: PCLU 682 M*/ 683 684 EXTERN_C_BEGIN 685 #undef __FUNCT__ 686 #define __FUNCT__ "MatCreate_SuperLU_DIST" 687 PetscErrorCode MatCreate_SuperLU_DIST(Mat A) 688 { 689 PetscErrorCode ierr; 690 int size; 691 Mat A_diag; 692 693 PetscFunctionBegin; 694 /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */ 695 /* and SuperLU_DIST types */ 696 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr); 697 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 698 if (size == 1) { 699 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 700 } else { 701 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 702 A_diag = ((Mat_MPIAIJ *)A->data)->A; 703 ierr = MatConvert_Base_SuperLU_DIST(A_diag,MATSUPERLU_DIST,&A_diag);CHKERRQ(ierr); 704 } 705 ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,&A);CHKERRQ(ierr); 706 PetscFunctionReturn(0); 707 } 708 EXTERN_C_END 709 710