/*
        Provides an interface to the SuperLU_DIST sparse solver
*/

#include <../src/mat/impls/aij/seq/aij.h>
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#if defined(PETSC_HAVE_STDLIB_H) /* This is to get around a weird problem with SuperLU on cray */
#include <stdlib.h>
#endif

EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
#include <superlu_zdefs.h>
#else
#include <superlu_ddefs.h>
#endif
EXTERN_C_END

typedef struct {
  int_t                  nprow,npcol,*row,*col;
  gridinfo_t             grid;
  superlu_dist_options_t options;
  SuperMatrix            A_sup;
  ScalePermstruct_t      ScalePermstruct;
  LUstruct_t             LUstruct;
  int                    StatPrint;
  SOLVEstruct_t          SOLVEstruct;
  fact_t                 FactPattern;
  MPI_Comm               comm_superlu;
#if defined(PETSC_USE_COMPLEX)
  doublecomplex          *val;
#else
  double                 *val;
#endif
  PetscBool              matsolve_iscalled,matmatsolve_iscalled;
  PetscBool              CleanUpSuperLU_Dist;  /* Flag to clean up (non-global) SuperLU objects during Destroy */
} Mat_SuperLU_DIST;

PetscErrorCode MatSuperluDistGetDiagU_SuperLU_DIST(Mat F,PetscScalar *diagU)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscStackCall("SuperLU_DIST:pzGetDiagU",pzGetDiagU(F->rmap->N,&lu->LUstruct,&lu->grid,(doublecomplex*)diagU));
#else
  PetscStackCall("SuperLU_DIST:pdGetDiagU",pdGetDiagU(F->rmap->N,&lu->LUstruct,&lu->grid,diagU));
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatSuperluDistGetDiagU(Mat F,PetscScalar *diagU)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(F,MAT_CLASSID,1);
  ierr = PetscTryMethod(F,"MatSuperluDistGetDiagU_C",(Mat,PetscScalar*),(F,diagU));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
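/*
  A minimal usage sketch for MatSuperluDistGetDiagU() (illustration only, not part of this file's build;
  it assumes an assembled AIJ matrix A and omits use of the returned values):

     Mat           F;
     MatFactorInfo info;
     IS            rowperm,colperm;
     PetscScalar   *diagU;
     PetscInt      N;

     ierr = MatGetSize(A,&N,NULL);CHKERRQ(ierr);
     ierr = MatGetFactor(A,MATSOLVERSUPERLU_DIST,MAT_FACTOR_LU,&F);CHKERRQ(ierr);
     ierr = MatGetOrdering(A,MATORDERINGNATURAL,&rowperm,&colperm);CHKERRQ(ierr);
     ierr = MatFactorInfoInitialize(&info);CHKERRQ(ierr);
     ierr = MatLUFactorSymbolic(F,A,rowperm,colperm,&info);CHKERRQ(ierr);
     ierr = MatLUFactorNumeric(F,A,&info);CHKERRQ(ierr);
     ierr = PetscMalloc1(N,&diagU);CHKERRQ(ierr);           (the diagonal of U has global length N)
     ierr = MatSuperluDistGetDiagU(F,diagU);CHKERRQ(ierr);
     ierr = PetscFree(diagU);CHKERRQ(ierr);
     ierr = ISDestroy(&rowperm);CHKERRQ(ierr);
     ierr = ISDestroy(&colperm);CHKERRQ(ierr);
     ierr = MatDestroy(&F);CHKERRQ(ierr);
*/
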
static PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
{
  PetscErrorCode   ierr;
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->data;

  PetscFunctionBegin;
  if (lu->CleanUpSuperLU_Dist) {
    /* Deallocate SuperLU_DIST storage */
    PetscStackCall("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist",Destroy_CompRowLoc_Matrix_dist(&lu->A_sup));
    if (lu->options.SolveInitialized) {
#if defined(PETSC_USE_COMPLEX)
      PetscStackCall("SuperLU_DIST:zSolveFinalize",zSolveFinalize(&lu->options, &lu->SOLVEstruct));
#else
      PetscStackCall("SuperLU_DIST:dSolveFinalize",dSolveFinalize(&lu->options, &lu->SOLVEstruct));
#endif
    }
    PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct));
    PetscStackCall("SuperLU_DIST:ScalePermstructFree",ScalePermstructFree(&lu->ScalePermstruct));
    PetscStackCall("SuperLU_DIST:LUstructFree",LUstructFree(&lu->LUstruct));

    /* Release the SuperLU_DIST process grid. */
    PetscStackCall("SuperLU_DIST:superlu_gridexit",superlu_gridexit(&lu->grid));
    ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
  }
  ierr = PetscFree(A->data);CHKERRQ(ierr);
  /* clear composed functions */
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatSuperluDistGetDiagU_C",NULL);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

static PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->data;
  PetscErrorCode   ierr;
  PetscMPIInt      size;
  PetscInt         m=A->rmap->n;
  SuperLUStat_t    stat;
  double           berr[1];
  PetscScalar      *bptr=NULL;
  int              info;  /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
  static PetscBool cite = PETSC_FALSE;

  PetscFunctionBegin;
  if (lu->options.Fact != FACTORED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact must equal FACTORED");
  ierr = PetscCitationsRegister("@article{lidemmel03,\n author = {Xiaoye S. Li and James W. Demmel},\n title = {{SuperLU_DIST}: A Scalable Distributed-Memory Sparse Direct\n Solver for Unsymmetric Linear Systems},\n journal = {ACM Trans. Mathematical Software},\n volume = {29},\n number = {2},\n pages = {110-140},\n year = 2003\n}\n",&cite);CHKERRQ(ierr);

  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);

  if (lu->options.SolveInitialized && !lu->matsolve_iscalled) {
    /* see comments in MatMatSolve() */
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:zSolveFinalize",zSolveFinalize(&lu->options, &lu->SOLVEstruct));
#else
    PetscStackCall("SuperLU_DIST:dSolveFinalize",dSolveFinalize(&lu->options, &lu->SOLVEstruct));
#endif
    lu->options.SolveInitialized = NO;
  }
  ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
  ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);

  PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat));        /* Initialize the statistics variables. */
#if defined(PETSC_USE_COMPLEX)
  PetscStackCall("SuperLU_DIST:pzgssvx",pzgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,m,1,&lu->grid,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info));
#else
  PetscStackCall("SuperLU_DIST:pdgssvx",pdgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,bptr,m,1,&lu->grid,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info));
#endif
  if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);

  if (lu->options.PrintStat) PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
  PetscStackCall("SuperLU_DIST:PStatFree",PStatFree(&stat));

  ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
  lu->matsolve_iscalled    = PETSC_TRUE;
  lu->matmatsolve_iscalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatSolve_SuperLU_DIST(Mat A,Mat B_mpi,Mat X)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->data;
  PetscErrorCode   ierr;
  PetscMPIInt      size;
  PetscInt         m=A->rmap->n,nrhs;
  SuperLUStat_t    stat;
  double           berr[1];
  PetscScalar      *bptr;
  int              info;  /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
  PetscBool        flg;

  PetscFunctionBegin;
  if (lu->options.Fact != FACTORED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact must equal FACTORED");
  ierr = PetscObjectTypeCompareAny((PetscObject)B_mpi,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be a MATDENSE matrix");
  ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be a MATDENSE matrix");

  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);

  if (lu->options.SolveInitialized && !lu->matmatsolve_iscalled) {
    /* The communication pattern stored in SOLVEstruct was likely not created for MatMatSolve(),
       so destroy it and create a new SOLVEstruct; reusing it may cause memory corruption or an
       incorrect solution. See src/mat/examples/tests/ex125.c */
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:zSolveFinalize",zSolveFinalize(&lu->options, &lu->SOLVEstruct));
#else
    PetscStackCall("SuperLU_DIST:dSolveFinalize",dSolveFinalize(&lu->options, &lu->SOLVEstruct));
#endif
    lu->options.SolveInitialized = NO;
  }
  ierr = MatCopy(B_mpi,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);

  ierr = MatGetSize(B_mpi,NULL,&nrhs);CHKERRQ(ierr);

  PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat));        /* Initialize the statistics variables. */
  ierr = MatDenseGetArray(X,&bptr);CHKERRQ(ierr);

#if defined(PETSC_USE_COMPLEX)
  PetscStackCall("SuperLU_DIST:pzgssvx",pzgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,m,nrhs,&lu->grid,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info));
#else
  PetscStackCall("SuperLU_DIST:pdgssvx",pdgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,bptr,m,nrhs,&lu->grid,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info));
#endif

  if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
  ierr = MatDenseRestoreArray(X,&bptr);CHKERRQ(ierr);

  if (lu->options.PrintStat) PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
  PetscStackCall("SuperLU_DIST:PStatFree",PStatFree(&stat));
  lu->matsolve_iscalled    = PETSC_FALSE;
  lu->matmatsolve_iscalled = PETSC_TRUE;
  PetscFunctionReturn(0);
}

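/*
  Usage note (a sketch, not taken from this file): MatGetInertia_SuperLU_DIST() below is only installed
  when the input matrix is marked symmetric or Hermitian, and it requires the factorization to have been
  done with -mat_superlu_dist_rowperm NOROWPERM; a caller then obtains the inertia of the factored
  matrix F with MatGetInertia(F,&nneg,&nzero,&npos).
*/
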
/*
  input:
   F:     numeric Cholesky factor
  output:
   nneg:  total number of negative pivots
   nzero: total number of zero pivots
   npos:  (global dimension of F) - nneg - nzero
*/
static PetscErrorCode MatGetInertia_SuperLU_DIST(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos)
{
  PetscErrorCode   ierr;
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->data;
  PetscScalar      *diagU=NULL;
  PetscInt         M,i,neg=0,zero=0,pos=0;
  PetscReal        r;

  PetscFunctionBegin;
  if (!F->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix factor F is not assembled");
  if (lu->options.RowPerm != NOROWPERM) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must set NOROWPERM");
  ierr = MatGetSize(F,&M,NULL);CHKERRQ(ierr);
  ierr = PetscMalloc1(M,&diagU);CHKERRQ(ierr);
  ierr = MatSuperluDistGetDiagU(F,diagU);CHKERRQ(ierr);
  for (i=0; i<M; i++) {
#if defined(PETSC_USE_COMPLEX)
    r = PetscImaginaryPart(diagU[i])/10.0;
    if (r < -PETSC_MACHINE_EPSILON || r > PETSC_MACHINE_EPSILON) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"diagU[%d]=%g + i %g is non-real",i,PetscRealPart(diagU[i]),r*10.0);
    r = PetscRealPart(diagU[i]);
#else
    r = diagU[i];
#endif
    if (r > 0) {
      pos++;
    } else if (r < 0) {
      neg++;
    } else zero++;
  }

  ierr = PetscFree(diagU);CHKERRQ(ierr);
  if (nneg)  *nneg  = neg;
  if (nzero) *nzero = zero;
  if (npos)  *npos  = pos;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat_SeqAIJ       *aa=NULL,*bb=NULL;
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->data;
  PetscErrorCode   ierr;
  PetscInt         M=A->rmap->N,N=A->cmap->N,i,*ai=NULL,*aj=NULL,*bi=NULL,*bj=NULL,nz,rstart,*garray=NULL,
                   m=A->rmap->n, colA_start,j,jcol,jB,countA,countB,*bjj=NULL,*ajj=NULL;
  int              sinfo;   /* SuperLU_Dist info flag is always an int even with long long indices */
  PetscMPIInt      size;
  SuperLUStat_t    stat;
  double           *berr=0;
#if defined(PETSC_USE_COMPLEX)
  doublecomplex    *av=NULL, *bv=NULL;
#else
  double           *av=NULL, *bv=NULL;
#endif

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);

  if (size == 1) {
    aa     = (Mat_SeqAIJ*)A->data;
    rstart = 0;
    nz     = aa->nz;
  } else {
    Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
    aa = (Mat_SeqAIJ*)(mat->A)->data;
    bb = (Mat_SeqAIJ*)(mat->B)->data;
    ai = aa->i; aj = aa->j;
    bi = bb->i; bj = bb->j;
#if defined(PETSC_USE_COMPLEX)
    av = (doublecomplex*)aa->a;
    bv = (doublecomplex*)bb->a;
#else
    av = aa->a;
    bv = bb->a;
#endif
    rstart = A->rmap->rstart;
    nz     = aa->nz + bb->nz;
    garray = mat->garray;
  }

  /* Allocations for A_sup */
  if (lu->options.Fact == DOFACT) { /* first numeric factorization */
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:zallocateA_dist",zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row));
#else
    PetscStackCall("SuperLU_DIST:dallocateA_dist",dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row));
#endif
  } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
    if (lu->FactPattern == SamePattern_SameRowPerm) {
      lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
    } else if (lu->FactPattern == SamePattern) {
      PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(N, &lu->grid, &lu->LUstruct)); /* Deallocate L and U matrices. */
      lu->options.Fact = SamePattern;
    } else if (lu->FactPattern == DOFACT) {
      PetscStackCall("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist",Destroy_CompRowLoc_Matrix_dist(&lu->A_sup));
      PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(N, &lu->grid, &lu->LUstruct));
      lu->options.Fact = DOFACT;

#if defined(PETSC_USE_COMPLEX)
      PetscStackCall("SuperLU_DIST:zallocateA_dist",zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row));
#else
      PetscStackCall("SuperLU_DIST:dallocateA_dist",dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row));
#endif
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"options.Fact must be one of SamePattern SamePattern_SameRowPerm DOFACT");
    }
  }

  /* Copy AIJ matrix to superlu_dist matrix */
  if (size == 1) { /* A_sup has the same SeqAIJ format as the input matrix */
    ai = aa->i; aj = aa->j;
#if defined(PETSC_USE_COMPLEX)
    av = (doublecomplex*)aa->a;
#else
    av = aa->a;
#endif

    ierr = PetscArraycpy(lu->row,ai,(m+1));CHKERRQ(ierr);
    ierr = PetscArraycpy(lu->col,aj,nz);CHKERRQ(ierr);
    ierr = PetscArraycpy(lu->val,av,nz);CHKERRQ(ierr);
  } else {
    nz = 0;
    for (i=0; i<m; i++) {
      lu->row[i] = nz;
      countA     = ai[i+1] - ai[i];
      countB     = bi[i+1] - bi[i];
      if (aj) {
        ajj = aj + ai[i]; /* ptr to the beginning of this row */
      } else {
        ajj = NULL;
      }
      bjj = bj + bi[i];

      /* B part, smaller col index */
      if (aj) {
        colA_start = rstart + ajj[0]; /* the smallest global col index of A */
      } else { /* superlu_dist does not require the matrix to have diagonal entries, so aj=NULL still works */
        colA_start = rstart;
      }
      jB = 0;
      for (j=0; j<countB; j++) {
        jcol = garray[bjj[j]];
        if (jcol > colA_start) {
          jB = j;
          break;
        }
        lu->col[nz]   = jcol;
        lu->val[nz++] = *bv++;
        if (j==countB-1) jB = countB;
      }

      /* A part */
      for (j=0; j<countA; j++) {
        lu->col[nz]   = rstart + ajj[j];
        lu->val[nz++] = *av++;
      }

      /* B part, larger col index */
      for (j=jB; j<countB; j++) {
        lu->col[nz]   = garray[bjj[j]];
        lu->val[nz++] = *bv++;
      }
    }
    lu->row[m] = nz;
  }

  /* Create and setup A_sup */
  if (lu->options.Fact == DOFACT) {
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:zCreate_CompRowLoc_Matrix_dist",zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE));
#else
    PetscStackCall("SuperLU_DIST:dCreate_CompRowLoc_Matrix_dist",dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE));
#endif
  }

  /* Factor the matrix. */
  PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat));   /* Initialize the statistics variables. */
#if defined(PETSC_USE_COMPLEX)
  PetscStackCall("SuperLU_DIST:pzgssvx",pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, m, 0, &lu->grid,&lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo));
#else
  PetscStackCall("SuperLU_DIST:pdgssvx",pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, m, 0, &lu->grid,&lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo));
#endif

  if (sinfo > 0) {
    if (A->erroriffailure) {
      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot in row %D",sinfo);
    } else {
      if (sinfo <= lu->A_sup.ncol) {
        F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
        ierr = PetscInfo1(F,"U(i,i) is exactly zero, i= %D\n",sinfo);CHKERRQ(ierr);
      } else if (sinfo > lu->A_sup.ncol) {
        /*
          number of bytes allocated when memory allocation
          failure occurred, plus A->ncol.
        */
        F->factorerrortype = MAT_FACTOR_OUTMEMORY;
        ierr = PetscInfo1(F,"Number of bytes allocated when memory allocation fails %D\n",sinfo);CHKERRQ(ierr);
      }
    }
  } else if (sinfo < 0) {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %D, argument in p*gssvx() had an illegal value", sinfo);
  }

  if (lu->options.PrintStat) {
    PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */
  }
  PetscStackCall("SuperLU_DIST:PStatFree",PStatFree(&stat));
  F->assembled     = PETSC_TRUE;
  F->preallocated  = PETSC_TRUE;
  lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
  PetscFunctionReturn(0);
}

/* Note the Petsc r and c permutations are ignored */
static PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->data;
  PetscInt         M = A->rmap->N,N=A->cmap->N;

  PetscFunctionBegin;
  /* Initialize the SuperLU process grid. */
  PetscStackCall("SuperLU_DIST:superlu_gridinit",superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid));

  /* Initialize ScalePermstruct and LUstruct. */
  PetscStackCall("SuperLU_DIST:ScalePermstructInit",ScalePermstructInit(M, N, &lu->ScalePermstruct));
  PetscStackCall("SuperLU_DIST:LUstructInit",LUstructInit(N, &lu->LUstruct));
  F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST;
  F->ops->solve           = MatSolve_SuperLU_DIST;
  F->ops->matsolve        = MatMatSolve_SuperLU_DIST;
  F->ops->getinertia      = NULL;

  if (A->symmetric || A->hermitian) {
    F->ops->getinertia = MatGetInertia_SuperLU_DIST;
  }
  lu->CleanUpSuperLU_Dist = PETSC_TRUE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatCholeskyFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,const MatFactorInfo *info)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!A->symmetric) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Input matrix must be symmetric\n");
  ierr = MatLUFactorSymbolic_SuperLU_DIST(F,A,r,r,info);CHKERRQ(ierr);
  F->ops->choleskyfactornumeric = MatLUFactorNumeric_SuperLU_DIST;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatFactorGetSolverType_aij_superlu_dist(Mat A,MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERSUPERLU_DIST;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatView_Info_SuperLU_DIST(Mat A,PetscViewer viewer)
{
  Mat_SuperLU_DIST       *lu=(Mat_SuperLU_DIST*)A->data;
  superlu_dist_options_t options;
  PetscErrorCode         ierr;

  PetscFunctionBegin;
  /* check if matrix is superlu_dist type */
  if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);

  options = lu->options;
  ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",PetscBools[options.Equil != NO]);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",PetscBools[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",PetscBools[options.IterRefine == SLU_DOUBLE]);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);

  switch (options.RowPerm) {
  case NOROWPERM:
    ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation NOROWPERM\n");CHKERRQ(ierr);
    break;
  case LargeDiag_MC64:
    ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation LargeDiag_MC64\n");CHKERRQ(ierr);
    break;
  case LargeDiag_AWPM:
    ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation LargeDiag_AWPM\n");CHKERRQ(ierr);
    break;
  case MY_PERMR:
    ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation MY_PERMR\n");CHKERRQ(ierr);
    break;
  default:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown row permutation");
  }

  switch (options.ColPerm) {
  case NATURAL:
    ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");CHKERRQ(ierr);
    break;
  case MMD_AT_PLUS_A:
    ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr);
    break;
  case MMD_ATA:
    ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");CHKERRQ(ierr);
    break;
  case METIS_AT_PLUS_A:
    ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation METIS_AT_PLUS_A\n");CHKERRQ(ierr);
    break;
  case PARMETIS:
    ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation PARMETIS\n");CHKERRQ(ierr);
    break;
  default:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown column permutation");
  }

  ierr = PetscViewerASCIIPrintf(viewer,"  Parallel symbolic factorization %s \n",PetscBools[options.ParSymbFact != NO]);CHKERRQ(ierr);

  if (lu->FactPattern == SamePattern) {
    ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern\n");CHKERRQ(ierr);
  } else if (lu->FactPattern == SamePattern_SameRowPerm) {
    ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern_SameRowPerm\n");CHKERRQ(ierr);
  } else if (lu->FactPattern == DOFACT) {
    ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization DOFACT\n");CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown factorization pattern");
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = MatView_Info_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
{
  Mat                    B;
  Mat_SuperLU_DIST       *lu;
  PetscErrorCode         ierr;
  PetscInt               M=A->rmap->N,N=A->cmap->N,indx;
  PetscMPIInt            size;
  superlu_dist_options_t options;
  PetscBool              flg;
  const char             *colperm[]     = {"NATURAL","MMD_AT_PLUS_A","MMD_ATA","METIS_AT_PLUS_A","PARMETIS"};
  const char             *rowperm[]     = {"NOROWPERM","LargeDiag_MC64","LargeDiag_AWPM","MY_PERMR"};
  const char             *factPattern[] = {"SamePattern","SamePattern_SameRowPerm","DOFACT"};
  PetscBool              set;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,M,N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("superlu_dist",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);
  B->ops->getinfo = MatGetInfo_External;
  B->ops->view    = MatView_SuperLU_DIST;
  B->ops->destroy = MatDestroy_SuperLU_DIST;

  if (ftype == MAT_FACTOR_LU) {
    B->factortype = MAT_FACTOR_LU;
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
  } else {
    B->factortype = MAT_FACTOR_CHOLESKY;
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SuperLU_DIST;
  }

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERSUPERLU_DIST,&B->solvertype);CHKERRQ(ierr);

  ierr    = PetscNewLog(B,&lu);CHKERRQ(ierr);
  B->data = lu;

  /* Set the default input options:
     options.Fact              = DOFACT;
     options.Equil             = YES;
     options.ParSymbFact       = NO;
     options.ColPerm           = METIS_AT_PLUS_A;
     options.RowPerm           = LargeDiag_MC64;
     options.ReplaceTinyPivot  = YES;
     options.IterRefine        = DOUBLE;
     options.Trans             = NOTRANS;
     options.SolveInitialized  = NO; -- holds the communication pattern used by MatSolve() and MatMatSolve()
     options.RefineInitialized = NO;
     options.PrintStat         = YES;
  */
  set_default_options_dist(&options);

  ierr = MPI_Comm_dup(PetscObjectComm((PetscObject)A),&(lu->comm_superlu));CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  /* Default num of process columns and rows */
  lu->nprow = (int_t) (0.5 + PetscSqrtReal((PetscReal)size));
  if (!lu->nprow) lu->nprow = 1;
  while (lu->nprow > 0) {
    lu->npcol = (int_t) (size/lu->nprow);
    if (size == lu->nprow * lu->npcol) break;
    lu->nprow--;
  }
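  /* Worked example of the default grid selection above (illustration only):
     with size = 6 processes, nprow starts at (int_t)(0.5 + sqrt(6)) = 2 and npcol = 6/2 = 3,
     so the default grid is 2 x 3; with a prime size such as 7 the loop falls back to a 1 x 7 grid. */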

  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_superlu_dist_r","Number of rows in processor partition","None",lu->nprow,(PetscInt*)&lu->nprow,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_superlu_dist_c","Number of columns in processor partition","None",lu->npcol,(PetscInt*)&lu->npcol,NULL);CHKERRQ(ierr);
  if (size != lu->nprow * lu->npcol) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of processes %d must equal nprow %d * npcol %d",size,lu->nprow,lu->npcol);

  ierr = PetscOptionsBool("-mat_superlu_dist_equil","Equilibrate matrix","None",options.Equil ? PETSC_TRUE : PETSC_FALSE,&flg,&set);CHKERRQ(ierr);
  if (set && !flg) options.Equil = NO;

  ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",rowperm,4,rowperm[1],&indx,&flg);CHKERRQ(ierr);
  if (flg) {
    switch (indx) {
    case 0:
      options.RowPerm = NOROWPERM;
      break;
    case 1:
      options.RowPerm = LargeDiag_MC64;
      break;
    case 2:
      options.RowPerm = LargeDiag_AWPM;
      break;
    case 3:
      options.RowPerm = MY_PERMR;
      break;
    default:
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown row permutation");
    }
  }

  ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",colperm,5,colperm[3],&indx,&flg);CHKERRQ(ierr);
  if (flg) {
    switch (indx) {
    case 0:
      options.ColPerm = NATURAL;
      break;
    case 1:
      options.ColPerm = MMD_AT_PLUS_A;
      break;
    case 2:
      options.ColPerm = MMD_ATA;
      break;
    case 3:
      options.ColPerm = METIS_AT_PLUS_A;
      break;
    case 4:
      options.ColPerm = PARMETIS; /* only works for np>1 */
      break;
    default:
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown column permutation");
    }
  }

  options.ReplaceTinyPivot = NO;
  ierr = PetscOptionsBool("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",options.ReplaceTinyPivot ? PETSC_TRUE : PETSC_FALSE,&flg,&set);CHKERRQ(ierr);
  if (set && flg) options.ReplaceTinyPivot = YES;

  options.ParSymbFact = NO;
  ierr = PetscOptionsBool("-mat_superlu_dist_parsymbfact","Parallel symbolic factorization","None",PETSC_FALSE,&flg,&set);CHKERRQ(ierr);
  if (set && flg && size>1) {
#if defined(PETSC_HAVE_PARMETIS)
    options.ParSymbFact = YES;
    options.ColPerm     = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of the user's ordering setting */
#else
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"parsymbfact needs PARMETIS");
#endif
  }

  lu->FactPattern = SamePattern;
  ierr = PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,3,factPattern[0],&indx,&flg);CHKERRQ(ierr);
  if (flg) {
    switch (indx) {
    case 0:
      lu->FactPattern = SamePattern;
      break;
    case 1:
      lu->FactPattern = SamePattern_SameRowPerm;
      break;
    case 2:
      lu->FactPattern = DOFACT;
      break;
    }
  }

  options.IterRefine = NOREFINE;
  ierr = PetscOptionsBool("-mat_superlu_dist_iterrefine","Use iterative refinement","None",options.IterRefine == NOREFINE ? PETSC_FALSE : PETSC_TRUE,&flg,&set);CHKERRQ(ierr);
  if (set) {
    if (flg) options.IterRefine = SLU_DOUBLE;
    else options.IterRefine = NOREFINE;
  }

  if (PetscLogPrintInfo) options.PrintStat = YES;
  else options.PrintStat = NO;
  ierr = PetscOptionsBool("-mat_superlu_dist_statprint","Print factorization information","None",(PetscBool)options.PrintStat,(PetscBool*)&options.PrintStat,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  lu->options              = options;
  lu->options.Fact         = DOFACT;
  lu->matsolve_iscalled    = PETSC_FALSE;
  lu->matmatsolve_iscalled = PETSC_FALSE;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_aij_superlu_dist);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatSuperluDistGetDiagU_C",MatSuperluDistGetDiagU_SuperLU_DIST);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}

PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_SuperLU_DIST(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSolverTypeRegister(MATSOLVERSUPERLU_DIST,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_superlu_dist);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERSUPERLU_DIST,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_superlu_dist);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERSUPERLU_DIST,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_superlu_dist);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERSUPERLU_DIST,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_superlu_dist);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
  MATSOLVERSUPERLU_DIST - Parallel direct solver package for LU factorization

  Use ./configure --download-superlu_dist --download-parmetis --download-metis --download-ptscotch to have PETSc installed with SuperLU_DIST

  Use -pc_type lu -pc_factor_mat_solver_type superlu_dist to use this direct solver (a programmatic sketch follows the options list below)

  Works with AIJ matrices

  Options Database Keys:
+ -mat_superlu_dist_r <n> - number of rows in processor partition
. -mat_superlu_dist_c <n> - number of columns in processor partition
. -mat_superlu_dist_equil - equilibrate the matrix
. -mat_superlu_dist_rowperm <NOROWPERM,LargeDiag_MC64,LargeDiag_AWPM,MY_PERMR> - row permutation
. -mat_superlu_dist_colperm <NATURAL,MMD_AT_PLUS_A,MMD_ATA,METIS_AT_PLUS_A,PARMETIS> - column permutation
. -mat_superlu_dist_replacetinypivot - replace tiny pivots
. -mat_superlu_dist_fact <SamePattern> - (choose one of) SamePattern SamePattern_SameRowPerm DOFACT
. -mat_superlu_dist_iterrefine - use iterative refinement
- -mat_superlu_dist_statprint - print factorization information
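
  A minimal sketch of selecting this solver from application code (assumes an existing PC named pc obtained from a KSP; error checking with CHKERRQ() omitted):

     PCSetType(pc,PCLU);
     PCFactorSetMatSolverType(pc,MATSOLVERSUPERLU_DIST);

  followed by the usual KSPSetFromOptions()/KSPSolve() calls, so the options above can still be changed on the command line.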

  Level: beginner

.seealso: PCLU, PCFactorSetMatSolverType(), MatSolverType

M*/