1 /* 2 Provides an interface to the SuperLU_DIST sparse solver 3 */ 4 5 #include <../src/mat/impls/aij/seq/aij.h> 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> 7 #include <petscpkg_version.h> 8 9 EXTERN_C_BEGIN 10 #if defined(PETSC_USE_COMPLEX) 11 #include <superlu_zdefs.h> 12 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(6,3,0) 13 #define LUstructInit zLUstructInit 14 #define ScalePermstructInit zScalePermstructInit 15 #define ScalePermstructFree zScalePermstructFree 16 #define LUstructFree zLUstructFree 17 #define Destroy_LU zDestroy_LU 18 #define ScalePermstruct_t zScalePermstruct_t 19 #define LUstruct_t zLUstruct_t 20 #define SOLVEstruct_t zSOLVEstruct_t 21 #endif 22 #else 23 #include <superlu_ddefs.h> 24 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(6,3,0) 25 #define LUstructInit dLUstructInit 26 #define ScalePermstructInit dScalePermstructInit 27 #define ScalePermstructFree dScalePermstructFree 28 #define LUstructFree dLUstructFree 29 #define Destroy_LU dDestroy_LU 30 #define ScalePermstruct_t dScalePermstruct_t 31 #define LUstruct_t dLUstruct_t 32 #define SOLVEstruct_t dSOLVEstruct_t 33 #endif 34 #endif 35 EXTERN_C_END 36 37 typedef struct { 38 int_t nprow,npcol,*row,*col; 39 gridinfo_t grid; 40 superlu_dist_options_t options; 41 SuperMatrix A_sup; 42 ScalePermstruct_t ScalePermstruct; 43 LUstruct_t LUstruct; 44 int StatPrint; 45 SOLVEstruct_t SOLVEstruct; 46 fact_t FactPattern; 47 MPI_Comm comm_superlu; 48 #if defined(PETSC_USE_COMPLEX) 49 doublecomplex *val; 50 #else 51 double *val; 52 #endif 53 PetscBool matsolve_iscalled,matmatsolve_iscalled; 54 PetscBool CleanUpSuperLU_Dist; /* Flag to clean up (non-global) SuperLU objects during Destroy */ 55 } Mat_SuperLU_DIST; 56 57 58 PetscErrorCode MatSuperluDistGetDiagU_SuperLU_DIST(Mat F,PetscScalar *diagU) 59 { 60 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->data; 61 62 PetscFunctionBegin; 63 #if defined(PETSC_USE_COMPLEX) 64 PetscStackCall("SuperLU_DIST:pzGetDiagU",pzGetDiagU(F->rmap->N,&lu->LUstruct,&lu->grid,(doublecomplex*)diagU)); 65 #else 66 PetscStackCall("SuperLU_DIST:pdGetDiagU",pdGetDiagU(F->rmap->N,&lu->LUstruct,&lu->grid,diagU)); 67 #endif 68 PetscFunctionReturn(0); 69 } 70 71 PetscErrorCode MatSuperluDistGetDiagU(Mat F,PetscScalar *diagU) 72 { 73 PetscErrorCode ierr; 74 75 PetscFunctionBegin; 76 PetscValidHeaderSpecific(F,MAT_CLASSID,1); 77 ierr = PetscTryMethod(F,"MatSuperluDistGetDiagU_C",(Mat,PetscScalar*),(F,diagU));CHKERRQ(ierr); 78 PetscFunctionReturn(0); 79 } 80 81 static PetscErrorCode MatDestroy_SuperLU_DIST(Mat A) 82 { 83 PetscErrorCode ierr; 84 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->data; 85 86 PetscFunctionBegin; 87 if (lu->CleanUpSuperLU_Dist) { 88 /* Deallocate SuperLU_DIST storage */ 89 PetscStackCall("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist",Destroy_CompRowLoc_Matrix_dist(&lu->A_sup)); 90 if (lu->options.SolveInitialized) { 91 #if defined(PETSC_USE_COMPLEX) 92 PetscStackCall("SuperLU_DIST:zSolveFinalize",zSolveFinalize(&lu->options, &lu->SOLVEstruct)); 93 #else 94 PetscStackCall("SuperLU_DIST:dSolveFinalize",dSolveFinalize(&lu->options, &lu->SOLVEstruct)); 95 #endif 96 } 97 PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct)); 98 PetscStackCall("SuperLU_DIST:ScalePermstructFree",ScalePermstructFree(&lu->ScalePermstruct)); 99 PetscStackCall("SuperLU_DIST:LUstructFree",LUstructFree(&lu->LUstruct)); 100 101 /* Release the SuperLU_DIST process grid. */ 102 PetscStackCall("SuperLU_DIST:superlu_gridexit",superlu_gridexit(&lu->grid)); 103 ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr); 104 } 105 ierr = PetscFree(A->data);CHKERRQ(ierr); 106 /* clear composed functions */ 107 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 108 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSuperluDistGetDiagU_C",NULL);CHKERRQ(ierr); 109 110 PetscFunctionReturn(0); 111 } 112 113 static PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x) 114 { 115 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->data; 116 PetscErrorCode ierr; 117 PetscInt m=A->rmap->n; 118 SuperLUStat_t stat; 119 double berr[1]; 120 PetscScalar *bptr=NULL; 121 int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */ 122 static PetscBool cite = PETSC_FALSE; 123 124 PetscFunctionBegin; 125 if (lu->options.Fact != FACTORED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact must equal FACTORED"); 126 ierr = PetscCitationsRegister("@article{lidemmel03,\n author = {Xiaoye S. Li and James W. Demmel},\n title = {{SuperLU_DIST}: A Scalable Distributed-Memory Sparse Direct\n Solver for Unsymmetric Linear Systems},\n journal = {ACM Trans. Mathematical Software},\n volume = {29},\n number = {2},\n pages = {110-140},\n year = 2003\n}\n",&cite);CHKERRQ(ierr); 127 128 if (lu->options.SolveInitialized && !lu->matsolve_iscalled) { 129 /* see comments in MatMatSolve() */ 130 #if defined(PETSC_USE_COMPLEX) 131 PetscStackCall("SuperLU_DIST:zSolveFinalize",zSolveFinalize(&lu->options, &lu->SOLVEstruct)); 132 #else 133 PetscStackCall("SuperLU_DIST:dSolveFinalize",dSolveFinalize(&lu->options, &lu->SOLVEstruct)); 134 #endif 135 lu->options.SolveInitialized = NO; 136 } 137 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 138 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 139 140 PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat)); /* Initialize the statistics variables. */ 141 #if defined(PETSC_USE_COMPLEX) 142 PetscStackCall("SuperLU_DIST:pzgssvx",pzgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,m,1,&lu->grid,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info)); 143 #else 144 PetscStackCall("SuperLU_DIST:pdgssvx",pdgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,bptr,m,1,&lu->grid,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info)); 145 #endif 146 if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info); 147 148 if (lu->options.PrintStat) PetscStackCall("SuperLU_DIST:PStatPrint",PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ 149 PetscStackCall("SuperLU_DIST:PStatFree",PStatFree(&stat)); 150 151 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 152 lu->matsolve_iscalled = PETSC_TRUE; 153 lu->matmatsolve_iscalled = PETSC_FALSE; 154 PetscFunctionReturn(0); 155 } 156 157 static PetscErrorCode MatMatSolve_SuperLU_DIST(Mat A,Mat B_mpi,Mat X) 158 { 159 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->data; 160 PetscErrorCode ierr; 161 PetscInt m=A->rmap->n,nrhs; 162 SuperLUStat_t stat; 163 double berr[1]; 164 PetscScalar *bptr; 165 int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */ 166 PetscBool flg; 167 168 PetscFunctionBegin; 169 if (lu->options.Fact != FACTORED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact must equal FACTORED"); 170 ierr = PetscObjectTypeCompareAny((PetscObject)B_mpi,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 171 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix"); 172 if (X != B_mpi) { 173 ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 174 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix"); 175 } 176 177 if (lu->options.SolveInitialized && !lu->matmatsolve_iscalled) { 178 /* communication pattern of SOLVEstruct is unlikely created for matmatsolve, 179 thus destroy it and create a new SOLVEstruct. 180 Otherwise it may result in memory corruption or incorrect solution 181 See src/mat/tests/ex125.c */ 182 #if defined(PETSC_USE_COMPLEX) 183 PetscStackCall("SuperLU_DIST:zSolveFinalize",zSolveFinalize(&lu->options, &lu->SOLVEstruct)); 184 #else 185 PetscStackCall("SuperLU_DIST:dSolveFinalize",dSolveFinalize(&lu->options, &lu->SOLVEstruct)); 186 #endif 187 lu->options.SolveInitialized = NO; 188 } 189 if (X != B_mpi) { 190 ierr = MatCopy(B_mpi,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 191 } 192 193 ierr = MatGetSize(B_mpi,NULL,&nrhs);CHKERRQ(ierr); 194 195 PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat)); /* Initialize the statistics variables. */ 196 ierr = MatDenseGetArray(X,&bptr);CHKERRQ(ierr); 197 198 #if defined(PETSC_USE_COMPLEX) 199 PetscStackCall("SuperLU_DIST:pzgssvx",pzgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,m,nrhs,&lu->grid, &lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info)); 200 #else 201 PetscStackCall("SuperLU_DIST:pdgssvx",pdgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,bptr,m,nrhs,&lu->grid,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info)); 202 #endif 203 204 if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info); 205 ierr = MatDenseRestoreArray(X,&bptr);CHKERRQ(ierr); 206 207 if (lu->options.PrintStat) PetscStackCall("SuperLU_DIST:PStatPrint",PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ 208 PetscStackCall("SuperLU_DIST:PStatFree",PStatFree(&stat)); 209 lu->matsolve_iscalled = PETSC_FALSE; 210 lu->matmatsolve_iscalled = PETSC_TRUE; 211 PetscFunctionReturn(0); 212 } 213 214 /* 215 input: 216 F: numeric Cholesky factor 217 output: 218 nneg: total number of negative pivots 219 nzero: total number of zero pivots 220 npos: (global dimension of F) - nneg - nzero 221 */ 222 static PetscErrorCode MatGetInertia_SuperLU_DIST(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos) 223 { 224 PetscErrorCode ierr; 225 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->data; 226 PetscScalar *diagU=NULL; 227 PetscInt M,i,neg=0,zero=0,pos=0; 228 PetscReal r; 229 230 PetscFunctionBegin; 231 if (!F->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix factor F is not assembled"); 232 if (lu->options.RowPerm != NOROWPERM) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must set NOROWPERM"); 233 ierr = MatGetSize(F,&M,NULL);CHKERRQ(ierr); 234 ierr = PetscMalloc1(M,&diagU);CHKERRQ(ierr); 235 ierr = MatSuperluDistGetDiagU(F,diagU);CHKERRQ(ierr); 236 for (i=0; i<M; i++) { 237 #if defined(PETSC_USE_COMPLEX) 238 r = PetscImaginaryPart(diagU[i])/10.0; 239 if (r< -PETSC_MACHINE_EPSILON || r>PETSC_MACHINE_EPSILON) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"diagU[%d]=%g + i %g is non-real",i,PetscRealPart(diagU[i]),r*10.0); 240 r = PetscRealPart(diagU[i]); 241 #else 242 r = diagU[i]; 243 #endif 244 if (r > 0) { 245 pos++; 246 } else if (r < 0) { 247 neg++; 248 } else zero++; 249 } 250 251 ierr = PetscFree(diagU);CHKERRQ(ierr); 252 if (nneg) *nneg = neg; 253 if (nzero) *nzero = zero; 254 if (npos) *npos = pos; 255 PetscFunctionReturn(0); 256 } 257 258 static PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F,Mat A,const MatFactorInfo *info) 259 { 260 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->data; 261 Mat Aloc; 262 const PetscScalar *av; 263 const PetscInt *ai=NULL,*aj=NULL; 264 PetscInt nz,dummy; 265 int sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */ 266 SuperLUStat_t stat; 267 double *berr=0; 268 PetscBool ismpiaij,isseqaij,flg; 269 PetscErrorCode ierr; 270 271 PetscFunctionBegin; 272 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 273 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATMPIAIJ,&ismpiaij);CHKERRQ(ierr); 274 if (ismpiaij) { 275 ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr); 276 } else if (isseqaij) { 277 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 278 Aloc = A; 279 } else SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for type %s",((PetscObject)A)->type_name); 280 281 ierr = MatGetRowIJ(Aloc,0,PETSC_FALSE,PETSC_FALSE,&dummy,&ai,&aj,&flg);CHKERRQ(ierr); 282 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GetRowIJ failed"); 283 ierr = MatSeqAIJGetArrayRead(Aloc,&av);CHKERRQ(ierr); 284 nz = ai[Aloc->rmap->n]; 285 286 /* Allocations for A_sup */ 287 if (lu->options.Fact == DOFACT) { /* first numeric factorization */ 288 #if defined(PETSC_USE_COMPLEX) 289 PetscStackCall("SuperLU_DIST:zallocateA_dist",zallocateA_dist(Aloc->rmap->n, nz, &lu->val, &lu->col, &lu->row)); 290 #else 291 PetscStackCall("SuperLU_DIST:dallocateA_dist",dallocateA_dist(Aloc->rmap->n, nz, &lu->val, &lu->col, &lu->row)); 292 #endif 293 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 294 if (lu->FactPattern == SamePattern_SameRowPerm) { 295 lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */ 296 } else if (lu->FactPattern == SamePattern) { 297 PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct)); /* Deallocate L and U matrices. */ 298 lu->options.Fact = SamePattern; 299 } else if (lu->FactPattern == DOFACT) { 300 PetscStackCall("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist",Destroy_CompRowLoc_Matrix_dist(&lu->A_sup)); 301 PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct)); 302 lu->options.Fact = DOFACT; 303 304 #if defined(PETSC_USE_COMPLEX) 305 PetscStackCall("SuperLU_DIST:zallocateA_dist",zallocateA_dist(Aloc->rmap->n, nz, &lu->val, &lu->col, &lu->row)); 306 #else 307 PetscStackCall("SuperLU_DIST:dallocateA_dist",dallocateA_dist(Aloc->rmap->n, nz, &lu->val, &lu->col, &lu->row)); 308 #endif 309 } else { 310 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"options.Fact must be one of SamePattern SamePattern_SameRowPerm DOFACT"); 311 } 312 } 313 314 /* Copy AIJ matrix to superlu_dist matrix */ 315 ierr = PetscArraycpy(lu->row,ai,Aloc->rmap->n+1);CHKERRQ(ierr); 316 ierr = PetscArraycpy(lu->col,aj,nz);CHKERRQ(ierr); 317 ierr = PetscArraycpy(lu->val,av,nz);CHKERRQ(ierr); 318 ierr = MatRestoreRowIJ(Aloc,0,PETSC_FALSE,PETSC_FALSE,&dummy,&ai,&aj,&flg);CHKERRQ(ierr); 319 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"RestoreRowIJ failed"); 320 ierr = MatSeqAIJRestoreArrayRead(Aloc,&av);CHKERRQ(ierr); 321 ierr = MatDestroy(&Aloc);CHKERRQ(ierr); 322 323 /* Create and setup A_sup */ 324 if (lu->options.Fact == DOFACT) { 325 #if defined(PETSC_USE_COMPLEX) 326 PetscStackCall("SuperLU_DIST:zCreate_CompRowLoc_Matrix_dist",zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, A->rmap->N, A->cmap->N, nz, A->rmap->n, A->rmap->rstart, lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE)); 327 #else 328 PetscStackCall("SuperLU_DIST:dCreate_CompRowLoc_Matrix_dist",dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, A->rmap->N, A->cmap->N, nz, A->rmap->n, A->rmap->rstart, lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE)); 329 #endif 330 } 331 332 /* Factor the matrix. */ 333 PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat)); /* Initialize the statistics variables. */ 334 #if defined(PETSC_USE_COMPLEX) 335 PetscStackCall("SuperLU_DIST:pzgssvx",pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo)); 336 #else 337 PetscStackCall("SuperLU_DIST:pdgssvx",pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo)); 338 #endif 339 340 if (sinfo > 0) { 341 if (A->erroriffailure) { 342 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot in row %D",sinfo); 343 } else { 344 if (sinfo <= lu->A_sup.ncol) { 345 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 346 ierr = PetscInfo1(F,"U(i,i) is exactly zero, i= %D\n",sinfo);CHKERRQ(ierr); 347 } else if (sinfo > lu->A_sup.ncol) { 348 /* 349 number of bytes allocated when memory allocation 350 failure occurred, plus A->ncol. 351 */ 352 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 353 ierr = PetscInfo1(F,"Number of bytes allocated when memory allocation fails %D\n",sinfo);CHKERRQ(ierr); 354 } 355 } 356 } else if (sinfo < 0) { 357 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %D, argument in p*gssvx() had an illegal value", sinfo); 358 } 359 360 if (lu->options.PrintStat) { 361 PetscStackCall("SuperLU_DIST:PStatPrint",PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ 362 } 363 PetscStackCall("SuperLU_DIST:PStatFree",PStatFree(&stat)); 364 F->assembled = PETSC_TRUE; 365 F->preallocated = PETSC_TRUE; 366 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */ 367 PetscFunctionReturn(0); 368 } 369 370 /* Note the Petsc r and c permutations are ignored */ 371 static PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 372 { 373 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->data; 374 PetscInt M = A->rmap->N,N=A->cmap->N; 375 376 PetscFunctionBegin; 377 /* Initialize the SuperLU process grid. */ 378 PetscStackCall("SuperLU_DIST:superlu_gridinit",superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid)); 379 380 /* Initialize ScalePermstruct and LUstruct. */ 381 PetscStackCall("SuperLU_DIST:ScalePermstructInit",ScalePermstructInit(M, N, &lu->ScalePermstruct)); 382 PetscStackCall("SuperLU_DIST:LUstructInit",LUstructInit(N, &lu->LUstruct)); 383 F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 384 F->ops->solve = MatSolve_SuperLU_DIST; 385 F->ops->matsolve = MatMatSolve_SuperLU_DIST; 386 F->ops->getinertia = NULL; 387 388 if (A->symmetric || A->hermitian) F->ops->getinertia = MatGetInertia_SuperLU_DIST; 389 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 390 PetscFunctionReturn(0); 391 } 392 393 static PetscErrorCode MatCholeskyFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,const MatFactorInfo *info) 394 { 395 PetscErrorCode ierr; 396 397 PetscFunctionBegin; 398 ierr = MatLUFactorSymbolic_SuperLU_DIST(F,A,r,r,info);CHKERRQ(ierr); 399 F->ops->choleskyfactornumeric = MatLUFactorNumeric_SuperLU_DIST; 400 PetscFunctionReturn(0); 401 } 402 403 static PetscErrorCode MatFactorGetSolverType_aij_superlu_dist(Mat A,MatSolverType *type) 404 { 405 PetscFunctionBegin; 406 *type = MATSOLVERSUPERLU_DIST; 407 PetscFunctionReturn(0); 408 } 409 410 static PetscErrorCode MatView_Info_SuperLU_DIST(Mat A,PetscViewer viewer) 411 { 412 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->data; 413 superlu_dist_options_t options; 414 PetscErrorCode ierr; 415 416 PetscFunctionBegin; 417 /* check if matrix is superlu_dist type */ 418 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0); 419 420 options = lu->options; 421 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr); 422 ierr = PetscViewerASCIIPrintf(viewer," Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 423 ierr = PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",PetscBools[options.Equil != NO]);CHKERRQ(ierr); 424 ierr = PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",PetscBools[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr); 425 ierr = PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",PetscBools[options.IterRefine == SLU_DOUBLE]);CHKERRQ(ierr); 426 ierr = PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr); 427 428 switch (options.RowPerm) { 429 case NOROWPERM: 430 ierr = PetscViewerASCIIPrintf(viewer," Row permutation NOROWPERM\n");CHKERRQ(ierr); 431 break; 432 case LargeDiag_MC64: 433 ierr = PetscViewerASCIIPrintf(viewer," Row permutation LargeDiag_MC64\n");CHKERRQ(ierr); 434 break; 435 case LargeDiag_AWPM: 436 ierr = PetscViewerASCIIPrintf(viewer," Row permutation LargeDiag_AWPM\n");CHKERRQ(ierr); 437 break; 438 case MY_PERMR: 439 ierr = PetscViewerASCIIPrintf(viewer," Row permutation MY_PERMR\n");CHKERRQ(ierr); 440 break; 441 default: 442 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 443 } 444 445 switch (options.ColPerm) { 446 case NATURAL: 447 ierr = PetscViewerASCIIPrintf(viewer," Column permutation NATURAL\n");CHKERRQ(ierr); 448 break; 449 case MMD_AT_PLUS_A: 450 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr); 451 break; 452 case MMD_ATA: 453 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_ATA\n");CHKERRQ(ierr); 454 break; 455 /* Even though this is called METIS, the SuperLU_DIST code sets this by default if PARMETIS is defined, not METIS */ 456 case METIS_AT_PLUS_A: 457 ierr = PetscViewerASCIIPrintf(viewer," Column permutation METIS_AT_PLUS_A\n");CHKERRQ(ierr); 458 break; 459 case PARMETIS: 460 ierr = PetscViewerASCIIPrintf(viewer," Column permutation PARMETIS\n");CHKERRQ(ierr); 461 break; 462 default: 463 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 464 } 465 466 ierr = PetscViewerASCIIPrintf(viewer," Parallel symbolic factorization %s \n",PetscBools[options.ParSymbFact != NO]);CHKERRQ(ierr); 467 468 if (lu->FactPattern == SamePattern) { 469 ierr = PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern\n");CHKERRQ(ierr); 470 } else if (lu->FactPattern == SamePattern_SameRowPerm) { 471 ierr = PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern_SameRowPerm\n");CHKERRQ(ierr); 472 } else if (lu->FactPattern == DOFACT) { 473 ierr = PetscViewerASCIIPrintf(viewer," Repeated factorization DOFACT\n");CHKERRQ(ierr); 474 } else { 475 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown factorization pattern"); 476 } 477 PetscFunctionReturn(0); 478 } 479 480 static PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer) 481 { 482 PetscErrorCode ierr; 483 PetscBool iascii; 484 PetscViewerFormat format; 485 486 PetscFunctionBegin; 487 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 488 if (iascii) { 489 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 490 if (format == PETSC_VIEWER_ASCII_INFO) { 491 ierr = MatView_Info_SuperLU_DIST(A,viewer);CHKERRQ(ierr); 492 } 493 } 494 PetscFunctionReturn(0); 495 } 496 497 static PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A,MatFactorType ftype,Mat *F) 498 { 499 Mat B; 500 Mat_SuperLU_DIST *lu; 501 PetscErrorCode ierr; 502 PetscInt M=A->rmap->N,N=A->cmap->N,indx; 503 PetscMPIInt size; 504 superlu_dist_options_t options; 505 PetscBool flg; 506 const char *colperm[] = {"NATURAL","MMD_AT_PLUS_A","MMD_ATA","METIS_AT_PLUS_A","PARMETIS"}; 507 const char *rowperm[] = {"NOROWPERM","LargeDiag_MC64","LargeDiag_AWPM","MY_PERMR"}; 508 const char *factPattern[] = {"SamePattern","SamePattern_SameRowPerm","DOFACT"}; 509 PetscBool set; 510 511 PetscFunctionBegin; 512 /* Create the factorization matrix */ 513 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 514 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,M,N);CHKERRQ(ierr); 515 ierr = PetscStrallocpy("superlu_dist",&((PetscObject)B)->type_name);CHKERRQ(ierr); 516 ierr = MatSetUp(B);CHKERRQ(ierr); 517 B->ops->getinfo = MatGetInfo_External; 518 B->ops->view = MatView_SuperLU_DIST; 519 B->ops->destroy = MatDestroy_SuperLU_DIST; 520 521 /* Set the default input options: 522 options.Fact = DOFACT; 523 options.Equil = YES; 524 options.ParSymbFact = NO; 525 options.ColPerm = METIS_AT_PLUS_A; 526 options.RowPerm = LargeDiag_MC64; 527 options.ReplaceTinyPivot = YES; 528 options.IterRefine = DOUBLE; 529 options.Trans = NOTRANS; 530 options.SolveInitialized = NO; -hold the communication pattern used MatSolve() and MatMatSolve() 531 options.RefineInitialized = NO; 532 options.PrintStat = YES; 533 options.SymPattern = NO; 534 */ 535 set_default_options_dist(&options); 536 537 if (ftype == MAT_FACTOR_LU) { 538 B->factortype = MAT_FACTOR_LU; 539 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 540 } else { 541 B->factortype = MAT_FACTOR_CHOLESKY; 542 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SuperLU_DIST; 543 options.SymPattern = YES; 544 } 545 546 /* set solvertype */ 547 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 548 ierr = PetscStrallocpy(MATSOLVERSUPERLU_DIST,&B->solvertype);CHKERRQ(ierr); 549 550 ierr = PetscNewLog(B,&lu);CHKERRQ(ierr); 551 B->data = lu; 552 553 ierr = MPI_Comm_dup(PetscObjectComm((PetscObject)A),&(lu->comm_superlu));CHKERRQ(ierr); 554 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 555 /* Default num of process columns and rows */ 556 lu->nprow = (int_t) (0.5 + PetscSqrtReal((PetscReal)size)); 557 if (!lu->nprow) lu->nprow = 1; 558 while (lu->nprow > 0) { 559 lu->npcol = (int_t) (size/lu->nprow); 560 if (size == lu->nprow * lu->npcol) break; 561 lu->nprow--; 562 } 563 564 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr); 565 ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,(PetscInt*)&lu->nprow,NULL);CHKERRQ(ierr); 566 ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,(PetscInt*)&lu->npcol,NULL);CHKERRQ(ierr); 567 if (size != lu->nprow * lu->npcol) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %d * npcol %d",size,lu->nprow,lu->npcol); 568 569 ierr = PetscOptionsBool("-mat_superlu_dist_equil","Equilibrate matrix","None",options.Equil ? PETSC_TRUE : PETSC_FALSE,&flg,&set);CHKERRQ(ierr); 570 if (set && !flg) options.Equil = NO; 571 572 ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",rowperm,4,rowperm[1],&indx,&flg);CHKERRQ(ierr); 573 if (flg) { 574 switch (indx) { 575 case 0: 576 options.RowPerm = NOROWPERM; 577 break; 578 case 1: 579 options.RowPerm = LargeDiag_MC64; 580 break; 581 case 2: 582 options.RowPerm = LargeDiag_AWPM; 583 break; 584 case 3: 585 options.RowPerm = MY_PERMR; 586 break; 587 default: 588 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown row permutation"); 589 } 590 } 591 592 ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",colperm,5,colperm[3],&indx,&flg);CHKERRQ(ierr); 593 if (flg) { 594 switch (indx) { 595 case 0: 596 options.ColPerm = NATURAL; 597 break; 598 case 1: 599 options.ColPerm = MMD_AT_PLUS_A; 600 break; 601 case 2: 602 options.ColPerm = MMD_ATA; 603 break; 604 case 3: 605 options.ColPerm = METIS_AT_PLUS_A; 606 break; 607 case 4: 608 options.ColPerm = PARMETIS; /* only works for np>1 */ 609 break; 610 default: 611 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 612 } 613 } 614 615 options.ReplaceTinyPivot = NO; 616 ierr = PetscOptionsBool("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",options.ReplaceTinyPivot ? PETSC_TRUE : PETSC_FALSE,&flg,&set);CHKERRQ(ierr); 617 if (set && flg) options.ReplaceTinyPivot = YES; 618 619 options.ParSymbFact = NO; 620 ierr = PetscOptionsBool("-mat_superlu_dist_parsymbfact","Parallel symbolic factorization","None",PETSC_FALSE,&flg,&set);CHKERRQ(ierr); 621 if (set && flg && size>1) { 622 #if defined(PETSC_HAVE_PARMETIS) 623 options.ParSymbFact = YES; 624 options.ColPerm = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */ 625 #else 626 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"parsymbfact needs PARMETIS"); 627 #endif 628 } 629 630 lu->FactPattern = SamePattern; 631 ierr = PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,3,factPattern[0],&indx,&flg);CHKERRQ(ierr); 632 if (flg) { 633 switch (indx) { 634 case 0: 635 lu->FactPattern = SamePattern; 636 break; 637 case 1: 638 lu->FactPattern = SamePattern_SameRowPerm; 639 break; 640 case 2: 641 lu->FactPattern = DOFACT; 642 break; 643 } 644 } 645 646 options.IterRefine = NOREFINE; 647 ierr = PetscOptionsBool("-mat_superlu_dist_iterrefine","Use iterative refinement","None",options.IterRefine == NOREFINE ? PETSC_FALSE : PETSC_TRUE ,&flg,&set);CHKERRQ(ierr); 648 if (set) { 649 if (flg) options.IterRefine = SLU_DOUBLE; 650 else options.IterRefine = NOREFINE; 651 } 652 653 if (PetscLogPrintInfo) options.PrintStat = YES; 654 else options.PrintStat = NO; 655 ierr = PetscOptionsBool("-mat_superlu_dist_statprint","Print factorization information","None",(PetscBool)options.PrintStat,(PetscBool*)&options.PrintStat,NULL);CHKERRQ(ierr); 656 ierr = PetscOptionsEnd();CHKERRQ(ierr); 657 658 lu->options = options; 659 lu->options.Fact = DOFACT; 660 lu->matsolve_iscalled = PETSC_FALSE; 661 lu->matmatsolve_iscalled = PETSC_FALSE; 662 663 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_aij_superlu_dist);CHKERRQ(ierr); 664 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSuperluDistGetDiagU_C",MatSuperluDistGetDiagU_SuperLU_DIST);CHKERRQ(ierr); 665 666 *F = B; 667 PetscFunctionReturn(0); 668 } 669 670 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_SuperLU_DIST(void) 671 { 672 PetscErrorCode ierr; 673 PetscFunctionBegin; 674 ierr = MatSolverTypeRegister(MATSOLVERSUPERLU_DIST,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_superlu_dist);CHKERRQ(ierr); 675 ierr = MatSolverTypeRegister(MATSOLVERSUPERLU_DIST,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_superlu_dist);CHKERRQ(ierr); 676 ierr = MatSolverTypeRegister(MATSOLVERSUPERLU_DIST,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_superlu_dist);CHKERRQ(ierr); 677 ierr = MatSolverTypeRegister(MATSOLVERSUPERLU_DIST,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_superlu_dist);CHKERRQ(ierr); 678 PetscFunctionReturn(0); 679 } 680 681 /*MC 682 MATSOLVERSUPERLU_DIST - Parallel direct solver package for LU factorization 683 684 Use ./configure --download-superlu_dist --download-parmetis --download-metis --download-ptscotch to have PETSc installed with SuperLU_DIST 685 686 Use -pc_type lu -pc_factor_mat_solver_type superlu_dist to use this direct solver 687 688 Works with AIJ matrices 689 690 Options Database Keys: 691 + -mat_superlu_dist_r <n> - number of rows in processor partition 692 . -mat_superlu_dist_c <n> - number of columns in processor partition 693 . -mat_superlu_dist_equil - equilibrate the matrix 694 . -mat_superlu_dist_rowperm <NOROWPERM,LargeDiag_MC64,LargeDiag_AWPM,MY_PERMR> - row permutation 695 . -mat_superlu_dist_colperm <NATURAL,MMD_AT_PLUS_A,MMD_ATA,METIS_AT_PLUS_A,PARMETIS> - column permutation 696 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 697 . -mat_superlu_dist_fact <SamePattern> - (choose one of) SamePattern SamePattern_SameRowPerm DOFACT 698 . -mat_superlu_dist_iterrefine - use iterative refinement 699 - -mat_superlu_dist_statprint - print factorization information 700 701 Level: beginner 702 703 .seealso: PCLU 704 705 .seealso: PCFactorSetMatSolverType(), MatSolverType 706 707 M*/ 708