1 /* 2 Provides an interface to the SuperLU_DIST sparse solver 3 */ 4 5 #include <../src/mat/impls/aij/seq/aij.h> 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> 7 #include <petscpkg_version.h> 8 9 PETSC_PRAGMA_DIAGNOSTIC_IGNORED_BEGIN("-Wundef") 10 EXTERN_C_BEGIN 11 // To suppress compiler warnings 12 #define DISABLE_CUSPARSE_DEPRECATED 13 #if defined(PETSC_USE_COMPLEX) 14 #define CASTDOUBLECOMPLEX (doublecomplex *) 15 #define CASTDOUBLECOMPLEXSTAR (doublecomplex **) 16 #include <superlu_zdefs.h> 17 #define LUstructInit zLUstructInit 18 #define ScalePermstructInit zScalePermstructInit 19 #define ScalePermstructFree zScalePermstructFree 20 #define LUstructFree zLUstructFree 21 #define Destroy_LU zDestroy_LU 22 #define ScalePermstruct_t zScalePermstruct_t 23 #define LUstruct_t zLUstruct_t 24 #define SOLVEstruct_t zSOLVEstruct_t 25 #define SolveFinalize zSolveFinalize 26 #define pGetDiagU pzGetDiagU 27 #define pgssvx pzgssvx 28 #define allocateA_dist zallocateA_dist 29 #define Create_CompRowLoc_Matrix_dist zCreate_CompRowLoc_Matrix_dist 30 #define SLU SLU_Z 31 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0) 32 #define DeAllocLlu_3d zDeAllocLlu_3d 33 #define DeAllocGlu_3d zDeAllocGlu_3d 34 #define Destroy_A3d_gathered_on_2d zDestroy_A3d_gathered_on_2d 35 #define pgssvx3d pzgssvx3d 36 #endif 37 #elif defined(PETSC_USE_REAL_SINGLE) 38 #define CASTDOUBLECOMPLEX 39 #define CASTDOUBLECOMPLEXSTAR 40 #include <superlu_sdefs.h> 41 #define LUstructInit sLUstructInit 42 #define ScalePermstructInit sScalePermstructInit 43 #define ScalePermstructFree sScalePermstructFree 44 #define LUstructFree sLUstructFree 45 #define Destroy_LU sDestroy_LU 46 #define ScalePermstruct_t sScalePermstruct_t 47 #define LUstruct_t sLUstruct_t 48 #define SOLVEstruct_t sSOLVEstruct_t 49 #define SolveFinalize sSolveFinalize 50 #define pGetDiagU psGetDiagU 51 #define pgssvx psgssvx 52 #define allocateA_dist sallocateA_dist 53 #define Create_CompRowLoc_Matrix_dist sCreate_CompRowLoc_Matrix_dist 54 #define SLU SLU_S 55 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0) 56 #define DeAllocLlu_3d sDeAllocLlu_3d 57 #define DeAllocGlu_3d sDeAllocGlu_3d 58 #define Destroy_A3d_gathered_on_2d sDestroy_A3d_gathered_on_2d 59 #define pgssvx3d psgssvx3d 60 #endif 61 #else 62 #define CASTDOUBLECOMPLEX 63 #define CASTDOUBLECOMPLEXSTAR 64 #include <superlu_ddefs.h> 65 #define LUstructInit dLUstructInit 66 #define ScalePermstructInit dScalePermstructInit 67 #define ScalePermstructFree dScalePermstructFree 68 #define LUstructFree dLUstructFree 69 #define Destroy_LU dDestroy_LU 70 #define ScalePermstruct_t dScalePermstruct_t 71 #define LUstruct_t dLUstruct_t 72 #define SOLVEstruct_t dSOLVEstruct_t 73 #define SolveFinalize dSolveFinalize 74 #define pGetDiagU pdGetDiagU 75 #define pgssvx pdgssvx 76 #define allocateA_dist dallocateA_dist 77 #define Create_CompRowLoc_Matrix_dist dCreate_CompRowLoc_Matrix_dist 78 #define SLU SLU_D 79 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0) 80 #define DeAllocLlu_3d dDeAllocLlu_3d 81 #define DeAllocGlu_3d dDeAllocGlu_3d 82 #define Destroy_A3d_gathered_on_2d dDestroy_A3d_gathered_on_2d 83 #define pgssvx3d pdgssvx3d 84 #endif 85 #endif 86 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 87 #include <superlu_sdefs.h> 88 #endif 89 EXTERN_C_END 90 PETSC_PRAGMA_DIAGNOSTIC_IGNORED_END() 91 92 typedef struct { 93 int_t nprow, npcol, *row, *col; 94 gridinfo_t grid; 95 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0) 96 PetscBool use3d; 97 int_t npdep; /* replication factor, must be power of two */ 98 gridinfo3d_t grid3d; 99 #endif 100 superlu_dist_options_t options; 101 SuperMatrix A_sup; 102 ScalePermstruct_t ScalePermstruct; 103 LUstruct_t LUstruct; 104 int StatPrint; 105 SOLVEstruct_t SOLVEstruct; 106 fact_t FactPattern; 107 MPI_Comm comm_superlu; 108 PetscScalar *val; 109 PetscBool matsolve_iscalled, matmatsolve_iscalled; 110 PetscBool CleanUpSuperLU_Dist; /* Flag to clean up (non-global) SuperLU objects during Destroy */ 111 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 112 sScalePermstruct_t sScalePermstruct; 113 sLUstruct_t sLUstruct; 114 sSOLVEstruct_t sSOLVEstruct; 115 float *sval; 116 PetscBool singleprecision; /* use single precision SuperLU_DIST from double precision PETSc */ 117 float *sbptr; 118 #endif 119 } Mat_SuperLU_DIST; 120 121 static PetscErrorCode MatSuperluDistGetDiagU_SuperLU_DIST(Mat F, PetscScalar *diagU) 122 { 123 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data; 124 125 PetscFunctionBegin; 126 PetscStackCallExternalVoid("SuperLU_DIST:pGetDiagU", pGetDiagU(F->rmap->N, &lu->LUstruct, &lu->grid, CASTDOUBLECOMPLEX diagU)); 127 PetscFunctionReturn(PETSC_SUCCESS); 128 } 129 130 PetscErrorCode MatSuperluDistGetDiagU(Mat F, PetscScalar *diagU) 131 { 132 PetscFunctionBegin; 133 PetscValidHeaderSpecific(F, MAT_CLASSID, 1); 134 PetscTryMethod(F, "MatSuperluDistGetDiagU_C", (Mat, PetscScalar *), (F, diagU)); 135 PetscFunctionReturn(PETSC_SUCCESS); 136 } 137 138 /* This allows reusing the Superlu_DIST communicator and grid when only a single SuperLU_DIST matrix is used at a time */ 139 typedef struct { 140 MPI_Comm comm; 141 PetscBool busy; 142 gridinfo_t grid; 143 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0) 144 PetscBool use3d; 145 gridinfo3d_t grid3d; 146 #endif 147 } PetscSuperLU_DIST; 148 149 static PetscMPIInt Petsc_Superlu_dist_keyval = MPI_KEYVAL_INVALID; 150 151 PETSC_EXTERN PetscMPIInt MPIAPI Petsc_Superlu_dist_keyval_DeleteFn(MPI_Comm comm, PetscMPIInt keyval, void *attr_val, void *extra_state) 152 { 153 PetscSuperLU_DIST *context = (PetscSuperLU_DIST *)attr_val; 154 155 PetscFunctionBegin; 156 PetscCheckReturnMPI(keyval == Petsc_Superlu_dist_keyval, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Unexpected keyval"); 157 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0) 158 if (context->use3d) { 159 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit3d", superlu_gridexit3d(&context->grid3d)); 160 } else 161 #endif 162 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit", superlu_gridexit(&context->grid)); 163 PetscCallMPIReturnMPI(MPI_Comm_free(&context->comm)); 164 PetscCallReturnMPI(PetscFree(context)); 165 PetscFunctionReturn(MPI_SUCCESS); 166 } 167 168 /* 169 Performs MPI_Comm_free_keyval() on Petsc_Superlu_dist_keyval but keeps the global variable for 170 users who do not destroy all PETSc objects before PetscFinalize(). 171 172 The value Petsc_Superlu_dist_keyval is retained so that Petsc_Superlu_dist_keyval_DeleteFn() 173 can still check that the keyval associated with the MPI communicator is correct when the MPI 174 communicator is destroyed. 175 176 This is called in PetscFinalize() 177 */ 178 static PetscErrorCode Petsc_Superlu_dist_keyval_free(void) 179 { 180 PetscMPIInt Petsc_Superlu_dist_keyval_temp = Petsc_Superlu_dist_keyval; 181 182 PetscFunctionBegin; 183 PetscCallMPI(MPI_Comm_free_keyval(&Petsc_Superlu_dist_keyval_temp)); 184 PetscFunctionReturn(PETSC_SUCCESS); 185 } 186 187 static PetscErrorCode MatDestroy_SuperLU_DIST(Mat A) 188 { 189 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data; 190 191 PetscFunctionBegin; 192 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 193 PetscCall(PetscFree(lu->sbptr)); 194 #endif 195 if (lu->CleanUpSuperLU_Dist) { 196 /* Deallocate SuperLU_DIST storage */ 197 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist", Destroy_CompRowLoc_Matrix_dist(&lu->A_sup)); 198 if (lu->options.SolveInitialized) { 199 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 200 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", sSolveFinalize(&lu->options, &lu->sSOLVEstruct)); 201 else 202 #endif 203 PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct)); 204 } 205 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0) 206 if (lu->use3d) { 207 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 208 if (lu->singleprecision) { 209 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->sLUstruct)); 210 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_A3d_gathered_on_2d", sDestroy_A3d_gathered_on_2d(&lu->sSOLVEstruct, &lu->grid3d)); 211 PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", sScalePermstructFree(&lu->sScalePermstruct)); 212 PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", sLUstructFree(&lu->sLUstruct)); 213 } else 214 #endif 215 { 216 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct)); 217 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_A3d_gathered_on_2d", Destroy_A3d_gathered_on_2d(&lu->SOLVEstruct, &lu->grid3d)); 218 PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", ScalePermstructFree(&lu->ScalePermstruct)); 219 PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", LUstructFree(&lu->LUstruct)); 220 } 221 } else 222 #endif 223 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 224 if (lu->singleprecision) { 225 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->cmap->N, &lu->grid, &lu->sLUstruct)); 226 PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", sScalePermstructFree(&lu->sScalePermstruct)); 227 PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", sLUstructFree(&lu->sLUstruct)); 228 } else 229 #endif 230 { 231 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct)); 232 PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", ScalePermstructFree(&lu->ScalePermstruct)); 233 PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", LUstructFree(&lu->LUstruct)); 234 } 235 /* Release the SuperLU_DIST process grid only if the matrix has its own copy, that is it is not in the communicator context */ 236 if (lu->comm_superlu) { 237 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0) 238 if (lu->use3d) { 239 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit3d", superlu_gridexit3d(&lu->grid3d)); 240 } else 241 #endif 242 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit", superlu_gridexit(&lu->grid)); 243 } 244 } 245 /* 246 * We always need to release the communicator that was created in MatGetFactor_aij_superlu_dist. 247 * lu->CleanUpSuperLU_Dist was turned on in MatLUFactorSymbolic_SuperLU_DIST. There are some use 248 * cases where we only create a matrix but do not solve mat. In these cases, lu->CleanUpSuperLU_Dist 249 * is off, and the communicator was not released or marked as "not busy " in the old code. 250 * Here we try to release comm regardless. 251 */ 252 if (lu->comm_superlu) { 253 PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &lu->comm_superlu)); 254 } else { 255 PetscSuperLU_DIST *context; 256 MPI_Comm comm; 257 PetscMPIInt iflg; 258 259 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 260 PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &iflg)); 261 if (iflg) context->busy = PETSC_FALSE; 262 } 263 264 PetscCall(PetscFree(A->data)); 265 /* clear composed functions */ 266 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 267 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSuperluDistGetDiagU_C", NULL)); 268 PetscFunctionReturn(PETSC_SUCCESS); 269 } 270 271 static PetscErrorCode MatSolve_SuperLU_DIST(Mat A, Vec b_mpi, Vec x) 272 { 273 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data; 274 PetscInt m = A->rmap->n; 275 SuperLUStat_t stat; 276 PetscReal berr[1]; 277 PetscScalar *bptr = NULL; 278 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 279 float sberr[1]; 280 #endif 281 int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */ 282 static PetscBool cite = PETSC_FALSE; 283 284 PetscFunctionBegin; 285 PetscCheck(lu->options.Fact == FACTORED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "SuperLU_DIST options.Fact must equal FACTORED"); 286 PetscCall(PetscCitationsRegister("@article{lidemmel03,\n author = {Xiaoye S. Li and James W. Demmel},\n title = {{SuperLU_DIST}: A Scalable Distributed-Memory Sparse Direct\n Solver for Unsymmetric Linear Systems},\n journal = {ACM " 287 "Trans. Mathematical Software},\n volume = {29},\n number = {2},\n pages = {110-140},\n year = 2003\n}\n", 288 &cite)); 289 290 if (lu->options.SolveInitialized && !lu->matsolve_iscalled) { 291 /* see comments in MatMatSolve() */ 292 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 293 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", sSolveFinalize(&lu->options, &lu->sSOLVEstruct)); 294 else 295 #endif 296 PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct)); 297 lu->options.SolveInitialized = NO; 298 } 299 PetscCall(VecCopy(b_mpi, x)); 300 PetscCall(VecGetArray(x, &bptr)); 301 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 302 if (lu->singleprecision) { 303 PetscInt n; 304 PetscCall(VecGetLocalSize(x, &n)); 305 if (!lu->sbptr) PetscCall(PetscMalloc1(n, &lu->sbptr)); 306 for (PetscInt i = 0; i < n; i++) lu->sbptr[i] = PetscRealPart(bptr[i]); /* PetscRealPart() is a no-op to allow compiling with complex */ 307 } 308 #endif 309 310 PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */ 311 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0) 312 if (lu->use3d) { 313 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 314 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", psgssvx3d(&lu->options, &lu->A_sup, &lu->sScalePermstruct, lu->sbptr, (int)m, 1, &lu->grid3d, &lu->sLUstruct, &lu->sSOLVEstruct, sberr, &stat, &info)); 315 else 316 #endif 317 PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, (int)m, 1, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info)); 318 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "pgssvx3d fails, info: %d", info); 319 } else 320 #endif 321 { 322 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 323 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", psgssvx(&lu->options, &lu->A_sup, &lu->sScalePermstruct, lu->sbptr, (int)m, 1, &lu->grid, &lu->sLUstruct, &lu->sSOLVEstruct, sberr, &stat, &info)); 324 else 325 #endif 326 PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, (int)m, 1, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info)); 327 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "pgssvx fails, info: %d", info); 328 } 329 if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ 330 PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat)); 331 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 332 if (lu->singleprecision) { 333 PetscInt n; 334 PetscCall(VecGetLocalSize(x, &n)); 335 for (PetscInt i = 0; i < n; i++) bptr[i] = lu->sbptr[i]; 336 } 337 #endif 338 PetscCall(VecRestoreArray(x, &bptr)); 339 lu->matsolve_iscalled = PETSC_TRUE; 340 lu->matmatsolve_iscalled = PETSC_FALSE; 341 PetscFunctionReturn(PETSC_SUCCESS); 342 } 343 344 static PetscErrorCode MatMatSolve_SuperLU_DIST(Mat A, Mat B_mpi, Mat X) 345 { 346 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data; 347 PetscInt m = A->rmap->n, nrhs; 348 SuperLUStat_t stat; 349 PetscReal berr[1]; 350 PetscScalar *bptr; 351 int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */ 352 PetscBool flg; 353 354 PetscFunctionBegin; 355 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 356 PetscCheck(!lu->singleprecision, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for -pc_precision single"); 357 #endif 358 PetscCheck(lu->options.Fact == FACTORED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "SuperLU_DIST options.Fact must equal FACTORED"); 359 PetscCall(PetscObjectTypeCompareAny((PetscObject)B_mpi, &flg, MATSEQDENSE, MATMPIDENSE, NULL)); 360 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix B must be MATDENSE matrix"); 361 if (X != B_mpi) { 362 PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &flg, MATSEQDENSE, MATMPIDENSE, NULL)); 363 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 364 } 365 366 if (lu->options.SolveInitialized && !lu->matmatsolve_iscalled) { 367 /* communication pattern of SOLVEstruct is unlikely created for matmatsolve, 368 thus destroy it and create a new SOLVEstruct. 369 Otherwise it may result in memory corruption or incorrect solution 370 See src/mat/tests/ex125.c */ 371 PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct)); 372 lu->options.SolveInitialized = NO; 373 } 374 if (X != B_mpi) PetscCall(MatCopy(B_mpi, X, SAME_NONZERO_PATTERN)); 375 376 PetscCall(MatGetSize(B_mpi, NULL, &nrhs)); 377 378 PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */ 379 PetscCall(MatDenseGetArray(X, &bptr)); 380 381 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0) 382 if (lu->use3d) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, (int)m, (int)nrhs, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info)); 383 else 384 #endif 385 PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, (int)m, (int)nrhs, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info)); 386 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "pdgssvx fails, info: %d", info); 387 PetscCall(MatDenseRestoreArray(X, &bptr)); 388 389 if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ 390 PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat)); 391 lu->matsolve_iscalled = PETSC_FALSE; 392 lu->matmatsolve_iscalled = PETSC_TRUE; 393 PetscFunctionReturn(PETSC_SUCCESS); 394 } 395 396 /* 397 input: 398 F: numeric Cholesky factor 399 output: 400 nneg: total number of negative pivots 401 nzero: total number of zero pivots 402 npos: (global dimension of F) - nneg - nzero 403 */ 404 static PetscErrorCode MatGetInertia_SuperLU_DIST(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 405 { 406 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data; 407 PetscScalar *diagU = NULL; 408 PetscInt M, i, neg = 0, zero = 0, pos = 0; 409 PetscReal r; 410 411 PetscFunctionBegin; 412 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 413 PetscCheck(!lu->singleprecision, PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Not for -pc_precision single"); 414 #endif 415 PetscCheck(F->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix factor F is not assembled"); 416 PetscCheck(lu->options.RowPerm == NOROWPERM, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must set NOROWPERM"); 417 PetscCall(MatGetSize(F, &M, NULL)); 418 PetscCall(PetscMalloc1(M, &diagU)); 419 PetscCall(MatSuperluDistGetDiagU(F, diagU)); 420 for (i = 0; i < M; i++) { 421 #if defined(PETSC_USE_COMPLEX) 422 r = PetscAbsReal(PetscImaginaryPart(diagU[i])); 423 PetscCheck(r < 1000 * PETSC_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "diagU[%" PetscInt_FMT "]=%g + i %g is non-real", i, (double)PetscRealPart(diagU[i]), (double)PetscImaginaryPart(diagU[i])); 424 r = PetscRealPart(diagU[i]); 425 #else 426 r = diagU[i]; 427 #endif 428 if (r > 0) { 429 pos++; 430 } else if (r < 0) { 431 neg++; 432 } else zero++; 433 } 434 435 PetscCall(PetscFree(diagU)); 436 if (nneg) *nneg = neg; 437 if (nzero) *nzero = zero; 438 if (npos) *npos = pos; 439 PetscFunctionReturn(PETSC_SUCCESS); 440 } 441 442 static PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F, Mat A, const MatFactorInfo *info) 443 { 444 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data; 445 Mat Aloc; 446 const PetscScalar *av; 447 const PetscInt *ai = NULL, *aj = NULL; 448 PetscInt nz, dummy; 449 int sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */ 450 SuperLUStat_t stat; 451 PetscReal *berr = 0; 452 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 453 float *sberr = 0; 454 #endif 455 PetscBool ismpiaij, isseqaij, flg; 456 457 PetscFunctionBegin; 458 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij)); 459 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &ismpiaij)); 460 if (ismpiaij) { 461 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &Aloc)); 462 } else if (isseqaij) { 463 PetscCall(PetscObjectReference((PetscObject)A)); 464 Aloc = A; 465 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name); 466 467 PetscCall(MatGetRowIJ(Aloc, 0, PETSC_FALSE, PETSC_FALSE, &dummy, &ai, &aj, &flg)); 468 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "GetRowIJ failed"); 469 PetscCall(MatSeqAIJGetArrayRead(Aloc, &av)); 470 nz = ai[Aloc->rmap->n]; 471 472 /* Allocations for A_sup */ 473 if (lu->options.Fact == DOFACT) { /* first numeric factorization */ 474 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 475 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", sallocateA_dist(Aloc->rmap->n, nz, &lu->sval, &lu->col, &lu->row)); 476 else 477 #endif 478 PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR & lu->val, &lu->col, &lu->row)); 479 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 480 if (lu->FactPattern == SamePattern_SameRowPerm) { 481 lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */ 482 } else if (lu->FactPattern == SamePattern) { 483 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0) 484 if (lu->use3d) { 485 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 486 if (lu->singleprecision) { 487 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->sLUstruct)); 488 PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", sSolveFinalize(&lu->options, &lu->sSOLVEstruct)); 489 } else 490 #endif 491 { 492 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct)); 493 PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct)); 494 } 495 } else 496 #endif 497 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 498 if (lu->singleprecision) 499 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->rmap->N, &lu->grid, &lu->sLUstruct)); 500 else 501 #endif 502 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct)); 503 lu->options.Fact = SamePattern; 504 } else if (lu->FactPattern == DOFACT) { 505 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist", Destroy_CompRowLoc_Matrix_dist(&lu->A_sup)); 506 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 507 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->rmap->N, &lu->grid, &lu->sLUstruct)); 508 else 509 #endif 510 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct)); 511 lu->options.Fact = DOFACT; 512 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 513 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", sallocateA_dist(Aloc->rmap->n, nz, &lu->sval, &lu->col, &lu->row)); 514 else 515 #endif 516 PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR & lu->val, &lu->col, &lu->row)); 517 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "options.Fact must be one of SamePattern SamePattern_SameRowPerm DOFACT"); 518 } 519 520 /* Copy AIJ matrix to superlu_dist matrix */ 521 PetscCall(PetscArraycpy(lu->row, ai, Aloc->rmap->n + 1)); 522 PetscCall(PetscArraycpy(lu->col, aj, nz)); 523 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 524 if (lu->singleprecision) 525 for (PetscInt i = 0; i < nz; i++) lu->sval[i] = PetscRealPart(av[i]); /* PetscRealPart() is a no-op to allow compiling with complex */ 526 else 527 #endif 528 PetscCall(PetscArraycpy(lu->val, av, nz)); 529 PetscCall(MatRestoreRowIJ(Aloc, 0, PETSC_FALSE, PETSC_FALSE, &dummy, &ai, &aj, &flg)); 530 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "RestoreRowIJ failed"); 531 PetscCall(MatSeqAIJRestoreArrayRead(Aloc, &av)); 532 PetscCall(MatDestroy(&Aloc)); 533 534 /* Create and setup A_sup */ 535 if (lu->options.Fact == DOFACT) { 536 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 537 if (lu->singleprecision) 538 PetscStackCallExternalVoid("SuperLU_DIST:Create_CompRowLoc_Matrix_dist", sCreate_CompRowLoc_Matrix_dist(&lu->A_sup, A->rmap->N, A->cmap->N, nz, A->rmap->n, A->rmap->rstart, lu->sval, lu->col, lu->row, SLU_NR_loc, SLU_S, SLU_GE)); 539 else 540 #endif 541 PetscStackCallExternalVoid("SuperLU_DIST:Create_CompRowLoc_Matrix_dist", Create_CompRowLoc_Matrix_dist(&lu->A_sup, A->rmap->N, A->cmap->N, nz, A->rmap->n, A->rmap->rstart, CASTDOUBLECOMPLEX lu->val, lu->col, lu->row, SLU_NR_loc, SLU, SLU_GE)); 542 } 543 544 /* Factor the matrix. */ 545 PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */ 546 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0) 547 if (lu->use3d) { 548 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 549 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", psgssvx3d(&lu->options, &lu->A_sup, &lu->sScalePermstruct, 0, (int)A->rmap->n, 0, &lu->grid3d, &lu->sLUstruct, &lu->sSOLVEstruct, sberr, &stat, &sinfo)); 550 else 551 #endif 552 PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, (int)A->rmap->n, 0, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo)); 553 } else 554 #endif 555 { 556 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 557 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", psgssvx(&lu->options, &lu->A_sup, &lu->sScalePermstruct, 0, (int)A->rmap->n, 0, &lu->grid, &lu->sLUstruct, &lu->sSOLVEstruct, sberr, &stat, &sinfo)); 558 else 559 #endif 560 PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, (int)A->rmap->n, 0, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo)); 561 } 562 if (sinfo > 0) { 563 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot in row %d", sinfo); 564 if (sinfo <= lu->A_sup.ncol) { 565 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 566 PetscCall(PetscInfo(F, "U(i,i) is exactly zero, i= %d\n", sinfo)); 567 } else if (sinfo > lu->A_sup.ncol) { 568 /* 569 number of bytes allocated when memory allocation 570 failure occurred, plus A->ncol. 571 */ 572 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 573 PetscCall(PetscInfo(F, "Number of bytes allocated when memory allocation fails %d\n", sinfo)); 574 } 575 } else PetscCheck(sinfo >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "info = %d, argument in p*gssvx() had an illegal value", sinfo); 576 577 if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ 578 PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat)); 579 F->assembled = PETSC_TRUE; 580 F->preallocated = PETSC_TRUE; 581 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */ 582 PetscFunctionReturn(PETSC_SUCCESS); 583 } 584 585 /* Note the PETSc r and c permutations are ignored */ 586 static PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 587 { 588 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data; 589 PetscInt M = A->rmap->N, N = A->cmap->N, indx; 590 PetscMPIInt size, iflg; 591 PetscBool flg, set; 592 const char *colperm[] = {"NATURAL", "MMD_AT_PLUS_A", "MMD_ATA", "METIS_AT_PLUS_A", "PARMETIS"}; 593 const char *rowperm[] = {"NOROWPERM", "LargeDiag_MC64", "LargeDiag_AWPM", "MY_PERMR"}; 594 const char *factPattern[] = {"SamePattern", "SamePattern_SameRowPerm", "DOFACT"}; 595 MPI_Comm comm; 596 PetscSuperLU_DIST *context = NULL; 597 598 PetscFunctionBegin; 599 /* Set options to F */ 600 PetscCall(PetscObjectGetComm((PetscObject)F, &comm)); 601 PetscCallMPI(MPI_Comm_size(comm, &size)); 602 603 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "SuperLU_Dist Options", "Mat"); 604 PetscCall(PetscOptionsBool("-mat_superlu_dist_equil", "Equilibrate matrix", "None", lu->options.Equil ? PETSC_TRUE : PETSC_FALSE, &flg, &set)); 605 if (set && !flg) lu->options.Equil = NO; 606 607 PetscCall(PetscOptionsEList("-mat_superlu_dist_rowperm", "Row permutation", "None", rowperm, 4, rowperm[1], &indx, &flg)); 608 if (flg) { 609 switch (indx) { 610 case 0: 611 lu->options.RowPerm = NOROWPERM; 612 break; 613 case 1: 614 lu->options.RowPerm = LargeDiag_MC64; 615 break; 616 case 2: 617 lu->options.RowPerm = LargeDiag_AWPM; 618 break; 619 case 3: 620 lu->options.RowPerm = MY_PERMR; 621 break; 622 default: 623 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown row permutation"); 624 } 625 } 626 627 PetscCall(PetscOptionsEList("-mat_superlu_dist_colperm", "Column permutation", "None", colperm, 5, colperm[3], &indx, &flg)); 628 if (flg) { 629 switch (indx) { 630 case 0: 631 lu->options.ColPerm = NATURAL; 632 break; 633 case 1: 634 lu->options.ColPerm = MMD_AT_PLUS_A; 635 break; 636 case 2: 637 lu->options.ColPerm = MMD_ATA; 638 break; 639 case 3: 640 lu->options.ColPerm = METIS_AT_PLUS_A; 641 break; 642 case 4: 643 lu->options.ColPerm = PARMETIS; /* only works for np>1 */ 644 break; 645 default: 646 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation"); 647 } 648 } 649 650 lu->options.ReplaceTinyPivot = NO; 651 PetscCall(PetscOptionsBool("-mat_superlu_dist_replacetinypivot", "Replace tiny pivots", "None", lu->options.ReplaceTinyPivot ? PETSC_TRUE : PETSC_FALSE, &flg, &set)); 652 if (set && flg) lu->options.ReplaceTinyPivot = YES; 653 654 lu->options.ParSymbFact = NO; 655 PetscCall(PetscOptionsBool("-mat_superlu_dist_parsymbfact", "Parallel symbolic factorization", "None", PETSC_FALSE, &flg, &set)); 656 if (set && flg && size > 1) { 657 #if defined(PETSC_HAVE_PARMETIS) 658 lu->options.ParSymbFact = YES; 659 lu->options.ColPerm = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */ 660 #else 661 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "parsymbfact needs PARMETIS"); 662 #endif 663 } 664 665 lu->FactPattern = SamePattern; 666 PetscCall(PetscOptionsEList("-mat_superlu_dist_fact", "Sparsity pattern for repeated matrix factorization", "None", factPattern, 3, factPattern[0], &indx, &flg)); 667 if (flg) { 668 switch (indx) { 669 case 0: 670 lu->FactPattern = SamePattern; 671 break; 672 case 1: 673 lu->FactPattern = SamePattern_SameRowPerm; 674 break; 675 case 2: 676 lu->FactPattern = DOFACT; 677 break; 678 } 679 } 680 681 lu->options.IterRefine = NOREFINE; 682 PetscCall(PetscOptionsBool("-mat_superlu_dist_iterrefine", "Use iterative refinement", "None", lu->options.IterRefine == NOREFINE ? PETSC_FALSE : PETSC_TRUE, &flg, &set)); 683 if (set && flg) lu->options.IterRefine = SLU_DOUBLE; 684 685 if (PetscLogPrintInfo) lu->options.PrintStat = YES; 686 else lu->options.PrintStat = NO; 687 PetscCall(PetscOptionsDeprecated("-mat_superlu_dist_statprint", "-mat_superlu_dist_printstat", "3.19", NULL)); 688 PetscCall(PetscOptionsBool("-mat_superlu_dist_printstat", "Print factorization information", "None", (PetscBool)lu->options.PrintStat, (PetscBool *)&lu->options.PrintStat, NULL)); 689 690 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(8, 0, 0) 691 lu->options.superlu_acc_offload = 1; 692 PetscCall(PetscOptionsBool("-mat_superlu_dist_gpuoffload", "Offload factorization onto the GPUs", "None", (PetscBool)lu->options.superlu_acc_offload, (PetscBool *)&lu->options.superlu_acc_offload, NULL)); 693 #endif 694 695 PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &iflg)); 696 if (!iflg || context->busy) { /* additional options */ 697 if (!iflg) { 698 PetscCall(PetscNew(&context)); 699 context->busy = PETSC_TRUE; 700 PetscCallMPI(MPI_Comm_dup(comm, &context->comm)); 701 PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_Superlu_dist_keyval, context)); 702 } else { 703 PetscCall(PetscCommGetComm(PetscObjectComm((PetscObject)A), &lu->comm_superlu)); 704 } 705 706 /* Default number of process columns and rows */ 707 lu->nprow = (int_t)(0.5 + PetscSqrtReal((PetscReal)size)); 708 if (!lu->nprow) lu->nprow = 1; 709 while (lu->nprow > 0) { 710 lu->npcol = (int_t)(size / lu->nprow); 711 if (size == lu->nprow * lu->npcol) break; 712 lu->nprow--; 713 } 714 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0) 715 lu->use3d = PETSC_FALSE; 716 lu->npdep = 1; 717 #endif 718 719 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0) 720 PetscCall(PetscOptionsBool("-mat_superlu_dist_3d", "Use SuperLU_DIST 3D distribution", "None", lu->use3d, &lu->use3d, NULL)); 721 if (lu->use3d) { 722 PetscInt t; 723 PetscCall(PetscOptionsInt("-mat_superlu_dist_d", "Number of z entries in processor partition", "None", lu->npdep, (PetscInt *)&lu->npdep, NULL)); 724 t = (PetscInt)PetscLog2Real((PetscReal)lu->npdep); 725 PetscCheck(PetscPowInt(2, t) == lu->npdep, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_OUTOFRANGE, "-mat_superlu_dist_d %lld must be a power of 2", (long long)lu->npdep); 726 if (lu->npdep > 1) { 727 lu->nprow = (int_t)(0.5 + PetscSqrtReal((PetscReal)(size / lu->npdep))); 728 if (!lu->nprow) lu->nprow = 1; 729 while (lu->nprow > 0) { 730 lu->npcol = (int_t)(size / (lu->npdep * lu->nprow)); 731 if (size == lu->nprow * lu->npcol * lu->npdep) break; 732 lu->nprow--; 733 } 734 } 735 } 736 #endif 737 PetscCall(PetscOptionsInt("-mat_superlu_dist_r", "Number rows in processor partition", "None", lu->nprow, (PetscInt *)&lu->nprow, NULL)); 738 PetscCall(PetscOptionsInt("-mat_superlu_dist_c", "Number columns in processor partition", "None", lu->npcol, (PetscInt *)&lu->npcol, NULL)); 739 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0) 740 PetscCheck(size == lu->nprow * lu->npcol * lu->npdep, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of processes %d must equal to nprow %lld * npcol %lld * npdep %lld", size, (long long)lu->nprow, (long long)lu->npcol, (long long)lu->npdep); 741 #else 742 PetscCheck(size == lu->nprow * lu->npcol, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of processes %d must equal to nprow %lld * npcol %lld", size, (long long)lu->nprow, (long long)lu->npcol); 743 #endif 744 /* end of adding additional options */ 745 746 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0) 747 if (lu->use3d) { 748 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridinit3d", superlu_gridinit3d(context ? context->comm : lu->comm_superlu, (int)lu->nprow, (int)lu->npcol, (int)lu->npdep, &lu->grid3d)); 749 if (context) { 750 context->grid3d = lu->grid3d; 751 context->use3d = lu->use3d; 752 } 753 } else { 754 #endif 755 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridinit", superlu_gridinit(context ? context->comm : lu->comm_superlu, (int)lu->nprow, (int)lu->npcol, &lu->grid)); 756 if (context) context->grid = lu->grid; 757 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0) 758 } 759 #endif 760 PetscCall(PetscInfo(NULL, "Duplicating a communicator for SuperLU_DIST and calling superlu_gridinit()\n")); 761 if (iflg) { 762 PetscCall(PetscInfo(NULL, "Communicator attribute already in use so not saving communicator and SuperLU_DIST grid in communicator attribute \n")); 763 } else { 764 PetscCall(PetscInfo(NULL, "Storing communicator and SuperLU_DIST grid in communicator attribute\n")); 765 } 766 } else { /* (iflg && !context->busy) */ 767 PetscCall(PetscInfo(NULL, "Reusing communicator and superlu_gridinit() for SuperLU_DIST from communicator attribute.\n")); 768 context->busy = PETSC_TRUE; 769 lu->grid = context->grid; 770 } 771 PetscOptionsEnd(); 772 773 /* Initialize ScalePermstruct and LUstruct. */ 774 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 775 if (lu->singleprecision) { 776 PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructInit", sScalePermstructInit(M, N, &lu->sScalePermstruct)); 777 PetscStackCallExternalVoid("SuperLU_DIST:LUstructInit", sLUstructInit(N, &lu->sLUstruct)); 778 } else 779 #endif 780 { 781 PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructInit", ScalePermstructInit(M, N, &lu->ScalePermstruct)); 782 PetscStackCallExternalVoid("SuperLU_DIST:LUstructInit", LUstructInit(N, &lu->LUstruct)); 783 } 784 F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 785 F->ops->solve = MatSolve_SuperLU_DIST; 786 F->ops->matsolve = MatMatSolve_SuperLU_DIST; 787 F->ops->getinertia = NULL; 788 789 if (A->symmetric == PETSC_BOOL3_TRUE || A->hermitian == PETSC_BOOL3_TRUE) F->ops->getinertia = MatGetInertia_SuperLU_DIST; 790 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 791 PetscFunctionReturn(PETSC_SUCCESS); 792 } 793 794 static PetscErrorCode MatCholeskyFactorSymbolic_SuperLU_DIST(Mat F, Mat A, IS r, const MatFactorInfo *info) 795 { 796 PetscFunctionBegin; 797 PetscCall(MatLUFactorSymbolic_SuperLU_DIST(F, A, r, r, info)); 798 F->ops->choleskyfactornumeric = MatLUFactorNumeric_SuperLU_DIST; 799 PetscFunctionReturn(PETSC_SUCCESS); 800 } 801 802 static PetscErrorCode MatFactorGetSolverType_aij_superlu_dist(Mat A, MatSolverType *type) 803 { 804 PetscFunctionBegin; 805 *type = MATSOLVERSUPERLU_DIST; 806 PetscFunctionReturn(PETSC_SUCCESS); 807 } 808 809 static PetscErrorCode MatView_Info_SuperLU_DIST(Mat A, PetscViewer viewer) 810 { 811 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data; 812 superlu_dist_options_t options; 813 814 PetscFunctionBegin; 815 /* check if matrix is superlu_dist type */ 816 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(PETSC_SUCCESS); 817 818 options = lu->options; 819 PetscCall(PetscViewerASCIIPrintf(viewer, "SuperLU_DIST run parameters:\n")); 820 /* would love to use superlu 'IFMT' macro but it looks like it's inconsistently applied, the 821 * format spec for int64_t is set to %d for whatever reason */ 822 PetscCall(PetscViewerASCIIPrintf(viewer, " Process grid nprow %lld x npcol %lld \n", (long long)lu->nprow, (long long)lu->npcol)); 823 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0) 824 if (lu->use3d) PetscCall(PetscViewerASCIIPrintf(viewer, " Using 3d decomposition with npdep %lld \n", (long long)lu->npdep)); 825 #endif 826 827 PetscCall(PetscViewerASCIIPrintf(viewer, " Equilibrate matrix %s \n", PetscBools[options.Equil != NO])); 828 PetscCall(PetscViewerASCIIPrintf(viewer, " Replace tiny pivots %s \n", PetscBools[options.ReplaceTinyPivot != NO])); 829 PetscCall(PetscViewerASCIIPrintf(viewer, " Use iterative refinement %s \n", PetscBools[options.IterRefine == SLU_DOUBLE])); 830 PetscCall(PetscViewerASCIIPrintf(viewer, " Processors in row %lld col partition %lld \n", (long long)lu->nprow, (long long)lu->npcol)); 831 832 switch (options.RowPerm) { 833 case NOROWPERM: 834 PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation NOROWPERM\n")); 835 break; 836 case LargeDiag_MC64: 837 PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation LargeDiag_MC64\n")); 838 break; 839 case LargeDiag_AWPM: 840 PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation LargeDiag_AWPM\n")); 841 break; 842 case MY_PERMR: 843 PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation MY_PERMR\n")); 844 break; 845 default: 846 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation"); 847 } 848 849 switch (options.ColPerm) { 850 case NATURAL: 851 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation NATURAL\n")); 852 break; 853 case MMD_AT_PLUS_A: 854 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation MMD_AT_PLUS_A\n")); 855 break; 856 case MMD_ATA: 857 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation MMD_ATA\n")); 858 break; 859 /* Even though this is called METIS, the SuperLU_DIST code sets this by default if PARMETIS is defined, not METIS */ 860 case METIS_AT_PLUS_A: 861 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation METIS_AT_PLUS_A\n")); 862 break; 863 case PARMETIS: 864 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation PARMETIS\n")); 865 break; 866 default: 867 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation"); 868 } 869 870 PetscCall(PetscViewerASCIIPrintf(viewer, " Parallel symbolic factorization %s \n", PetscBools[options.ParSymbFact != NO])); 871 872 if (lu->FactPattern == SamePattern) { 873 PetscCall(PetscViewerASCIIPrintf(viewer, " Repeated factorization SamePattern\n")); 874 } else if (lu->FactPattern == SamePattern_SameRowPerm) { 875 PetscCall(PetscViewerASCIIPrintf(viewer, " Repeated factorization SamePattern_SameRowPerm\n")); 876 } else if (lu->FactPattern == DOFACT) { 877 PetscCall(PetscViewerASCIIPrintf(viewer, " Repeated factorization DOFACT\n")); 878 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown factorization pattern"); 879 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 880 if (lu->singleprecision) PetscCall(PetscViewerASCIIPrintf(viewer, " Using SuperLU_DIST in single precision\n")); 881 #endif 882 PetscFunctionReturn(PETSC_SUCCESS); 883 } 884 885 static PetscErrorCode MatView_SuperLU_DIST(Mat A, PetscViewer viewer) 886 { 887 PetscBool isascii; 888 PetscViewerFormat format; 889 890 PetscFunctionBegin; 891 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 892 if (isascii) { 893 PetscCall(PetscViewerGetFormat(viewer, &format)); 894 if (format == PETSC_VIEWER_ASCII_INFO) PetscCall(MatView_Info_SuperLU_DIST(A, viewer)); 895 } 896 PetscFunctionReturn(PETSC_SUCCESS); 897 } 898 899 static PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A, MatFactorType ftype, Mat *F) 900 { 901 Mat B; 902 Mat_SuperLU_DIST *lu; 903 PetscInt M = A->rmap->N, N = A->cmap->N; 904 PetscMPIInt size; 905 superlu_dist_options_t options; 906 PetscBool flg; 907 PetscPrecision precision = PETSC_PRECISION_INVALID; 908 909 PetscFunctionBegin; 910 /* Create the factorization matrix */ 911 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 912 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, M, N)); 913 PetscCall(PetscStrallocpy("superlu_dist", &((PetscObject)B)->type_name)); 914 PetscCall(MatSetUp(B)); 915 B->ops->getinfo = MatGetInfo_External; 916 B->ops->view = MatView_SuperLU_DIST; 917 B->ops->destroy = MatDestroy_SuperLU_DIST; 918 919 /* set_default_options_dist() sets the default input options to the following values: 920 options.Fact = DOFACT; 921 options.Equil = YES; 922 options.ParSymbFact = NO; 923 options.ColPerm = METIS_AT_PLUS_A; 924 options.RowPerm = LargeDiag_MC64; 925 options.ReplaceTinyPivot = NO; 926 options.IterRefine = SLU_DOUBLE; 927 options.Trans = NOTRANS; 928 options.SolveInitialized = NO; -hold the communication pattern used MatSolve() and MatMatSolve() 929 options.RefineInitialized = NO; 930 options.PrintStat = YES; 931 options.superlu_acc_offload = 1; 932 options.SymPattern = NO; 933 */ 934 set_default_options_dist(&options); 935 936 B->trivialsymbolic = PETSC_TRUE; 937 if (ftype == MAT_FACTOR_LU) { 938 B->factortype = MAT_FACTOR_LU; 939 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 940 } else { 941 B->factortype = MAT_FACTOR_CHOLESKY; 942 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SuperLU_DIST; 943 options.SymPattern = YES; 944 } 945 946 /* set solvertype */ 947 PetscCall(PetscFree(B->solvertype)); 948 PetscCall(PetscStrallocpy(MATSOLVERSUPERLU_DIST, &B->solvertype)); 949 950 PetscCall(PetscNew(&lu)); 951 B->data = lu; 952 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 953 954 lu->options = options; 955 lu->options.Fact = DOFACT; 956 lu->matsolve_iscalled = PETSC_FALSE; 957 lu->matmatsolve_iscalled = PETSC_FALSE; 958 959 PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "SuperLU_DIST Options", "Mat"); 960 PetscCall(PetscOptionsEnum("-pc_precision", "Precision used by SuperLU_DIST", "MATSOLVERSUPERLU_DIST", PetscPrecisionTypes, (PetscEnum)precision, (PetscEnum *)&precision, &flg)); 961 PetscOptionsEnd(); 962 if (flg) { 963 PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PetscObjectComm((PetscObject)A), PETSC_ERR_USER_INPUT, "-pc_precision only accepts single or double as option for SuperLU_DIST"); 964 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 965 lu->singleprecision = (PetscBool)(precision == PETSC_PRECISION_SINGLE); // It also implies PetscReal is not single; not merely SuperLU_DIST is running in single 966 #endif 967 } 968 969 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_aij_superlu_dist)); 970 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSuperluDistGetDiagU_C", MatSuperluDistGetDiagU_SuperLU_DIST)); 971 972 *F = B; 973 PetscFunctionReturn(PETSC_SUCCESS); 974 } 975 976 PETSC_INTERN PetscErrorCode MatSolverTypeRegister_SuperLU_DIST(void) 977 { 978 PetscFunctionBegin; 979 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_superlu_dist)); 980 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_superlu_dist)); 981 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_superlu_dist)); 982 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_superlu_dist)); 983 if (Petsc_Superlu_dist_keyval == MPI_KEYVAL_INVALID) { 984 PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_Superlu_dist_keyval_DeleteFn, &Petsc_Superlu_dist_keyval, NULL)); 985 PetscCall(PetscRegisterFinalize(Petsc_Superlu_dist_keyval_free)); 986 } 987 PetscFunctionReturn(PETSC_SUCCESS); 988 } 989 990 /*MC 991 MATSOLVERSUPERLU_DIST - Parallel direct solver package for LU factorization 992 993 Use `./configure --download-superlu_dist --download-parmetis --download-metis --download-ptscotch` to have PETSc installed with SuperLU_DIST 994 995 Use `-pc_type lu` `-pc_factor_mat_solver_type superlu_dist` to use this direct solver 996 997 Works with `MATAIJ` matrices 998 999 Options Database Keys: 1000 + -mat_superlu_dist_r <n> - number of rows in processor partition 1001 . -mat_superlu_dist_c <n> - number of columns in processor partition 1002 . -mat_superlu_dist_3d - use 3d partition, requires SuperLU_DIST 7.2 or later 1003 . -mat_superlu_dist_d <n> - depth in 3d partition (valid only if `-mat_superlu_dist_3d`) is provided 1004 . -mat_superlu_dist_equil - equilibrate the matrix 1005 . -mat_superlu_dist_rowperm <NOROWPERM,LargeDiag_MC64,LargeDiag_AWPM,MY_PERMR> - row permutation 1006 . -mat_superlu_dist_colperm <NATURAL,MMD_AT_PLUS_A,MMD_ATA,METIS_AT_PLUS_A,PARMETIS> - column permutation 1007 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 1008 . -mat_superlu_dist_fact <SamePattern> - (choose one of) `SamePattern`, `SamePattern_SameRowPerm`, `DOFACT` 1009 . -mat_superlu_dist_iterrefine - use iterative refinement 1010 . -mat_superlu_dist_printstat - print factorization information 1011 . -mat_superlu_dist_gpuoffload - offload factorization onto the GPUs, requires SuperLU_DIST 8.0.0 or later 1012 - -pc_precision single - use SuperLU_DIST single precision with PETSc double precision 1013 1014 Level: beginner 1015 1016 Note: 1017 If PETSc was configured with `--with-cuda` then this solver will use the GPUs by default. 1018 1019 .seealso: [](ch_matrices), `Mat`, `PCLU`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatGetFactor()` 1020 M*/ 1021