1 /* 2 Provides an interface to the SuperLU_DIST sparse solver 3 */ 4 5 #include <../src/mat/impls/aij/seq/aij.h> 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> 7 #include <petscpkg_version.h> 8 9 PETSC_PRAGMA_DIAGNOSTIC_IGNORED_BEGIN("-Wundef") 10 EXTERN_C_BEGIN 11 #if defined(PETSC_USE_COMPLEX) 12 #define CASTDOUBLECOMPLEX (doublecomplex *) 13 #define CASTDOUBLECOMPLEXSTAR (doublecomplex **) 14 #include <superlu_zdefs.h> 15 #define LUstructInit zLUstructInit 16 #define ScalePermstructInit zScalePermstructInit 17 #define ScalePermstructFree zScalePermstructFree 18 #define LUstructFree zLUstructFree 19 #define Destroy_LU zDestroy_LU 20 #define ScalePermstruct_t zScalePermstruct_t 21 #define LUstruct_t zLUstruct_t 22 #define SOLVEstruct_t zSOLVEstruct_t 23 #define SolveFinalize zSolveFinalize 24 #define pGetDiagU pzGetDiagU 25 #define pgssvx pzgssvx 26 #define allocateA_dist zallocateA_dist 27 #define Create_CompRowLoc_Matrix_dist zCreate_CompRowLoc_Matrix_dist 28 #define SLU SLU_Z 29 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 30 #define DeAllocLlu_3d zDeAllocLlu_3d 31 #define DeAllocGlu_3d zDeAllocGlu_3d 32 #define Destroy_A3d_gathered_on_2d zDestroy_A3d_gathered_on_2d 33 #define pgssvx3d pzgssvx3d 34 #endif 35 #elif defined(PETSC_USE_REAL_SINGLE) 36 #define CASTDOUBLECOMPLEX 37 #define CASTDOUBLECOMPLEXSTAR 38 #include <superlu_sdefs.h> 39 #define LUstructInit sLUstructInit 40 #define ScalePermstructInit sScalePermstructInit 41 #define ScalePermstructFree sScalePermstructFree 42 #define LUstructFree sLUstructFree 43 #define Destroy_LU sDestroy_LU 44 #define ScalePermstruct_t sScalePermstruct_t 45 #define LUstruct_t sLUstruct_t 46 #define SOLVEstruct_t sSOLVEstruct_t 47 #define SolveFinalize sSolveFinalize 48 #define pGetDiagU psGetDiagU 49 #define pgssvx psgssvx 50 #define allocateA_dist sallocateA_dist 51 #define Create_CompRowLoc_Matrix_dist sCreate_CompRowLoc_Matrix_dist 52 #define SLU SLU_S 53 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 54 #define DeAllocLlu_3d sDeAllocLlu_3d 55 #define DeAllocGlu_3d sDeAllocGlu_3d 56 #define Destroy_A3d_gathered_on_2d sDestroy_A3d_gathered_on_2d 57 #define pgssvx3d psgssvx3d 58 #endif 59 #else 60 #define CASTDOUBLECOMPLEX 61 #define CASTDOUBLECOMPLEXSTAR 62 #include <superlu_ddefs.h> 63 #define LUstructInit dLUstructInit 64 #define ScalePermstructInit dScalePermstructInit 65 #define ScalePermstructFree dScalePermstructFree 66 #define LUstructFree dLUstructFree 67 #define Destroy_LU dDestroy_LU 68 #define ScalePermstruct_t dScalePermstruct_t 69 #define LUstruct_t dLUstruct_t 70 #define SOLVEstruct_t dSOLVEstruct_t 71 #define SolveFinalize dSolveFinalize 72 #define pGetDiagU pdGetDiagU 73 #define pgssvx pdgssvx 74 #define allocateA_dist dallocateA_dist 75 #define Create_CompRowLoc_Matrix_dist dCreate_CompRowLoc_Matrix_dist 76 #define SLU SLU_D 77 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 78 #define DeAllocLlu_3d dDeAllocLlu_3d 79 #define DeAllocGlu_3d dDeAllocGlu_3d 80 #define Destroy_A3d_gathered_on_2d dDestroy_A3d_gathered_on_2d 81 #define pgssvx3d pdgssvx3d 82 #endif 83 #endif 84 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 85 #include <superlu_sdefs.h> 86 #endif 87 EXTERN_C_END 88 PETSC_PRAGMA_DIAGNOSTIC_IGNORED_END() 89 90 typedef struct { 91 int_t nprow, npcol, *row, *col; 92 gridinfo_t grid; 93 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 94 PetscBool use3d; 95 int_t npdep; /* replication factor, must be power of two */ 96 gridinfo3d_t grid3d; 97 #endif 98 superlu_dist_options_t options; 99 SuperMatrix A_sup; 100 ScalePermstruct_t ScalePermstruct; 101 LUstruct_t LUstruct; 102 int StatPrint; 103 SOLVEstruct_t SOLVEstruct; 104 fact_t FactPattern; 105 MPI_Comm comm_superlu; 106 PetscScalar *val; 107 PetscBool matsolve_iscalled, matmatsolve_iscalled; 108 PetscBool CleanUpSuperLU_Dist; /* Flag to clean up (non-global) SuperLU objects during Destroy */ 109 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 110 sScalePermstruct_t sScalePermstruct; 111 sLUstruct_t sLUstruct; 112 sSOLVEstruct_t sSOLVEstruct; 113 float *sval; 114 PetscBool singleprecision; /* use single precision SuperLU_DIST from double precision PETSc */ 115 float *sbptr; 116 #endif 117 } Mat_SuperLU_DIST; 118 119 static PetscErrorCode MatSuperluDistGetDiagU_SuperLU_DIST(Mat F, PetscScalar *diagU) 120 { 121 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data; 122 123 PetscFunctionBegin; 124 PetscStackCallExternalVoid("SuperLU_DIST:pGetDiagU", pGetDiagU(F->rmap->N, &lu->LUstruct, &lu->grid, CASTDOUBLECOMPLEX diagU)); 125 PetscFunctionReturn(PETSC_SUCCESS); 126 } 127 128 PetscErrorCode MatSuperluDistGetDiagU(Mat F, PetscScalar *diagU) 129 { 130 PetscFunctionBegin; 131 PetscValidHeaderSpecific(F, MAT_CLASSID, 1); 132 PetscTryMethod(F, "MatSuperluDistGetDiagU_C", (Mat, PetscScalar *), (F, diagU)); 133 PetscFunctionReturn(PETSC_SUCCESS); 134 } 135 136 /* This allows reusing the Superlu_DIST communicator and grid when only a single SuperLU_DIST matrix is used at a time */ 137 typedef struct { 138 MPI_Comm comm; 139 PetscBool busy; 140 gridinfo_t grid; 141 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 142 PetscBool use3d; 143 gridinfo3d_t grid3d; 144 #endif 145 } PetscSuperLU_DIST; 146 147 static PetscMPIInt Petsc_Superlu_dist_keyval = MPI_KEYVAL_INVALID; 148 149 PETSC_EXTERN PetscMPIInt MPIAPI Petsc_Superlu_dist_keyval_DeleteFn(MPI_Comm comm, PetscMPIInt keyval, void *attr_val, void *extra_state) 150 { 151 PetscSuperLU_DIST *context = (PetscSuperLU_DIST *)attr_val; 152 153 PetscFunctionBegin; 154 if (keyval != Petsc_Superlu_dist_keyval) SETERRMPI(PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Unexpected keyval"); 155 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 156 if (context->use3d) { 157 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit3d", superlu_gridexit3d(&context->grid3d)); 158 } else 159 #endif 160 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit", superlu_gridexit(&context->grid)); 161 PetscCallMPI(MPI_Comm_free(&context->comm)); 162 PetscCall(PetscFree(context)); 163 PetscFunctionReturn(MPI_SUCCESS); 164 } 165 166 /* 167 Performs MPI_Comm_free_keyval() on Petsc_Superlu_dist_keyval but keeps the global variable for 168 users who do not destroy all PETSc objects before PetscFinalize(). 169 170 The value Petsc_Superlu_dist_keyval is retained so that Petsc_Superlu_dist_keyval_DeleteFn() 171 can still check that the keyval associated with the MPI communicator is correct when the MPI 172 communicator is destroyed. 173 174 This is called in PetscFinalize() 175 */ 176 static PetscErrorCode Petsc_Superlu_dist_keyval_free(void) 177 { 178 PetscMPIInt Petsc_Superlu_dist_keyval_temp = Petsc_Superlu_dist_keyval; 179 180 PetscFunctionBegin; 181 PetscCallMPI(MPI_Comm_free_keyval(&Petsc_Superlu_dist_keyval_temp)); 182 PetscFunctionReturn(PETSC_SUCCESS); 183 } 184 185 static PetscErrorCode MatDestroy_SuperLU_DIST(Mat A) 186 { 187 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data; 188 189 PetscFunctionBegin; 190 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 191 PetscCall(PetscFree(lu->sbptr)); 192 #endif 193 if (lu->CleanUpSuperLU_Dist) { 194 /* Deallocate SuperLU_DIST storage */ 195 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist", Destroy_CompRowLoc_Matrix_dist(&lu->A_sup)); 196 if (lu->options.SolveInitialized) { 197 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 198 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", sSolveFinalize(&lu->options, &lu->sSOLVEstruct)); 199 else 200 #endif 201 PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct)); 202 } 203 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 204 if (lu->use3d) { 205 if (lu->grid3d.zscp.Iam == 0) { 206 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 207 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->sLUstruct)); 208 else 209 #endif 210 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct)); 211 } else { 212 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 213 if (lu->singleprecision) { 214 PetscStackCallExternalVoid("SuperLU_DIST:DeAllocLlu_3d", sDeAllocLlu_3d(lu->A_sup.ncol, &lu->sLUstruct, &lu->grid3d)); 215 PetscStackCallExternalVoid("SuperLU_DIST:DeAllocGlu_3d", sDeAllocGlu_3d(&lu->sLUstruct)); 216 } else 217 #endif 218 { 219 PetscStackCallExternalVoid("SuperLU_DIST:DeAllocLlu_3d", DeAllocLlu_3d(lu->A_sup.ncol, &lu->LUstruct, &lu->grid3d)); 220 PetscStackCallExternalVoid("SuperLU_DIST:DeAllocGlu_3d", DeAllocGlu_3d(&lu->LUstruct)); 221 } 222 } 223 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 224 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:Destroy_A3d_gathered_on_2d", sDestroy_A3d_gathered_on_2d(&lu->sSOLVEstruct, &lu->grid3d)); 225 else 226 #endif 227 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_A3d_gathered_on_2d", Destroy_A3d_gathered_on_2d(&lu->SOLVEstruct, &lu->grid3d)); 228 } else 229 #endif 230 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 231 if (lu->singleprecision) { 232 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->cmap->N, &lu->grid, &lu->sLUstruct)); 233 PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", sScalePermstructFree(&lu->sScalePermstruct)); 234 PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", sLUstructFree(&lu->sLUstruct)); 235 } else 236 #endif 237 { 238 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct)); 239 PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", ScalePermstructFree(&lu->ScalePermstruct)); 240 PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", LUstructFree(&lu->LUstruct)); 241 } 242 /* Release the SuperLU_DIST process grid only if the matrix has its own copy, that is it is not in the communicator context */ 243 if (lu->comm_superlu) { 244 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 245 if (lu->use3d) { 246 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit3d", superlu_gridexit3d(&lu->grid3d)); 247 } else 248 #endif 249 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit", superlu_gridexit(&lu->grid)); 250 } 251 } 252 /* 253 * We always need to release the communicator that was created in MatGetFactor_aij_superlu_dist. 254 * lu->CleanUpSuperLU_Dist was turned on in MatLUFactorSymbolic_SuperLU_DIST. There are some use 255 * cases where we only create a matrix but do not solve mat. In these cases, lu->CleanUpSuperLU_Dist 256 * is off, and the communicator was not released or marked as "not busy " in the old code. 257 * Here we try to release comm regardless. 258 */ 259 if (lu->comm_superlu) { 260 PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &lu->comm_superlu)); 261 } else { 262 PetscSuperLU_DIST *context; 263 MPI_Comm comm; 264 PetscMPIInt flg; 265 266 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 267 PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &flg)); 268 if (flg) context->busy = PETSC_FALSE; 269 } 270 271 PetscCall(PetscFree(A->data)); 272 /* clear composed functions */ 273 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 274 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSuperluDistGetDiagU_C", NULL)); 275 PetscFunctionReturn(PETSC_SUCCESS); 276 } 277 278 static PetscErrorCode MatSolve_SuperLU_DIST(Mat A, Vec b_mpi, Vec x) 279 { 280 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data; 281 PetscInt m = A->rmap->n; 282 SuperLUStat_t stat; 283 PetscReal berr[1]; 284 PetscScalar *bptr = NULL; 285 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 286 float sberr[1]; 287 #endif 288 int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */ 289 static PetscBool cite = PETSC_FALSE; 290 291 PetscFunctionBegin; 292 PetscCheck(lu->options.Fact == FACTORED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "SuperLU_DIST options.Fact must equal FACTORED"); 293 PetscCall(PetscCitationsRegister("@article{lidemmel03,\n author = {Xiaoye S. Li and James W. Demmel},\n title = {{SuperLU_DIST}: A Scalable Distributed-Memory Sparse Direct\n Solver for Unsymmetric Linear Systems},\n journal = {ACM " 294 "Trans. Mathematical Software},\n volume = {29},\n number = {2},\n pages = {110-140},\n year = 2003\n}\n", 295 &cite)); 296 297 if (lu->options.SolveInitialized && !lu->matsolve_iscalled) { 298 /* see comments in MatMatSolve() */ 299 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 300 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", sSolveFinalize(&lu->options, &lu->sSOLVEstruct)); 301 else 302 #endif 303 PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct)); 304 lu->options.SolveInitialized = NO; 305 } 306 PetscCall(VecCopy(b_mpi, x)); 307 PetscCall(VecGetArray(x, &bptr)); 308 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 309 if (lu->singleprecision) { 310 PetscInt n; 311 PetscCall(VecGetLocalSize(x, &n)); 312 if (!lu->sbptr) PetscCall(PetscMalloc1(n, &lu->sbptr)); 313 for (PetscInt i = 0; i < n; i++) lu->sbptr[i] = PetscRealPart(bptr[i]); /* PetscRealPart() is a no-op to allow compiling with complex */ 314 } 315 #endif 316 317 PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */ 318 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE) 319 if (lu->use3d) { 320 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 321 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", psgssvx3d(&lu->options, &lu->A_sup, &lu->sScalePermstruct, lu->sbptr, m, 1, &lu->grid3d, &lu->sLUstruct, &lu->sSOLVEstruct, sberr, &stat, &info)); 322 else 323 #endif 324 PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, 1, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info)); 325 } else 326 #endif 327 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 328 if (lu->singleprecision) 329 PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", psgssvx(&lu->options, &lu->A_sup, &lu->sScalePermstruct, lu->sbptr, m, 1, &lu->grid, &lu->sLUstruct, &lu->sSOLVEstruct, sberr, &stat, &info)); 330 else 331 #endif 332 PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, 1, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info)); 333 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "pdgssvx fails, info: %d", info); 334 335 if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ 336 PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat)); 337 338 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 339 if (lu->singleprecision) { 340 PetscInt n; 341 PetscCall(VecGetLocalSize(x, &n)); 342 for (PetscInt i = 0; i < n; i++) bptr[i] = lu->sbptr[i]; 343 } 344 #endif 345 PetscCall(VecRestoreArray(x, &bptr)); 346 lu->matsolve_iscalled = PETSC_TRUE; 347 lu->matmatsolve_iscalled = PETSC_FALSE; 348 PetscFunctionReturn(PETSC_SUCCESS); 349 } 350 351 static PetscErrorCode MatMatSolve_SuperLU_DIST(Mat A, Mat B_mpi, Mat X) 352 { 353 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data; 354 PetscInt m = A->rmap->n, nrhs; 355 SuperLUStat_t stat; 356 PetscReal berr[1]; 357 PetscScalar *bptr; 358 int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */ 359 PetscBool flg; 360 361 PetscFunctionBegin; 362 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 363 PetscCheck(!lu->singleprecision, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for -pc_precision single"); 364 #endif 365 PetscCheck(lu->options.Fact == FACTORED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "SuperLU_DIST options.Fact must equal FACTORED"); 366 PetscCall(PetscObjectTypeCompareAny((PetscObject)B_mpi, &flg, MATSEQDENSE, MATMPIDENSE, NULL)); 367 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix B must be MATDENSE matrix"); 368 if (X != B_mpi) { 369 PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &flg, MATSEQDENSE, MATMPIDENSE, NULL)); 370 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 371 } 372 373 if (lu->options.SolveInitialized && !lu->matmatsolve_iscalled) { 374 /* communication pattern of SOLVEstruct is unlikely created for matmatsolve, 375 thus destroy it and create a new SOLVEstruct. 376 Otherwise it may result in memory corruption or incorrect solution 377 See src/mat/tests/ex125.c */ 378 PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct)); 379 lu->options.SolveInitialized = NO; 380 } 381 if (X != B_mpi) PetscCall(MatCopy(B_mpi, X, SAME_NONZERO_PATTERN)); 382 383 PetscCall(MatGetSize(B_mpi, NULL, &nrhs)); 384 385 PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */ 386 PetscCall(MatDenseGetArray(X, &bptr)); 387 388 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE) 389 if (lu->use3d) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, nrhs, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info)); 390 else 391 #endif 392 PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, nrhs, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info)); 393 394 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "pdgssvx fails, info: %d", info); 395 PetscCall(MatDenseRestoreArray(X, &bptr)); 396 397 if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ 398 PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat)); 399 lu->matsolve_iscalled = PETSC_FALSE; 400 lu->matmatsolve_iscalled = PETSC_TRUE; 401 PetscFunctionReturn(PETSC_SUCCESS); 402 } 403 404 /* 405 input: 406 F: numeric Cholesky factor 407 output: 408 nneg: total number of negative pivots 409 nzero: total number of zero pivots 410 npos: (global dimension of F) - nneg - nzero 411 */ 412 static PetscErrorCode MatGetInertia_SuperLU_DIST(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 413 { 414 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data; 415 PetscScalar *diagU = NULL; 416 PetscInt M, i, neg = 0, zero = 0, pos = 0; 417 PetscReal r; 418 419 PetscFunctionBegin; 420 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 421 PetscCheck(!lu->singleprecision, PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Not for -pc_precision single"); 422 #endif 423 PetscCheck(F->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix factor F is not assembled"); 424 PetscCheck(lu->options.RowPerm == NOROWPERM, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must set NOROWPERM"); 425 PetscCall(MatGetSize(F, &M, NULL)); 426 PetscCall(PetscMalloc1(M, &diagU)); 427 PetscCall(MatSuperluDistGetDiagU(F, diagU)); 428 for (i = 0; i < M; i++) { 429 #if defined(PETSC_USE_COMPLEX) 430 r = PetscImaginaryPart(diagU[i]) / 10.0; 431 PetscCheck(r > -PETSC_MACHINE_EPSILON && r < PETSC_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "diagU[%" PetscInt_FMT "]=%g + i %g is non-real", i, (double)PetscRealPart(diagU[i]), (double)(r * 10.0)); 432 r = PetscRealPart(diagU[i]); 433 #else 434 r = diagU[i]; 435 #endif 436 if (r > 0) { 437 pos++; 438 } else if (r < 0) { 439 neg++; 440 } else zero++; 441 } 442 443 PetscCall(PetscFree(diagU)); 444 if (nneg) *nneg = neg; 445 if (nzero) *nzero = zero; 446 if (npos) *npos = pos; 447 PetscFunctionReturn(PETSC_SUCCESS); 448 } 449 450 static PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F, Mat A, const MatFactorInfo *info) 451 { 452 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data; 453 Mat Aloc; 454 const PetscScalar *av; 455 const PetscInt *ai = NULL, *aj = NULL; 456 PetscInt nz, dummy; 457 int sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */ 458 SuperLUStat_t stat; 459 PetscReal *berr = 0; 460 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 461 float *sberr = 0; 462 #endif 463 PetscBool ismpiaij, isseqaij, flg; 464 465 PetscFunctionBegin; 466 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij)); 467 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &ismpiaij)); 468 if (ismpiaij) { 469 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &Aloc)); 470 } else if (isseqaij) { 471 PetscCall(PetscObjectReference((PetscObject)A)); 472 Aloc = A; 473 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name); 474 475 PetscCall(MatGetRowIJ(Aloc, 0, PETSC_FALSE, PETSC_FALSE, &dummy, &ai, &aj, &flg)); 476 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "GetRowIJ failed"); 477 PetscCall(MatSeqAIJGetArrayRead(Aloc, &av)); 478 nz = ai[Aloc->rmap->n]; 479 480 /* Allocations for A_sup */ 481 if (lu->options.Fact == DOFACT) { /* first numeric factorization */ 482 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 483 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", sallocateA_dist(Aloc->rmap->n, nz, &lu->sval, &lu->col, &lu->row)); 484 else 485 #endif 486 PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR & lu->val, &lu->col, &lu->row)); 487 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 488 if (lu->FactPattern == SamePattern_SameRowPerm) { 489 lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */ 490 } else if (lu->FactPattern == SamePattern) { 491 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 492 if (lu->use3d) { 493 if (lu->grid3d.zscp.Iam == 0) { 494 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 495 if (lu->singleprecision) { 496 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->sLUstruct)); 497 PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", sSolveFinalize(&lu->options, &lu->sSOLVEstruct)); 498 } else 499 #endif 500 { 501 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct)); 502 PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct)); 503 } 504 } else { 505 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 506 if (lu->singleprecision) { 507 PetscStackCallExternalVoid("SuperLU_DIST:DeAllocLlu_3d", sDeAllocLlu_3d(lu->A_sup.ncol, &lu->sLUstruct, &lu->grid3d)); 508 PetscStackCallExternalVoid("SuperLU_DIST:DeAllocGlu_3d", sDeAllocGlu_3d(&lu->sLUstruct)); 509 } else 510 #endif 511 { 512 PetscStackCallExternalVoid("SuperLU_DIST:DeAllocLlu_3d", DeAllocLlu_3d(lu->A_sup.ncol, &lu->LUstruct, &lu->grid3d)); 513 PetscStackCallExternalVoid("SuperLU_DIST:DeAllocGlu_3d", DeAllocGlu_3d(&lu->LUstruct)); 514 } 515 } 516 } else 517 #endif 518 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 519 if (lu->singleprecision) 520 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->rmap->N, &lu->grid, &lu->sLUstruct)); 521 else 522 #endif 523 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct)); 524 lu->options.Fact = SamePattern; 525 } else if (lu->FactPattern == DOFACT) { 526 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist", Destroy_CompRowLoc_Matrix_dist(&lu->A_sup)); 527 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 528 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->rmap->N, &lu->grid, &lu->sLUstruct)); 529 else 530 #endif 531 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct)); 532 lu->options.Fact = DOFACT; 533 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 534 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", sallocateA_dist(Aloc->rmap->n, nz, &lu->sval, &lu->col, &lu->row)); 535 else 536 #endif 537 PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR & lu->val, &lu->col, &lu->row)); 538 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "options.Fact must be one of SamePattern SamePattern_SameRowPerm DOFACT"); 539 } 540 541 /* Copy AIJ matrix to superlu_dist matrix */ 542 PetscCall(PetscArraycpy(lu->row, ai, Aloc->rmap->n + 1)); 543 PetscCall(PetscArraycpy(lu->col, aj, nz)); 544 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 545 if (lu->singleprecision) 546 for (PetscInt i = 0; i < nz; i++) lu->sval[i] = PetscRealPart(av[i]); /* PetscRealPart() is a no-op to allow compiling with complex */ 547 else 548 #endif 549 PetscCall(PetscArraycpy(lu->val, av, nz)); 550 PetscCall(MatRestoreRowIJ(Aloc, 0, PETSC_FALSE, PETSC_FALSE, &dummy, &ai, &aj, &flg)); 551 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "RestoreRowIJ failed"); 552 PetscCall(MatSeqAIJRestoreArrayRead(Aloc, &av)); 553 PetscCall(MatDestroy(&Aloc)); 554 555 /* Create and setup A_sup */ 556 if (lu->options.Fact == DOFACT) { 557 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 558 if (lu->singleprecision) 559 PetscStackCallExternalVoid("SuperLU_DIST:Create_CompRowLoc_Matrix_dist", sCreate_CompRowLoc_Matrix_dist(&lu->A_sup, A->rmap->N, A->cmap->N, nz, A->rmap->n, A->rmap->rstart, lu->sval, lu->col, lu->row, SLU_NR_loc, SLU_S, SLU_GE)); 560 else 561 #endif 562 PetscStackCallExternalVoid("SuperLU_DIST:Create_CompRowLoc_Matrix_dist", Create_CompRowLoc_Matrix_dist(&lu->A_sup, A->rmap->N, A->cmap->N, nz, A->rmap->n, A->rmap->rstart, CASTDOUBLECOMPLEX lu->val, lu->col, lu->row, SLU_NR_loc, SLU, SLU_GE)); 563 } 564 565 /* Factor the matrix. */ 566 PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */ 567 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE) 568 if (lu->use3d) { 569 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 570 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", psgssvx3d(&lu->options, &lu->A_sup, &lu->sScalePermstruct, 0, A->rmap->n, 0, &lu->grid3d, &lu->sLUstruct, &lu->sSOLVEstruct, sberr, &stat, &sinfo)); 571 else 572 #endif 573 PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo)); 574 } else 575 #endif 576 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 577 if (lu->singleprecision) 578 PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", psgssvx(&lu->options, &lu->A_sup, &lu->sScalePermstruct, 0, A->rmap->n, 0, &lu->grid, &lu->sLUstruct, &lu->sSOLVEstruct, sberr, &stat, &sinfo)); 579 else 580 #endif 581 PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo)); 582 if (sinfo > 0) { 583 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot in row %d", sinfo); 584 if (sinfo <= lu->A_sup.ncol) { 585 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 586 PetscCall(PetscInfo(F, "U(i,i) is exactly zero, i= %d\n", sinfo)); 587 } else if (sinfo > lu->A_sup.ncol) { 588 /* 589 number of bytes allocated when memory allocation 590 failure occurred, plus A->ncol. 591 */ 592 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 593 PetscCall(PetscInfo(F, "Number of bytes allocated when memory allocation fails %d\n", sinfo)); 594 } 595 } else PetscCheck(sinfo >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "info = %d, argument in p*gssvx() had an illegal value", sinfo); 596 597 if (lu->options.PrintStat) { PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ } 598 PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat)); 599 F->assembled = PETSC_TRUE; 600 F->preallocated = PETSC_TRUE; 601 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */ 602 PetscFunctionReturn(PETSC_SUCCESS); 603 } 604 605 /* Note the Petsc r and c permutations are ignored */ 606 static PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 607 { 608 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data; 609 PetscInt M = A->rmap->N, N = A->cmap->N, indx; 610 PetscMPIInt size, mpiflg; 611 PetscBool flg, set; 612 const char *colperm[] = {"NATURAL", "MMD_AT_PLUS_A", "MMD_ATA", "METIS_AT_PLUS_A", "PARMETIS"}; 613 const char *rowperm[] = {"NOROWPERM", "LargeDiag_MC64", "LargeDiag_AWPM", "MY_PERMR"}; 614 const char *factPattern[] = {"SamePattern", "SamePattern_SameRowPerm", "DOFACT"}; 615 MPI_Comm comm; 616 PetscSuperLU_DIST *context = NULL; 617 618 PetscFunctionBegin; 619 /* Set options to F */ 620 PetscCall(PetscObjectGetComm((PetscObject)F, &comm)); 621 PetscCallMPI(MPI_Comm_size(comm, &size)); 622 623 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "SuperLU_Dist Options", "Mat"); 624 PetscCall(PetscOptionsBool("-mat_superlu_dist_equil", "Equilibrate matrix", "None", lu->options.Equil ? PETSC_TRUE : PETSC_FALSE, &flg, &set)); 625 if (set && !flg) lu->options.Equil = NO; 626 627 PetscCall(PetscOptionsEList("-mat_superlu_dist_rowperm", "Row permutation", "None", rowperm, 4, rowperm[1], &indx, &flg)); 628 if (flg) { 629 switch (indx) { 630 case 0: 631 lu->options.RowPerm = NOROWPERM; 632 break; 633 case 1: 634 lu->options.RowPerm = LargeDiag_MC64; 635 break; 636 case 2: 637 lu->options.RowPerm = LargeDiag_AWPM; 638 break; 639 case 3: 640 lu->options.RowPerm = MY_PERMR; 641 break; 642 default: 643 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown row permutation"); 644 } 645 } 646 647 PetscCall(PetscOptionsEList("-mat_superlu_dist_colperm", "Column permutation", "None", colperm, 5, colperm[3], &indx, &flg)); 648 if (flg) { 649 switch (indx) { 650 case 0: 651 lu->options.ColPerm = NATURAL; 652 break; 653 case 1: 654 lu->options.ColPerm = MMD_AT_PLUS_A; 655 break; 656 case 2: 657 lu->options.ColPerm = MMD_ATA; 658 break; 659 case 3: 660 lu->options.ColPerm = METIS_AT_PLUS_A; 661 break; 662 case 4: 663 lu->options.ColPerm = PARMETIS; /* only works for np>1 */ 664 break; 665 default: 666 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation"); 667 } 668 } 669 670 lu->options.ReplaceTinyPivot = NO; 671 PetscCall(PetscOptionsBool("-mat_superlu_dist_replacetinypivot", "Replace tiny pivots", "None", lu->options.ReplaceTinyPivot ? PETSC_TRUE : PETSC_FALSE, &flg, &set)); 672 if (set && flg) lu->options.ReplaceTinyPivot = YES; 673 674 lu->options.ParSymbFact = NO; 675 PetscCall(PetscOptionsBool("-mat_superlu_dist_parsymbfact", "Parallel symbolic factorization", "None", PETSC_FALSE, &flg, &set)); 676 if (set && flg && size > 1) { 677 #if defined(PETSC_HAVE_PARMETIS) 678 lu->options.ParSymbFact = YES; 679 lu->options.ColPerm = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */ 680 #else 681 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "parsymbfact needs PARMETIS"); 682 #endif 683 } 684 685 lu->FactPattern = SamePattern; 686 PetscCall(PetscOptionsEList("-mat_superlu_dist_fact", "Sparsity pattern for repeated matrix factorization", "None", factPattern, 3, factPattern[0], &indx, &flg)); 687 if (flg) { 688 switch (indx) { 689 case 0: 690 lu->FactPattern = SamePattern; 691 break; 692 case 1: 693 lu->FactPattern = SamePattern_SameRowPerm; 694 break; 695 case 2: 696 lu->FactPattern = DOFACT; 697 break; 698 } 699 } 700 701 lu->options.IterRefine = NOREFINE; 702 PetscCall(PetscOptionsBool("-mat_superlu_dist_iterrefine", "Use iterative refinement", "None", lu->options.IterRefine == NOREFINE ? PETSC_FALSE : PETSC_TRUE, &flg, &set)); 703 if (set) { 704 if (flg) lu->options.IterRefine = SLU_DOUBLE; 705 else lu->options.IterRefine = NOREFINE; 706 } 707 708 if (PetscLogPrintInfo) lu->options.PrintStat = YES; 709 else lu->options.PrintStat = NO; 710 PetscCall(PetscOptionsDeprecated("-mat_superlu_dist_statprint", "-mat_superlu_dist_printstat", "3.19", NULL)); 711 PetscCall(PetscOptionsBool("-mat_superlu_dist_printstat", "Print factorization information", "None", (PetscBool)lu->options.PrintStat, (PetscBool *)&lu->options.PrintStat, NULL)); 712 713 PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &mpiflg)); 714 if (!mpiflg || context->busy) { /* additional options */ 715 if (!mpiflg) { 716 PetscCall(PetscNew(&context)); 717 context->busy = PETSC_TRUE; 718 PetscCallMPI(MPI_Comm_dup(comm, &context->comm)); 719 PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_Superlu_dist_keyval, context)); 720 } else { 721 PetscCall(PetscCommGetComm(PetscObjectComm((PetscObject)A), &lu->comm_superlu)); 722 } 723 724 /* Default number of process columns and rows */ 725 lu->nprow = (int_t)(0.5 + PetscSqrtReal((PetscReal)size)); 726 if (!lu->nprow) lu->nprow = 1; 727 while (lu->nprow > 0) { 728 lu->npcol = (int_t)(size / lu->nprow); 729 if (size == lu->nprow * lu->npcol) break; 730 lu->nprow--; 731 } 732 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 733 lu->use3d = PETSC_FALSE; 734 lu->npdep = 1; 735 #endif 736 737 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 738 PetscCall(PetscOptionsBool("-mat_superlu_dist_3d", "Use SuperLU_DIST 3D distribution", "None", lu->use3d, &lu->use3d, NULL)); 739 PetscCheck(!PetscDefined(MISSING_GETLINE) || !lu->use3d, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP_SYS, "-mat_superlu_dist_3d requires a system with a getline() implementation"); 740 if (lu->use3d) { 741 PetscInt t; 742 PetscCall(PetscOptionsInt("-mat_superlu_dist_d", "Number of z entries in processor partition", "None", lu->npdep, (PetscInt *)&lu->npdep, NULL)); 743 t = (PetscInt)PetscLog2Real((PetscReal)lu->npdep); 744 PetscCheck(PetscPowInt(2, t) == lu->npdep, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_OUTOFRANGE, "-mat_superlu_dist_d %lld must be a power of 2", (long long)lu->npdep); 745 if (lu->npdep > 1) { 746 lu->nprow = (int_t)(0.5 + PetscSqrtReal((PetscReal)(size / lu->npdep))); 747 if (!lu->nprow) lu->nprow = 1; 748 while (lu->nprow > 0) { 749 lu->npcol = (int_t)(size / (lu->npdep * lu->nprow)); 750 if (size == lu->nprow * lu->npcol * lu->npdep) break; 751 lu->nprow--; 752 } 753 } 754 } 755 #endif 756 PetscCall(PetscOptionsInt("-mat_superlu_dist_r", "Number rows in processor partition", "None", lu->nprow, (PetscInt *)&lu->nprow, NULL)); 757 PetscCall(PetscOptionsInt("-mat_superlu_dist_c", "Number columns in processor partition", "None", lu->npcol, (PetscInt *)&lu->npcol, NULL)); 758 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 759 PetscCheck(size == lu->nprow * lu->npcol * lu->npdep, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of processes %d must equal to nprow %lld * npcol %lld * npdep %lld", size, (long long)lu->nprow, (long long)lu->npcol, (long long)lu->npdep); 760 #else 761 PetscCheck(size == lu->nprow * lu->npcol, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of processes %d must equal to nprow %lld * npcol %lld", size, (long long)lu->nprow, (long long)lu->npcol); 762 #endif 763 /* end of adding additional options */ 764 765 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 766 if (lu->use3d) { 767 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridinit3d", superlu_gridinit3d(context ? context->comm : lu->comm_superlu, lu->nprow, lu->npcol, lu->npdep, &lu->grid3d)); 768 if (context) { 769 context->grid3d = lu->grid3d; 770 context->use3d = lu->use3d; 771 } 772 } else { 773 #endif 774 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridinit", superlu_gridinit(context ? context->comm : lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid)); 775 if (context) context->grid = lu->grid; 776 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 777 } 778 #endif 779 PetscCall(PetscInfo(NULL, "Duplicating a communicator for SuperLU_DIST and calling superlu_gridinit()\n")); 780 if (mpiflg) { 781 PetscCall(PetscInfo(NULL, "Communicator attribute already in use so not saving communicator and SuperLU_DIST grid in communicator attribute \n")); 782 } else { 783 PetscCall(PetscInfo(NULL, "Storing communicator and SuperLU_DIST grid in communicator attribute\n")); 784 } 785 } else { /* (mpiflg && !context->busy) */ 786 PetscCall(PetscInfo(NULL, "Reusing communicator and superlu_gridinit() for SuperLU_DIST from communicator attribute.\n")); 787 context->busy = PETSC_TRUE; 788 lu->grid = context->grid; 789 } 790 PetscOptionsEnd(); 791 792 /* Initialize ScalePermstruct and LUstruct. */ 793 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 794 if (lu->singleprecision) { 795 PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructInit", sScalePermstructInit(M, N, &lu->sScalePermstruct)); 796 PetscStackCallExternalVoid("SuperLU_DIST:LUstructInit", sLUstructInit(N, &lu->sLUstruct)); 797 } else 798 #endif 799 { 800 PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructInit", ScalePermstructInit(M, N, &lu->ScalePermstruct)); 801 PetscStackCallExternalVoid("SuperLU_DIST:LUstructInit", LUstructInit(N, &lu->LUstruct)); 802 } 803 F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 804 F->ops->solve = MatSolve_SuperLU_DIST; 805 F->ops->matsolve = MatMatSolve_SuperLU_DIST; 806 F->ops->getinertia = NULL; 807 808 if (A->symmetric == PETSC_BOOL3_TRUE || A->hermitian == PETSC_BOOL3_TRUE) F->ops->getinertia = MatGetInertia_SuperLU_DIST; 809 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 810 PetscFunctionReturn(PETSC_SUCCESS); 811 } 812 813 static PetscErrorCode MatCholeskyFactorSymbolic_SuperLU_DIST(Mat F, Mat A, IS r, const MatFactorInfo *info) 814 { 815 PetscFunctionBegin; 816 PetscCall(MatLUFactorSymbolic_SuperLU_DIST(F, A, r, r, info)); 817 F->ops->choleskyfactornumeric = MatLUFactorNumeric_SuperLU_DIST; 818 PetscFunctionReturn(PETSC_SUCCESS); 819 } 820 821 static PetscErrorCode MatFactorGetSolverType_aij_superlu_dist(Mat A, MatSolverType *type) 822 { 823 PetscFunctionBegin; 824 *type = MATSOLVERSUPERLU_DIST; 825 PetscFunctionReturn(PETSC_SUCCESS); 826 } 827 828 static PetscErrorCode MatView_Info_SuperLU_DIST(Mat A, PetscViewer viewer) 829 { 830 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data; 831 superlu_dist_options_t options; 832 833 PetscFunctionBegin; 834 /* check if matrix is superlu_dist type */ 835 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(PETSC_SUCCESS); 836 837 options = lu->options; 838 PetscCall(PetscViewerASCIIPrintf(viewer, "SuperLU_DIST run parameters:\n")); 839 /* would love to use superlu 'IFMT' macro but it looks like it's inconsistently applied, the 840 * format spec for int64_t is set to %d for whatever reason */ 841 PetscCall(PetscViewerASCIIPrintf(viewer, " Process grid nprow %lld x npcol %lld \n", (long long)lu->nprow, (long long)lu->npcol)); 842 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 843 if (lu->use3d) PetscCall(PetscViewerASCIIPrintf(viewer, " Using 3d decomposition with npdep %lld \n", (long long)lu->npdep)); 844 #endif 845 846 PetscCall(PetscViewerASCIIPrintf(viewer, " Equilibrate matrix %s \n", PetscBools[options.Equil != NO])); 847 PetscCall(PetscViewerASCIIPrintf(viewer, " Replace tiny pivots %s \n", PetscBools[options.ReplaceTinyPivot != NO])); 848 PetscCall(PetscViewerASCIIPrintf(viewer, " Use iterative refinement %s \n", PetscBools[options.IterRefine == SLU_DOUBLE])); 849 PetscCall(PetscViewerASCIIPrintf(viewer, " Processors in row %lld col partition %lld \n", (long long)lu->nprow, (long long)lu->npcol)); 850 851 switch (options.RowPerm) { 852 case NOROWPERM: 853 PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation NOROWPERM\n")); 854 break; 855 case LargeDiag_MC64: 856 PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation LargeDiag_MC64\n")); 857 break; 858 case LargeDiag_AWPM: 859 PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation LargeDiag_AWPM\n")); 860 break; 861 case MY_PERMR: 862 PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation MY_PERMR\n")); 863 break; 864 default: 865 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation"); 866 } 867 868 switch (options.ColPerm) { 869 case NATURAL: 870 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation NATURAL\n")); 871 break; 872 case MMD_AT_PLUS_A: 873 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation MMD_AT_PLUS_A\n")); 874 break; 875 case MMD_ATA: 876 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation MMD_ATA\n")); 877 break; 878 /* Even though this is called METIS, the SuperLU_DIST code sets this by default if PARMETIS is defined, not METIS */ 879 case METIS_AT_PLUS_A: 880 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation METIS_AT_PLUS_A\n")); 881 break; 882 case PARMETIS: 883 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation PARMETIS\n")); 884 break; 885 default: 886 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation"); 887 } 888 889 PetscCall(PetscViewerASCIIPrintf(viewer, " Parallel symbolic factorization %s \n", PetscBools[options.ParSymbFact != NO])); 890 891 if (lu->FactPattern == SamePattern) { 892 PetscCall(PetscViewerASCIIPrintf(viewer, " Repeated factorization SamePattern\n")); 893 } else if (lu->FactPattern == SamePattern_SameRowPerm) { 894 PetscCall(PetscViewerASCIIPrintf(viewer, " Repeated factorization SamePattern_SameRowPerm\n")); 895 } else if (lu->FactPattern == DOFACT) { 896 PetscCall(PetscViewerASCIIPrintf(viewer, " Repeated factorization DOFACT\n")); 897 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown factorization pattern"); 898 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 899 if (lu->singleprecision) PetscCall(PetscViewerASCIIPrintf(viewer, " Using SuperLU_DIST in single precision\n")); 900 #endif 901 PetscFunctionReturn(PETSC_SUCCESS); 902 } 903 904 static PetscErrorCode MatView_SuperLU_DIST(Mat A, PetscViewer viewer) 905 { 906 PetscBool iascii; 907 PetscViewerFormat format; 908 909 PetscFunctionBegin; 910 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 911 if (iascii) { 912 PetscCall(PetscViewerGetFormat(viewer, &format)); 913 if (format == PETSC_VIEWER_ASCII_INFO) PetscCall(MatView_Info_SuperLU_DIST(A, viewer)); 914 } 915 PetscFunctionReturn(PETSC_SUCCESS); 916 } 917 918 static PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A, MatFactorType ftype, Mat *F) 919 { 920 Mat B; 921 Mat_SuperLU_DIST *lu; 922 PetscInt M = A->rmap->N, N = A->cmap->N; 923 PetscMPIInt size; 924 superlu_dist_options_t options; 925 PetscBool flg; 926 char string[16]; 927 928 PetscFunctionBegin; 929 /* Create the factorization matrix */ 930 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 931 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, M, N)); 932 PetscCall(PetscStrallocpy("superlu_dist", &((PetscObject)B)->type_name)); 933 PetscCall(MatSetUp(B)); 934 B->ops->getinfo = MatGetInfo_External; 935 B->ops->view = MatView_SuperLU_DIST; 936 B->ops->destroy = MatDestroy_SuperLU_DIST; 937 938 /* Set the default input options: 939 options.Fact = DOFACT; 940 options.Equil = YES; 941 options.ParSymbFact = NO; 942 options.ColPerm = METIS_AT_PLUS_A; 943 options.RowPerm = LargeDiag_MC64; 944 options.ReplaceTinyPivot = YES; 945 options.IterRefine = DOUBLE; 946 options.Trans = NOTRANS; 947 options.SolveInitialized = NO; -hold the communication pattern used MatSolve() and MatMatSolve() 948 options.RefineInitialized = NO; 949 options.PrintStat = YES; 950 options.SymPattern = NO; 951 */ 952 set_default_options_dist(&options); 953 954 B->trivialsymbolic = PETSC_TRUE; 955 if (ftype == MAT_FACTOR_LU) { 956 B->factortype = MAT_FACTOR_LU; 957 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 958 } else { 959 B->factortype = MAT_FACTOR_CHOLESKY; 960 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SuperLU_DIST; 961 options.SymPattern = YES; 962 } 963 964 /* set solvertype */ 965 PetscCall(PetscFree(B->solvertype)); 966 PetscCall(PetscStrallocpy(MATSOLVERSUPERLU_DIST, &B->solvertype)); 967 968 PetscCall(PetscNew(&lu)); 969 B->data = lu; 970 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 971 972 lu->options = options; 973 lu->options.Fact = DOFACT; 974 lu->matsolve_iscalled = PETSC_FALSE; 975 lu->matmatsolve_iscalled = PETSC_FALSE; 976 977 PetscCall(PetscOptionsGetString(NULL, NULL, "-pc_precision", string, sizeof(string), &flg)); 978 if (flg) { 979 PetscCall(PetscStrcasecmp(string, "single", &flg)); 980 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_USER_INPUT, "-pc_precision only accepts single as option for SuperLU_DIST"); 981 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 982 lu->singleprecision = PETSC_TRUE; 983 #endif 984 } 985 986 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_aij_superlu_dist)); 987 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSuperluDistGetDiagU_C", MatSuperluDistGetDiagU_SuperLU_DIST)); 988 989 *F = B; 990 PetscFunctionReturn(PETSC_SUCCESS); 991 } 992 993 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_SuperLU_DIST(void) 994 { 995 PetscFunctionBegin; 996 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_superlu_dist)); 997 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_superlu_dist)); 998 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_superlu_dist)); 999 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_superlu_dist)); 1000 if (Petsc_Superlu_dist_keyval == MPI_KEYVAL_INVALID) { 1001 PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_Superlu_dist_keyval_DeleteFn, &Petsc_Superlu_dist_keyval, NULL)); 1002 PetscCall(PetscRegisterFinalize(Petsc_Superlu_dist_keyval_free)); 1003 } 1004 PetscFunctionReturn(PETSC_SUCCESS); 1005 } 1006 1007 /*MC 1008 MATSOLVERSUPERLU_DIST - Parallel direct solver package for LU factorization 1009 1010 Use `./configure --download-superlu_dist --download-parmetis --download-metis --download-ptscotch` to have PETSc installed with SuperLU_DIST 1011 1012 Use `-pc_type lu` `-pc_factor_mat_solver_type superlu_dist` to use this direct solver 1013 1014 Works with `MATAIJ` matrices 1015 1016 Options Database Keys: 1017 + -mat_superlu_dist_r <n> - number of rows in processor partition 1018 . -mat_superlu_dist_c <n> - number of columns in processor partition 1019 . -mat_superlu_dist_3d - use 3d partition, requires SuperLU_DIST 7.2 or later 1020 . -mat_superlu_dist_d <n> - depth in 3d partition (valid only if `-mat_superlu_dist_3d`) is provided 1021 . -mat_superlu_dist_equil - equilibrate the matrix 1022 . -mat_superlu_dist_rowperm <NOROWPERM,LargeDiag_MC64,LargeDiag_AWPM,MY_PERMR> - row permutation 1023 . -mat_superlu_dist_colperm <NATURAL,MMD_AT_PLUS_A,MMD_ATA,METIS_AT_PLUS_A,PARMETIS> - column permutation 1024 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 1025 . -mat_superlu_dist_fact <SamePattern> - (choose one of) `SamePattern`, `SamePattern_SameRowPerm`, `DOFACT` 1026 . -mat_superlu_dist_iterrefine - use iterative refinement 1027 . -mat_superlu_dist_printstat - print factorization information 1028 - -pc_precision single - use SuperLU_DIST single precision with PETSc double precision. Currently this does not accept an options prefix, so 1029 regardless of the `PC` prefix you must use no prefix here 1030 1031 Level: beginner 1032 1033 Note: 1034 If PETSc was configured with `--with-cuda` then this solver will automatically use the GPUs. 1035 1036 .seealso: [](ch_matrices), `Mat`, `PCLU`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatGetFactor()` 1037 M*/ 1038