/*
   Provides an interface to the SuperLU_DIST sparse solver
*/

#include <../src/mat/impls/aij/seq/aij.h>
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscpkg_version.h>

PETSC_PRAGMA_DIAGNOSTIC_IGNORED_BEGIN("-Wundef")
EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
#define CASTDOUBLECOMPLEX     (doublecomplex *)
#define CASTDOUBLECOMPLEXSTAR (doublecomplex **)
#include <superlu_zdefs.h>
#define LUstructInit                  zLUstructInit
#define ScalePermstructInit           zScalePermstructInit
#define ScalePermstructFree           zScalePermstructFree
#define LUstructFree                  zLUstructFree
#define Destroy_LU                    zDestroy_LU
#define ScalePermstruct_t             zScalePermstruct_t
#define LUstruct_t                    zLUstruct_t
#define SOLVEstruct_t                 zSOLVEstruct_t
#define SolveFinalize                 zSolveFinalize
#define pGetDiagU                     pzGetDiagU
#define pgssvx                        pzgssvx
#define allocateA_dist                zallocateA_dist
#define Create_CompRowLoc_Matrix_dist zCreate_CompRowLoc_Matrix_dist
#define SLU                           SLU_Z
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
#define DeAllocLlu_3d              zDeAllocLlu_3d
#define DeAllocGlu_3d              zDeAllocGlu_3d
#define Destroy_A3d_gathered_on_2d zDestroy_A3d_gathered_on_2d
#define pgssvx3d                   pzgssvx3d
#endif
#elif defined(PETSC_USE_REAL_SINGLE)
#define CASTDOUBLECOMPLEX
#define CASTDOUBLECOMPLEXSTAR
#include <superlu_sdefs.h>
#define LUstructInit                  sLUstructInit
#define ScalePermstructInit           sScalePermstructInit
#define ScalePermstructFree           sScalePermstructFree
#define LUstructFree                  sLUstructFree
#define Destroy_LU                    sDestroy_LU
#define ScalePermstruct_t             sScalePermstruct_t
#define LUstruct_t                    sLUstruct_t
#define SOLVEstruct_t                 sSOLVEstruct_t
#define SolveFinalize                 sSolveFinalize
#define pGetDiagU                     psGetDiagU
#define pgssvx                        psgssvx
#define allocateA_dist                sallocateA_dist
#define Create_CompRowLoc_Matrix_dist sCreate_CompRowLoc_Matrix_dist
#define SLU                           SLU_S
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
#define DeAllocLlu_3d              sDeAllocLlu_3d
#define DeAllocGlu_3d              sDeAllocGlu_3d
#define Destroy_A3d_gathered_on_2d sDestroy_A3d_gathered_on_2d
#define pgssvx3d                   psgssvx3d
#endif
#else
#define CASTDOUBLECOMPLEX
#define CASTDOUBLECOMPLEXSTAR
#include <superlu_ddefs.h>
#define LUstructInit                  dLUstructInit
#define ScalePermstructInit           dScalePermstructInit
#define ScalePermstructFree           dScalePermstructFree
#define LUstructFree                  dLUstructFree
#define Destroy_LU                    dDestroy_LU
#define ScalePermstruct_t             dScalePermstruct_t
#define LUstruct_t                    dLUstruct_t
#define SOLVEstruct_t                 dSOLVEstruct_t
#define SolveFinalize                 dSolveFinalize
#define pGetDiagU                     pdGetDiagU
#define pgssvx                        pdgssvx
#define allocateA_dist                dallocateA_dist
#define Create_CompRowLoc_Matrix_dist dCreate_CompRowLoc_Matrix_dist
#define SLU                           SLU_D
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
#define DeAllocLlu_3d              dDeAllocLlu_3d
#define DeAllocGlu_3d              dDeAllocGlu_3d
#define Destroy_A3d_gathered_on_2d dDestroy_A3d_gathered_on_2d
#define pgssvx3d                   pdgssvx3d
#endif
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
#include <superlu_sdefs.h>
#endif
EXTERN_C_END
PETSC_PRAGMA_DIAGNOSTIC_IGNORED_END()
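/*
   The macros above map the precision-independent names used in the rest of this file onto the
   precision-specific SuperLU_DIST symbols selected at configure time; for example, with
   PETSC_USE_COMPLEX a call to pgssvx() below compiles to pzgssvx(), while real double and real
   single precision builds get pdgssvx() and psgssvx() respectively.
*/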
typedef struct {
  int_t      nprow, npcol, *row, *col;
  gridinfo_t grid;
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
  PetscBool    use3d;
  int_t        npdep; /* replication factor, must be power of two */
  gridinfo3d_t grid3d;
#endif
  superlu_dist_options_t options;
  SuperMatrix            A_sup;
  ScalePermstruct_t      ScalePermstruct;
  LUstruct_t             LUstruct;
  int                    StatPrint;
  SOLVEstruct_t          SOLVEstruct;
  fact_t                 FactPattern;
  MPI_Comm               comm_superlu;
  PetscScalar           *val;
  PetscBool              matsolve_iscalled, matmatsolve_iscalled;
  PetscBool              CleanUpSuperLU_Dist; /* Flag to clean up (non-global) SuperLU objects during Destroy */
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
  sScalePermstruct_t sScalePermstruct;
  sLUstruct_t        sLUstruct;
  sSOLVEstruct_t     sSOLVEstruct;
  float             *sval;
  PetscBool          singleprecision; /* use single precision SuperLU_DIST from double precision PETSc */
  float             *sbptr;
#endif
} Mat_SuperLU_DIST;

PetscErrorCode MatSuperluDistGetDiagU_SuperLU_DIST(Mat F, PetscScalar *diagU)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data;

  PetscFunctionBegin;
  PetscStackCallExternalVoid("SuperLU_DIST:pGetDiagU", pGetDiagU(F->rmap->N, &lu->LUstruct, &lu->grid, CASTDOUBLECOMPLEX diagU));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSuperluDistGetDiagU(Mat F, PetscScalar *diagU)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(F, MAT_CLASSID, 1);
  PetscTryMethod(F, "MatSuperluDistGetDiagU_C", (Mat, PetscScalar *), (F, diagU));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* This allows reusing the SuperLU_DIST communicator and grid when only a single SuperLU_DIST matrix is used at a time */
typedef struct {
  MPI_Comm   comm;
  PetscBool  busy;
  gridinfo_t grid;
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
  PetscBool    use3d;
  gridinfo3d_t grid3d;
#endif
} PetscSuperLU_DIST;

static PetscMPIInt Petsc_Superlu_dist_keyval = MPI_KEYVAL_INVALID;

PETSC_EXTERN PetscMPIInt MPIAPI Petsc_Superlu_dist_keyval_Delete_Fn(MPI_Comm comm, PetscMPIInt keyval, void *attr_val, void *extra_state)
{
  PetscSuperLU_DIST *context = (PetscSuperLU_DIST *)attr_val;

  PetscFunctionBegin;
  if (keyval != Petsc_Superlu_dist_keyval) SETERRMPI(PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Unexpected keyval");
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
  if (context->use3d) {
    PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit3d", superlu_gridexit3d(&context->grid3d));
  } else
#endif
    PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit", superlu_gridexit(&context->grid));
  PetscCallMPI(MPI_Comm_free(&context->comm));
  PetscCall(PetscFree(context));
  PetscFunctionReturn(MPI_SUCCESS);
}
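/*
   Rough life cycle of the cached grid (a sketch of the logic below, not a normative description):
     - the first factorization on a communicator creates a PetscSuperLU_DIST context, duplicates the
       communicator, calls superlu_gridinit(), and attaches the context to the PETSc communicator
       through Petsc_Superlu_dist_keyval;
     - MatDestroy_SuperLU_DIST() marks the context as not busy so the grid can be reused later;
     - while the cached grid is busy (several factorizations alive at once), a factorization instead
       obtains its own communicator with PetscCommGetComm() and releases it in MatDestroy_SuperLU_DIST();
     - Petsc_Superlu_dist_keyval_Delete_Fn() tears the cached grid down when the communicator carrying
       the attribute is destroyed.
*/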
/*
  Performs MPI_Comm_free_keyval() on Petsc_Superlu_dist_keyval but keeps the global variable for
  users who do not destroy all PETSc objects before PetscFinalize().

  The value Petsc_Superlu_dist_keyval is retained so that Petsc_Superlu_dist_keyval_Delete_Fn()
  can still check that the keyval associated with the MPI communicator is correct when the MPI
  communicator is destroyed.

  This is called in PetscFinalize()
*/
static PetscErrorCode Petsc_Superlu_dist_keyval_free(void)
{
  PetscMPIInt Petsc_Superlu_dist_keyval_temp = Petsc_Superlu_dist_keyval;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_free_keyval(&Petsc_Superlu_dist_keyval_temp));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
  PetscCall(PetscFree(lu->sbptr));
#endif
  if (lu->CleanUpSuperLU_Dist) {
    /* Deallocate SuperLU_DIST storage */
    PetscStackCallExternalVoid("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist", Destroy_CompRowLoc_Matrix_dist(&lu->A_sup));
    if (lu->options.SolveInitialized) {
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
      if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", sSolveFinalize(&lu->options, &lu->sSOLVEstruct));
      else
#endif
        PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
    }
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
    if (lu->use3d) {
      if (lu->grid3d.zscp.Iam == 0) {
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
        if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->sLUstruct));
        else
#endif
          PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct));
      } else {
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
        if (lu->singleprecision) {
          PetscStackCallExternalVoid("SuperLU_DIST:DeAllocLlu_3d", sDeAllocLlu_3d(lu->A_sup.ncol, &lu->sLUstruct, &lu->grid3d));
          PetscStackCallExternalVoid("SuperLU_DIST:DeAllocGlu_3d", sDeAllocGlu_3d(&lu->sLUstruct));
        } else
#endif
        {
          PetscStackCallExternalVoid("SuperLU_DIST:DeAllocLlu_3d", DeAllocLlu_3d(lu->A_sup.ncol, &lu->LUstruct, &lu->grid3d));
          PetscStackCallExternalVoid("SuperLU_DIST:DeAllocGlu_3d", DeAllocGlu_3d(&lu->LUstruct));
        }
      }
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
      if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:Destroy_A3d_gathered_on_2d", sDestroy_A3d_gathered_on_2d(&lu->sSOLVEstruct, &lu->grid3d));
      else
#endif
        PetscStackCallExternalVoid("SuperLU_DIST:Destroy_A3d_gathered_on_2d", Destroy_A3d_gathered_on_2d(&lu->SOLVEstruct, &lu->grid3d));
    } else
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
      if (lu->singleprecision) {
        PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->cmap->N, &lu->grid, &lu->sLUstruct));
        PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", sScalePermstructFree(&lu->sScalePermstruct));
        PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", sLUstructFree(&lu->sLUstruct));
      } else
#endif
      {
        PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct));
        PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", ScalePermstructFree(&lu->ScalePermstruct));
        PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", LUstructFree(&lu->LUstruct));
      }
    /* Release the SuperLU_DIST process grid only if the matrix has its own copy, that is, it is not in the communicator context */
    if (lu->comm_superlu) {
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
      if (lu->use3d) {
        PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit3d", superlu_gridexit3d(&lu->grid3d));
      } else
#endif
        PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit", superlu_gridexit(&lu->grid));
    }
  }
  /*
   * We always need to release the communicator that was created in MatGetFactor_aij_superlu_dist.
   * lu->CleanUpSuperLU_Dist is turned on in MatLUFactorSymbolic_SuperLU_DIST. In some use cases a
   * matrix is created but never solved with; then lu->CleanUpSuperLU_Dist stays off and the old code
   * neither released the communicator nor marked it as "not busy". Here we release the communicator
   * regardless.
   */
  if (lu->comm_superlu) {
    PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &lu->comm_superlu));
  } else {
    PetscSuperLU_DIST *context;
    MPI_Comm           comm;
    PetscMPIInt        flg;

    PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
    PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &flg));
    if (flg) context->busy = PETSC_FALSE;
  }

  PetscCall(PetscFree(A->data));
  /* clear composed functions */
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSuperluDistGetDiagU_C", NULL));

  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSolve_SuperLU_DIST(Mat A, Vec b_mpi, Vec x)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data;
  PetscInt          m  = A->rmap->n;
  SuperLUStat_t     stat;
  PetscReal         berr[1];
  PetscScalar      *bptr = NULL;
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
  float sberr[1];
#endif
  int              info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
  static PetscBool cite = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCheck(lu->options.Fact == FACTORED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "SuperLU_DIST options.Fact must equal FACTORED");
  PetscCall(PetscCitationsRegister("@article{lidemmel03,\n author = {Xiaoye S. Li and James W. Demmel},\n title = {{SuperLU_DIST}: A Scalable Distributed-Memory Sparse Direct\n Solver for Unsymmetric Linear Systems},\n journal = {ACM "
                                   "Trans. Mathematical Software},\n volume = {29},\n number = {2},\n pages = {110-140},\n year = 2003\n}\n",
                                   &cite));

  if (lu->options.SolveInitialized && !lu->matsolve_iscalled) {
    /* see comments in MatMatSolve() */
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
    if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", sSolveFinalize(&lu->options, &lu->sSOLVEstruct));
    else
#endif
      PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
    lu->options.SolveInitialized = NO;
  }
  PetscCall(VecCopy(b_mpi, x));
  PetscCall(VecGetArray(x, &bptr));
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
  if (lu->singleprecision) {
    PetscInt n;
    PetscCall(VecGetLocalSize(x, &n));
    if (!lu->sbptr) PetscCall(PetscMalloc1(n, &lu->sbptr));
    for (PetscInt i = 0; i < n; i++) lu->sbptr[i] = PetscRealPart(bptr[i]); /* PetscRealPart() is a no-op to allow compiling with complex */
  }
#endif
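  /* With -pc_precision single the right-hand side/solution is staged through the float buffer
     lu->sbptr: it is demoted above before the single precision solve and promoted back into the
     PETSc vector after the solve below. */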
  PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE)
  if (lu->use3d) {
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
    if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", psgssvx3d(&lu->options, &lu->A_sup, &lu->sScalePermstruct, lu->sbptr, m, 1, &lu->grid3d, &lu->sLUstruct, &lu->sSOLVEstruct, sberr, &stat, &info));
    else
#endif
      PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, 1, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
  } else
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
    if (lu->singleprecision)
      PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", psgssvx(&lu->options, &lu->A_sup, &lu->sScalePermstruct, lu->sbptr, m, 1, &lu->grid, &lu->sLUstruct, &lu->sSOLVEstruct, sberr, &stat, &info));
    else
#endif
      PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, 1, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
  PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "pdgssvx fails, info: %d", info);

  if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */
  PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat));

#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
  if (lu->singleprecision) {
    PetscInt n;
    PetscCall(VecGetLocalSize(x, &n));
    for (PetscInt i = 0; i < n; i++) bptr[i] = lu->sbptr[i];
  }
#endif
  PetscCall(VecRestoreArray(x, &bptr));
  lu->matsolve_iscalled    = PETSC_TRUE;
  lu->matmatsolve_iscalled = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMatSolve_SuperLU_DIST(Mat A, Mat B_mpi, Mat X)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data;
  PetscInt          m  = A->rmap->n, nrhs;
  SuperLUStat_t     stat;
  PetscReal         berr[1];
  PetscScalar      *bptr;
  int               info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
  PetscBool         flg;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
  PetscCheck(!lu->singleprecision, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for -pc_precision single");
#endif
  PetscCheck(lu->options.Fact == FACTORED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "SuperLU_DIST options.Fact must equal FACTORED");
  PetscCall(PetscObjectTypeCompareAny((PetscObject)B_mpi, &flg, MATSEQDENSE, MATMPIDENSE, NULL));
  PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix B must be MATDENSE matrix");
  if (X != B_mpi) {
    PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &flg, MATSEQDENSE, MATMPIDENSE, NULL));
    PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");
  }

  if (lu->options.SolveInitialized && !lu->matmatsolve_iscalled) {
    /* the communication pattern of SOLVEstruct was unlikely created for MatMatSolve(),
       thus destroy it and create a new one. Otherwise it may result in memory corruption
       or an incorrect solution; see src/mat/tests/ex125.c */
    PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
    lu->options.SolveInitialized = NO;
  }
  if (X != B_mpi) PetscCall(MatCopy(B_mpi, X, SAME_NONZERO_PATTERN));

  PetscCall(MatGetSize(B_mpi, NULL, &nrhs));

  PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */
  PetscCall(MatDenseGetArray(X, &bptr));

#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE)
  if (lu->use3d) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, nrhs, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
  else
#endif
    PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, nrhs, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));

  PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "pdgssvx fails, info: %d", info);
  PetscCall(MatDenseRestoreArray(X, &bptr));

  if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */
  PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat));
  lu->matsolve_iscalled    = PETSC_FALSE;
  lu->matmatsolve_iscalled = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  input:
    F:     numeric Cholesky factor
  output:
    nneg:  total number of negative pivots
    nzero: total number of zero pivots
    npos:  (global dimension of F) - nneg - nzero
*/
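/*
  Background for the sign counting below (a sketch, ignoring equilibration): with
  options.RowPerm == NOROWPERM a symmetric matrix is factored, up to a symmetric permutation, as
  A = L*U with unit lower triangular L, so A is congruent to diag(U). By Sylvester's law of inertia
  the signs of diag(U) then reproduce the numbers of positive, negative and zero eigenvalues of A.
*/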
static PetscErrorCode MatGetInertia_SuperLU_DIST(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
{
  Mat_SuperLU_DIST *lu    = (Mat_SuperLU_DIST *)F->data;
  PetscScalar      *diagU = NULL;
  PetscInt          M, i, neg = 0, zero = 0, pos = 0;
  PetscReal         r;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
  PetscCheck(!lu->singleprecision, PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Not for -pc_precision single");
#endif
  PetscCheck(F->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix factor F is not assembled");
  PetscCheck(lu->options.RowPerm == NOROWPERM, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must set NOROWPERM");
  PetscCall(MatGetSize(F, &M, NULL));
  PetscCall(PetscMalloc1(M, &diagU));
  PetscCall(MatSuperluDistGetDiagU(F, diagU));
  for (i = 0; i < M; i++) {
#if defined(PETSC_USE_COMPLEX)
    r = PetscImaginaryPart(diagU[i]) / 10.0;
    PetscCheck(r > -PETSC_MACHINE_EPSILON && r < PETSC_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "diagU[%" PetscInt_FMT "]=%g + i %g is non-real", i, (double)PetscRealPart(diagU[i]), (double)(r * 10.0));
    r = PetscRealPart(diagU[i]);
#else
    r = diagU[i];
#endif
    if (r > 0) {
      pos++;
    } else if (r < 0) {
      neg++;
    } else zero++;
  }

  PetscCall(PetscFree(diagU));
  if (nneg) *nneg = neg;
  if (nzero) *nzero = zero;
  if (npos) *npos = pos;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F, Mat A, const MatFactorInfo *info)
{
  Mat_SuperLU_DIST  *lu = (Mat_SuperLU_DIST *)F->data;
  Mat                Aloc;
  const PetscScalar *av;
  const PetscInt    *ai = NULL, *aj = NULL;
  PetscInt           nz, dummy;
  int                sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */
  SuperLUStat_t      stat;
  PetscReal         *berr = 0;
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
  float *sberr = 0;
#endif
  PetscBool ismpiaij, isseqaij, flg;

  PetscFunctionBegin;
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &ismpiaij));
  if (ismpiaij) {
    PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &Aloc));
  } else if (isseqaij) {
    PetscCall(PetscObjectReference((PetscObject)A));
    Aloc = A;
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);

  PetscCall(MatGetRowIJ(Aloc, 0, PETSC_FALSE, PETSC_FALSE, &dummy, &ai, &aj, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "GetRowIJ failed");
  PetscCall(MatSeqAIJGetArrayRead(Aloc, &av));
  nz = ai[Aloc->rmap->n];

  /* Allocations for A_sup */
  if (lu->options.Fact == DOFACT) { /* first numeric factorization */
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
    if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", sallocateA_dist(Aloc->rmap->n, nz, &lu->sval, &lu->col, &lu->row));
    else
#endif
      PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR & lu->val, &lu->col, &lu->row));
  } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
    if (lu->FactPattern == SamePattern_SameRowPerm) {
      lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
    } else if (lu->FactPattern == SamePattern) {
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
      if (lu->use3d) {
        if (lu->grid3d.zscp.Iam == 0) {
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
          if (lu->singleprecision) {
            PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->sLUstruct));
            PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", sSolveFinalize(&lu->options, &lu->sSOLVEstruct));
          } else
#endif
          {
            PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct));
            PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
          }
        } else {
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
          if (lu->singleprecision) {
            PetscStackCallExternalVoid("SuperLU_DIST:DeAllocLlu_3d", sDeAllocLlu_3d(lu->A_sup.ncol, &lu->sLUstruct, &lu->grid3d));
            PetscStackCallExternalVoid("SuperLU_DIST:DeAllocGlu_3d", sDeAllocGlu_3d(&lu->sLUstruct));
          } else
#endif
          {
            PetscStackCallExternalVoid("SuperLU_DIST:DeAllocLlu_3d", DeAllocLlu_3d(lu->A_sup.ncol, &lu->LUstruct, &lu->grid3d));
            PetscStackCallExternalVoid("SuperLU_DIST:DeAllocGlu_3d", DeAllocGlu_3d(&lu->LUstruct));
          }
        }
      } else
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
        if (lu->singleprecision)
          PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->rmap->N, &lu->grid, &lu->sLUstruct));
        else
#endif
          PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct));
      lu->options.Fact = SamePattern;
    } else if (lu->FactPattern == DOFACT) {
      PetscStackCallExternalVoid("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist", Destroy_CompRowLoc_Matrix_dist(&lu->A_sup));
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
      if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->rmap->N, &lu->grid, &lu->sLUstruct));
      else
#endif
        PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct));
      lu->options.Fact = DOFACT;
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
      if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", sallocateA_dist(Aloc->rmap->n, nz, &lu->sval, &lu->col, &lu->row));
      else
#endif
        PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR & lu->val, &lu->col, &lu->row));
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "options.Fact must be one of SamePattern SamePattern_SameRowPerm DOFACT");
  }

  /* Copy AIJ matrix to superlu_dist matrix */
  PetscCall(PetscArraycpy(lu->row, ai, Aloc->rmap->n + 1));
  PetscCall(PetscArraycpy(lu->col, aj, nz));
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
  if (lu->singleprecision)
    for (PetscInt i = 0; i < nz; i++) lu->sval[i] = PetscRealPart(av[i]); /* PetscRealPart() is a no-op to allow compiling with complex */
  else
#endif
    PetscCall(PetscArraycpy(lu->val, av, nz));
  PetscCall(MatRestoreRowIJ(Aloc, 0, PETSC_FALSE, PETSC_FALSE, &dummy, &ai, &aj, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "RestoreRowIJ failed");
  PetscCall(MatSeqAIJRestoreArrayRead(Aloc, &av));
  PetscCall(MatDestroy(&Aloc));

  /* Create and setup A_sup */
  if (lu->options.Fact == DOFACT) {
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
    if (lu->singleprecision)
      PetscStackCallExternalVoid("SuperLU_DIST:Create_CompRowLoc_Matrix_dist", sCreate_CompRowLoc_Matrix_dist(&lu->A_sup, A->rmap->N, A->cmap->N, nz, A->rmap->n, A->rmap->rstart, lu->sval, lu->col, lu->row, SLU_NR_loc, SLU_S, SLU_GE));
    else
#endif
      PetscStackCallExternalVoid("SuperLU_DIST:Create_CompRowLoc_Matrix_dist", Create_CompRowLoc_Matrix_dist(&lu->A_sup, A->rmap->N, A->cmap->N, nz, A->rmap->n, A->rmap->rstart, CASTDOUBLECOMPLEX lu->val, lu->col, lu->row, SLU_NR_loc, SLU, SLU_GE));
  }

  /* Factor the matrix. */
  PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE)
  if (lu->use3d) {
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
    if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", psgssvx3d(&lu->options, &lu->A_sup, &lu->sScalePermstruct, 0, A->rmap->n, 0, &lu->grid3d, &lu->sLUstruct, &lu->sSOLVEstruct, sberr, &stat, &sinfo));
    else
#endif
      PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo));
  } else
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
    if (lu->singleprecision)
      PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", psgssvx(&lu->options, &lu->A_sup, &lu->sScalePermstruct, 0, A->rmap->n, 0, &lu->grid, &lu->sLUstruct, &lu->sSOLVEstruct, sberr, &stat, &sinfo));
    else
#endif
      PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo));
  if (sinfo > 0) {
    PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot in row %d", sinfo);
    if (sinfo <= lu->A_sup.ncol) {
      F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
      PetscCall(PetscInfo(F, "U(i,i) is exactly zero, i= %d\n", sinfo));
    } else if (sinfo > lu->A_sup.ncol) {
      /*
         number of bytes allocated when memory allocation
         failure occurred, plus A->ncol.
      */
      F->factorerrortype = MAT_FACTOR_OUTMEMORY;
      PetscCall(PetscInfo(F, "Number of bytes allocated when memory allocation fails %d\n", sinfo));
    }
  } else PetscCheck(sinfo >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "info = %d, argument in p*gssvx() had an illegal value", sinfo);

  if (lu->options.PrintStat) { PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ }
  PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat));
  F->assembled     = PETSC_TRUE;
  F->preallocated  = PETSC_TRUE;
  lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Note the Petsc r and c permutations are ignored */
static PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
{
  Mat_SuperLU_DIST  *lu = (Mat_SuperLU_DIST *)F->data;
  PetscInt           M = A->rmap->N, N = A->cmap->N, indx;
  PetscMPIInt        size, mpiflg;
  PetscBool          flg, set;
  const char        *colperm[]     = {"NATURAL", "MMD_AT_PLUS_A", "MMD_ATA", "METIS_AT_PLUS_A", "PARMETIS"};
  const char        *rowperm[]     = {"NOROWPERM", "LargeDiag_MC64", "LargeDiag_AWPM", "MY_PERMR"};
  const char        *factPattern[] = {"SamePattern", "SamePattern_SameRowPerm", "DOFACT"};
  MPI_Comm           comm;
  PetscSuperLU_DIST *context = NULL;

  PetscFunctionBegin;
  /* Set options to F */
  PetscCall(PetscObjectGetComm((PetscObject)F, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "SuperLU_Dist Options", "Mat");
  PetscCall(PetscOptionsBool("-mat_superlu_dist_equil", "Equilibrate matrix", "None", lu->options.Equil ? PETSC_TRUE : PETSC_FALSE, &flg, &set));
  if (set && !flg) lu->options.Equil = NO;

  PetscCall(PetscOptionsEList("-mat_superlu_dist_rowperm", "Row permutation", "None", rowperm, 4, rowperm[1], &indx, &flg));
  if (flg) {
    switch (indx) {
    case 0:
      lu->options.RowPerm = NOROWPERM;
      break;
    case 1:
      lu->options.RowPerm = LargeDiag_MC64;
      break;
    case 2:
      lu->options.RowPerm = LargeDiag_AWPM;
      break;
    case 3:
      lu->options.RowPerm = MY_PERMR;
      break;
    default:
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown row permutation");
    }
  }

  PetscCall(PetscOptionsEList("-mat_superlu_dist_colperm", "Column permutation", "None", colperm, 5, colperm[3], &indx, &flg));
  if (flg) {
    switch (indx) {
    case 0:
      lu->options.ColPerm = NATURAL;
      break;
    case 1:
      lu->options.ColPerm = MMD_AT_PLUS_A;
      break;
    case 2:
      lu->options.ColPerm = MMD_ATA;
      break;
    case 3:
      lu->options.ColPerm = METIS_AT_PLUS_A;
      break;
    case 4:
      lu->options.ColPerm = PARMETIS; /* only works for np>1 */
      break;
    default:
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation");
    }
  }

  lu->options.ReplaceTinyPivot = NO;
  PetscCall(PetscOptionsBool("-mat_superlu_dist_replacetinypivot", "Replace tiny pivots", "None", lu->options.ReplaceTinyPivot ? PETSC_TRUE : PETSC_FALSE, &flg, &set));
  if (set && flg) lu->options.ReplaceTinyPivot = YES;

  lu->options.ParSymbFact = NO;
  PetscCall(PetscOptionsBool("-mat_superlu_dist_parsymbfact", "Parallel symbolic factorization", "None", PETSC_FALSE, &flg, &set));
  if (set && flg && size > 1) {
#if defined(PETSC_HAVE_PARMETIS)
    lu->options.ParSymbFact = YES;
    lu->options.ColPerm     = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */
#else
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "parsymbfact needs PARMETIS");
#endif
  }

  lu->FactPattern = SamePattern;
  PetscCall(PetscOptionsEList("-mat_superlu_dist_fact", "Sparsity pattern for repeated matrix factorization", "None", factPattern, 3, factPattern[0], &indx, &flg));
  if (flg) {
    switch (indx) {
    case 0:
      lu->FactPattern = SamePattern;
      break;
    case 1:
      lu->FactPattern = SamePattern_SameRowPerm;
      break;
    case 2:
      lu->FactPattern = DOFACT;
      break;
    }
  }

  lu->options.IterRefine = NOREFINE;
  PetscCall(PetscOptionsBool("-mat_superlu_dist_iterrefine", "Use iterative refinement", "None", lu->options.IterRefine == NOREFINE ? PETSC_FALSE : PETSC_TRUE, &flg, &set));
  if (set) {
    if (flg) lu->options.IterRefine = SLU_DOUBLE;
    else lu->options.IterRefine = NOREFINE;
  }

  if (PetscLogPrintInfo) lu->options.PrintStat = YES;
  else lu->options.PrintStat = NO;
  PetscCall(PetscOptionsDeprecated("-mat_superlu_dist_statprint", "-mat_superlu_dist_printstat", "3.19", NULL));
  PetscCall(PetscOptionsBool("-mat_superlu_dist_printstat", "Print factorization information", "None", (PetscBool)lu->options.PrintStat, (PetscBool *)&lu->options.PrintStat, NULL));

  PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &mpiflg));
  if (!mpiflg || context->busy) { /* additional options */
    if (!mpiflg) {
      PetscCall(PetscNew(&context));
      context->busy = PETSC_TRUE;
      PetscCallMPI(MPI_Comm_dup(comm, &context->comm));
      PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_Superlu_dist_keyval, context));
    } else {
      PetscCall(PetscCommGetComm(PetscObjectComm((PetscObject)A), &lu->comm_superlu));
    }

    /* Default number of process columns and rows */
    lu->nprow = (int_t)(0.5 + PetscSqrtReal((PetscReal)size));
    if (!lu->nprow) lu->nprow = 1;
    while (lu->nprow > 0) {
      lu->npcol = (int_t)(size / lu->nprow);
      if (size == lu->nprow * lu->npcol) break;
      lu->nprow--;
    }
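    /* Examples of the grid sizes this loop produces: 4 ranks -> 2 x 2, 6 ranks -> 2 x 3, and a prime
       count such as 7 ranks falls through to 1 x 7 */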
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
    lu->use3d = PETSC_FALSE;
    lu->npdep = 1;
#endif

#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
    PetscCall(PetscOptionsBool("-mat_superlu_dist_3d", "Use SuperLU_DIST 3D distribution", "None", lu->use3d, &lu->use3d, NULL));
    PetscCheck(!PetscDefined(MISSING_GETLINE) || !lu->use3d, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP_SYS, "-mat_superlu_dist_3d requires a system with a getline() implementation");
    if (lu->use3d) {
      PetscInt t;
      PetscCall(PetscOptionsInt("-mat_superlu_dist_d", "Number of z entries in processor partition", "None", lu->npdep, (PetscInt *)&lu->npdep, NULL));
      t = (PetscInt)PetscLog2Real((PetscReal)lu->npdep);
      PetscCheck(PetscPowInt(2, t) == lu->npdep, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_OUTOFRANGE, "-mat_superlu_dist_d %lld must be a power of 2", (long long)lu->npdep);
      if (lu->npdep > 1) {
        lu->nprow = (int_t)(0.5 + PetscSqrtReal((PetscReal)(size / lu->npdep)));
        if (!lu->nprow) lu->nprow = 1;
        while (lu->nprow > 0) {
          lu->npcol = (int_t)(size / (lu->npdep * lu->nprow));
          if (size == lu->nprow * lu->npcol * lu->npdep) break;
          lu->nprow--;
        }
      }
    }
#endif
    PetscCall(PetscOptionsInt("-mat_superlu_dist_r", "Number of rows in processor partition", "None", lu->nprow, (PetscInt *)&lu->nprow, NULL));
    PetscCall(PetscOptionsInt("-mat_superlu_dist_c", "Number of columns in processor partition", "None", lu->npcol, (PetscInt *)&lu->npcol, NULL));
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
    PetscCheck(size == lu->nprow * lu->npcol * lu->npdep, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of processes %d must equal nprow %lld * npcol %lld * npdep %lld", size, (long long)lu->nprow, (long long)lu->npcol, (long long)lu->npdep);
#else
    PetscCheck(size == lu->nprow * lu->npcol, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of processes %d must equal nprow %lld * npcol %lld", size, (long long)lu->nprow, (long long)lu->npcol);
#endif
    /* end of adding additional options */

#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
    if (lu->use3d) {
      PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridinit3d", superlu_gridinit3d(context ? context->comm : lu->comm_superlu, lu->nprow, lu->npcol, lu->npdep, &lu->grid3d));
      if (context) {
        context->grid3d = lu->grid3d;
        context->use3d  = lu->use3d;
      }
    } else {
#endif
      PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridinit", superlu_gridinit(context ? context->comm : lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid));
      if (context) context->grid = lu->grid;
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
    }
#endif
    PetscCall(PetscInfo(NULL, "Duplicating a communicator for SuperLU_DIST and calling superlu_gridinit()\n"));
    if (mpiflg) {
      PetscCall(PetscInfo(NULL, "Communicator attribute already in use so not saving communicator and SuperLU_DIST grid in communicator attribute \n"));
    } else {
      PetscCall(PetscInfo(NULL, "Storing communicator and SuperLU_DIST grid in communicator attribute\n"));
    }
  } else { /* (mpiflg && !context->busy) */
    PetscCall(PetscInfo(NULL, "Reusing communicator and superlu_gridinit() for SuperLU_DIST from communicator attribute.\n"));
    context->busy = PETSC_TRUE;
    lu->grid      = context->grid;
  }
  PetscOptionsEnd();

  /* Initialize ScalePermstruct and LUstruct. */
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
  if (lu->singleprecision) {
    PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructInit", sScalePermstructInit(M, N, &lu->sScalePermstruct));
    PetscStackCallExternalVoid("SuperLU_DIST:LUstructInit", sLUstructInit(N, &lu->sLUstruct));
  } else
#endif
  {
    PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructInit", ScalePermstructInit(M, N, &lu->ScalePermstruct));
    PetscStackCallExternalVoid("SuperLU_DIST:LUstructInit", LUstructInit(N, &lu->LUstruct));
  }
  F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST;
  F->ops->solve           = MatSolve_SuperLU_DIST;
  F->ops->matsolve        = MatMatSolve_SuperLU_DIST;
  F->ops->getinertia      = NULL;

  if (A->symmetric == PETSC_BOOL3_TRUE || A->hermitian == PETSC_BOOL3_TRUE) F->ops->getinertia = MatGetInertia_SuperLU_DIST;
  lu->CleanUpSuperLU_Dist = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatCholeskyFactorSymbolic_SuperLU_DIST(Mat F, Mat A, IS r, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscCall(MatLUFactorSymbolic_SuperLU_DIST(F, A, r, r, info));
  F->ops->choleskyfactornumeric = MatLUFactorNumeric_SuperLU_DIST;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatFactorGetSolverType_aij_superlu_dist(Mat A, MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERSUPERLU_DIST;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatView_Info_SuperLU_DIST(Mat A, PetscViewer viewer)
{
  Mat_SuperLU_DIST      *lu = (Mat_SuperLU_DIST *)A->data;
  superlu_dist_options_t options;

  PetscFunctionBegin;
  /* check if matrix is superlu_dist type */
  if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(PETSC_SUCCESS);

  options = lu->options;
  PetscCall(PetscViewerASCIIPrintf(viewer, "SuperLU_DIST run parameters:\n"));
  /* would love to use superlu 'IFMT' macro but it looks like it's inconsistently applied, the
   * format spec for int64_t is set to %d for whatever reason */
  PetscCall(PetscViewerASCIIPrintf(viewer, "  Process grid nprow %lld x npcol %lld \n", (long long)lu->nprow, (long long)lu->npcol));
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
  if (lu->use3d) PetscCall(PetscViewerASCIIPrintf(viewer, "  Using 3d decomposition with npdep %lld \n", (long long)lu->npdep));
#endif

  PetscCall(PetscViewerASCIIPrintf(viewer, "  Equilibrate matrix %s \n", PetscBools[options.Equil != NO]));
  PetscCall(PetscViewerASCIIPrintf(viewer, "  Replace tiny pivots %s \n", PetscBools[options.ReplaceTinyPivot != NO]));
  PetscCall(PetscViewerASCIIPrintf(viewer, "  Use iterative refinement %s \n", PetscBools[options.IterRefine == SLU_DOUBLE]));
  PetscCall(PetscViewerASCIIPrintf(viewer, "  Processors in row %lld col partition %lld \n", (long long)lu->nprow, (long long)lu->npcol));

  switch (options.RowPerm) {
  case NOROWPERM:
    PetscCall(PetscViewerASCIIPrintf(viewer, "  Row permutation NOROWPERM\n"));
    break;
  case LargeDiag_MC64:
    PetscCall(PetscViewerASCIIPrintf(viewer, "  Row permutation LargeDiag_MC64\n"));
    break;
  case LargeDiag_AWPM:
    PetscCall(PetscViewerASCIIPrintf(viewer, "  Row permutation LargeDiag_AWPM\n"));
    break;
  case MY_PERMR:
    PetscCall(PetscViewerASCIIPrintf(viewer, "  Row permutation MY_PERMR\n"));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown row permutation");
  }

  switch (options.ColPerm) {
  case NATURAL:
    PetscCall(PetscViewerASCIIPrintf(viewer, "  Column permutation NATURAL\n"));
    break;
  case MMD_AT_PLUS_A:
    PetscCall(PetscViewerASCIIPrintf(viewer, "  Column permutation MMD_AT_PLUS_A\n"));
    break;
  case MMD_ATA:
    PetscCall(PetscViewerASCIIPrintf(viewer, "  Column permutation MMD_ATA\n"));
    break;
  /* Even though this is called METIS, the SuperLU_DIST code sets this by default if PARMETIS is defined, not METIS */
  case METIS_AT_PLUS_A:
    PetscCall(PetscViewerASCIIPrintf(viewer, "  Column permutation METIS_AT_PLUS_A\n"));
    break;
  case PARMETIS:
    PetscCall(PetscViewerASCIIPrintf(viewer, "  Column permutation PARMETIS\n"));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation");
  }

  PetscCall(PetscViewerASCIIPrintf(viewer, "  Parallel symbolic factorization %s \n", PetscBools[options.ParSymbFact != NO]));

  if (lu->FactPattern == SamePattern) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "  Repeated factorization SamePattern\n"));
  } else if (lu->FactPattern == SamePattern_SameRowPerm) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "  Repeated factorization SamePattern_SameRowPerm\n"));
  } else if (lu->FactPattern == DOFACT) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "  Repeated factorization DOFACT\n"));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown factorization pattern");
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
  if (lu->singleprecision) PetscCall(PetscViewerASCIIPrintf(viewer, "  Using SuperLU_DIST in single precision\n"));
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatView_SuperLU_DIST(Mat A, PetscViewer viewer)
{
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO) PetscCall(MatView_Info_SuperLU_DIST(A, viewer));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A, MatFactorType ftype, Mat *F)
{
  Mat               B;
  Mat_SuperLU_DIST *lu;
  PetscInt          M = A->rmap->N, N = A->cmap->N;
  PetscMPIInt       size;
  superlu_dist_options_t options;
  PetscBool         flg;
  char              string[16];

  PetscFunctionBegin;
  /* Create the factorization matrix */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, M, N));
  PetscCall(PetscStrallocpy("superlu_dist", &((PetscObject)B)->type_name));
  PetscCall(MatSetUp(B));
  B->ops->getinfo = MatGetInfo_External;
  B->ops->view    = MatView_SuperLU_DIST;
  B->ops->destroy = MatDestroy_SuperLU_DIST;

  /* Set the default input options:
     options.Fact              = DOFACT;
     options.Equil             = YES;
     options.ParSymbFact       = NO;
     options.ColPerm           = METIS_AT_PLUS_A;
     options.RowPerm           = LargeDiag_MC64;
     options.ReplaceTinyPivot  = YES;
     options.IterRefine        = DOUBLE;
     options.Trans             = NOTRANS;
     options.SolveInitialized  = NO; -- holds the communication pattern used by MatSolve() and MatMatSolve()
     options.RefineInitialized = NO;
     options.PrintStat         = YES;
     options.SymPattern        = NO;
  */
  set_default_options_dist(&options);

  B->trivialsymbolic = PETSC_TRUE;
  if (ftype == MAT_FACTOR_LU) {
    B->factortype            = MAT_FACTOR_LU;
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
  } else {
    B->factortype                  = MAT_FACTOR_CHOLESKY;
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SuperLU_DIST;
    options.SymPattern             = YES;
  }

  /* set solvertype */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSUPERLU_DIST, &B->solvertype));

  PetscCall(PetscNew(&lu));
  B->data = lu;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));

  lu->options              = options;
  lu->options.Fact         = DOFACT;
  lu->matsolve_iscalled    = PETSC_FALSE;
  lu->matmatsolve_iscalled = PETSC_FALSE;

  PetscCall(PetscOptionsGetString(NULL, NULL, "-pc_precision", string, sizeof(string), &flg));
  if (flg) {
    PetscCall(PetscStrcasecmp(string, "single", &flg));
    PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_USER_INPUT, "-pc_precision only accepts single as option for SuperLU_DIST");
#if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
    lu->singleprecision = PETSC_TRUE;
#endif
  }

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_aij_superlu_dist));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSuperluDistGetDiagU_C", MatSuperluDistGetDiagU_SuperLU_DIST));

  *F = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_SuperLU_DIST(void)
{
  PetscFunctionBegin;
  PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_superlu_dist));
  PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_superlu_dist));
  PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_superlu_dist));
  PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_superlu_dist));
  if (Petsc_Superlu_dist_keyval == MPI_KEYVAL_INVALID) {
    PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_Superlu_dist_keyval_Delete_Fn, &Petsc_Superlu_dist_keyval, NULL));
    PetscCall(PetscRegisterFinalize(Petsc_Superlu_dist_keyval_free));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
  MATSOLVERSUPERLU_DIST - Parallel direct solver package for LU factorization

  Use `./configure --download-superlu_dist --download-parmetis --download-metis --download-ptscotch` to have PETSc installed with SuperLU_DIST

  Use `-pc_type lu` `-pc_factor_mat_solver_type superlu_dist` to use this direct solver
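  For example, a linear solve can be routed through SuperLU_DIST entirely from the command line; the executable name below is illustrative, any program that assembles an `MATAIJ` matrix and solves with `KSP` will do
.vb
   mpiexec -n 4 ./your_app -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type superlu_dist -mat_superlu_dist_printstat
.ve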
  Works with `MATAIJ` matrices

  Options Database Keys:
+ -mat_superlu_dist_r <n> - number of rows in processor partition
. -mat_superlu_dist_c <n> - number of columns in processor partition
. -mat_superlu_dist_3d - use 3d partition, requires SuperLU_DIST 7.2 or later
. -mat_superlu_dist_d <n> - depth in 3d partition (valid only if `-mat_superlu_dist_3d` is provided)
. -mat_superlu_dist_equil - equilibrate the matrix
. -mat_superlu_dist_rowperm <NOROWPERM,LargeDiag_MC64,LargeDiag_AWPM,MY_PERMR> - row permutation
. -mat_superlu_dist_colperm <NATURAL,MMD_AT_PLUS_A,MMD_ATA,METIS_AT_PLUS_A,PARMETIS> - column permutation
. -mat_superlu_dist_replacetinypivot - replace tiny pivots
. -mat_superlu_dist_fact <SamePattern> - (choose one of) `SamePattern`, `SamePattern_SameRowPerm`, `DOFACT`
. -mat_superlu_dist_iterrefine - use iterative refinement
. -mat_superlu_dist_printstat - print factorization information
- -pc_precision single - use SuperLU_DIST single precision with PETSc double precision. Currently this option does not accept an options prefix, so
                          it must be given without a prefix regardless of the `PC` prefix

  Level: beginner

  Note:
  If PETSc was configured with `--with-cuda` then this solver will automatically use the GPUs.

.seealso: [](ch_matrices), `Mat`, `PCLU`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatGetFactor()`
M*/