1 /* 2 Provides an interface to the SuperLU_DIST sparse solver 3 */ 4 5 #include <../src/mat/impls/aij/seq/aij.h> 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> 7 #include <petscpkg_version.h> 8 9 PETSC_PRAGMA_DIAGNOSTIC_IGNORED_BEGIN("-Wundef") 10 EXTERN_C_BEGIN 11 #if defined(PETSC_USE_COMPLEX) 12 #define CASTDOUBLECOMPLEX (doublecomplex *) 13 #define CASTDOUBLECOMPLEXSTAR (doublecomplex **) 14 #include <superlu_zdefs.h> 15 #define LUstructInit zLUstructInit 16 #define ScalePermstructInit zScalePermstructInit 17 #define ScalePermstructFree zScalePermstructFree 18 #define LUstructFree zLUstructFree 19 #define Destroy_LU zDestroy_LU 20 #define ScalePermstruct_t zScalePermstruct_t 21 #define LUstruct_t zLUstruct_t 22 #define SOLVEstruct_t zSOLVEstruct_t 23 #define SolveFinalize zSolveFinalize 24 #define pGetDiagU pzGetDiagU 25 #define pgssvx pzgssvx 26 #define allocateA_dist zallocateA_dist 27 #define Create_CompRowLoc_Matrix_dist zCreate_CompRowLoc_Matrix_dist 28 #define SLU SLU_Z 29 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 30 #define DeAllocLlu_3d zDeAllocLlu_3d 31 #define DeAllocGlu_3d zDeAllocGlu_3d 32 #define Destroy_A3d_gathered_on_2d zDestroy_A3d_gathered_on_2d 33 #define pgssvx3d pzgssvx3d 34 #endif 35 #elif defined(PETSC_USE_REAL_SINGLE) 36 #define CASTDOUBLECOMPLEX 37 #define CASTDOUBLECOMPLEXSTAR 38 #include <superlu_sdefs.h> 39 #define LUstructInit sLUstructInit 40 #define ScalePermstructInit sScalePermstructInit 41 #define ScalePermstructFree sScalePermstructFree 42 #define LUstructFree sLUstructFree 43 #define Destroy_LU sDestroy_LU 44 #define ScalePermstruct_t sScalePermstruct_t 45 #define LUstruct_t sLUstruct_t 46 #define SOLVEstruct_t sSOLVEstruct_t 47 #define SolveFinalize sSolveFinalize 48 #define pGetDiagU psGetDiagU 49 #define pgssvx psgssvx 50 #define allocateA_dist sallocateA_dist 51 #define Create_CompRowLoc_Matrix_dist sCreate_CompRowLoc_Matrix_dist 52 #define SLU SLU_S 53 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 54 #define DeAllocLlu_3d sDeAllocLlu_3d 55 #define DeAllocGlu_3d sDeAllocGlu_3d 56 #define Destroy_A3d_gathered_on_2d sDestroy_A3d_gathered_on_2d 57 #define pgssvx3d psgssvx3d 58 #endif 59 #else 60 #define CASTDOUBLECOMPLEX 61 #define CASTDOUBLECOMPLEXSTAR 62 #include <superlu_ddefs.h> 63 #define LUstructInit dLUstructInit 64 #define ScalePermstructInit dScalePermstructInit 65 #define ScalePermstructFree dScalePermstructFree 66 #define LUstructFree dLUstructFree 67 #define Destroy_LU dDestroy_LU 68 #define ScalePermstruct_t dScalePermstruct_t 69 #define LUstruct_t dLUstruct_t 70 #define SOLVEstruct_t dSOLVEstruct_t 71 #define SolveFinalize dSolveFinalize 72 #define pGetDiagU pdGetDiagU 73 #define pgssvx pdgssvx 74 #define allocateA_dist dallocateA_dist 75 #define Create_CompRowLoc_Matrix_dist dCreate_CompRowLoc_Matrix_dist 76 #define SLU SLU_D 77 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 78 #define DeAllocLlu_3d dDeAllocLlu_3d 79 #define DeAllocGlu_3d dDeAllocGlu_3d 80 #define Destroy_A3d_gathered_on_2d dDestroy_A3d_gathered_on_2d 81 #define pgssvx3d pdgssvx3d 82 #endif 83 #endif 84 #include <superlu_sdefs.h> 85 EXTERN_C_END 86 PETSC_PRAGMA_DIAGNOSTIC_IGNORED_END() 87 88 typedef struct { 89 int_t nprow, npcol, *row, *col; 90 gridinfo_t grid; 91 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 92 PetscBool use3d; 93 int_t npdep; /* replication factor, must be power of two */ 94 gridinfo3d_t grid3d; 95 #endif 96 superlu_dist_options_t options; 97 SuperMatrix A_sup; 98 sScalePermstruct_t sScalePermstruct; /* used instead of the following line if ->singleprecision is used */ 99 ScalePermstruct_t ScalePermstruct; 100 sLUstruct_t sLUstruct; /* used instead of the following line if ->singleprecision is used */ 101 LUstruct_t LUstruct; 102 int StatPrint; 103 sSOLVEstruct_t sSOLVEstruct; /* used instead of the following line if ->singleprecision is used */ 104 SOLVEstruct_t SOLVEstruct; 105 fact_t FactPattern; 106 MPI_Comm comm_superlu; 107 float *sval; /* used instead of the following line if ->singleprecision is used */ 108 PetscScalar *val; 109 PetscBool matsolve_iscalled, matmatsolve_iscalled; 110 PetscBool CleanUpSuperLU_Dist; /* Flag to clean up (non-global) SuperLU objects during Destroy */ 111 PetscBool singleprecision; /* use single precision SuperLU_DIST from double precision PETSc */ 112 float *sbptr; /* solution space if ->singleprecision is used */ 113 } Mat_SuperLU_DIST; 114 115 PetscErrorCode MatSuperluDistGetDiagU_SuperLU_DIST(Mat F, PetscScalar *diagU) 116 { 117 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data; 118 119 PetscFunctionBegin; 120 PetscStackCallExternalVoid("SuperLU_DIST:pGetDiagU", pGetDiagU(F->rmap->N, &lu->LUstruct, &lu->grid, CASTDOUBLECOMPLEX diagU)); 121 PetscFunctionReturn(PETSC_SUCCESS); 122 } 123 124 PetscErrorCode MatSuperluDistGetDiagU(Mat F, PetscScalar *diagU) 125 { 126 PetscFunctionBegin; 127 PetscValidHeaderSpecific(F, MAT_CLASSID, 1); 128 PetscTryMethod(F, "MatSuperluDistGetDiagU_C", (Mat, PetscScalar *), (F, diagU)); 129 PetscFunctionReturn(PETSC_SUCCESS); 130 } 131 132 /* This allows reusing the Superlu_DIST communicator and grid when only a single SuperLU_DIST matrix is used at a time */ 133 typedef struct { 134 MPI_Comm comm; 135 PetscBool busy; 136 gridinfo_t grid; 137 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 138 PetscBool use3d; 139 gridinfo3d_t grid3d; 140 #endif 141 } PetscSuperLU_DIST; 142 143 static PetscMPIInt Petsc_Superlu_dist_keyval = MPI_KEYVAL_INVALID; 144 145 PETSC_EXTERN PetscMPIInt MPIAPI Petsc_Superlu_dist_keyval_Delete_Fn(MPI_Comm comm, PetscMPIInt keyval, void *attr_val, void *extra_state) 146 { 147 PetscSuperLU_DIST *context = (PetscSuperLU_DIST *)attr_val; 148 149 PetscFunctionBegin; 150 if (keyval != Petsc_Superlu_dist_keyval) SETERRMPI(PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Unexpected keyval"); 151 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 152 if (context->use3d) { 153 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit3d", superlu_gridexit3d(&context->grid3d)); 154 } else 155 #endif 156 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit", superlu_gridexit(&context->grid)); 157 PetscCallMPI(MPI_Comm_free(&context->comm)); 158 PetscCall(PetscFree(context)); 159 PetscFunctionReturn(MPI_SUCCESS); 160 } 161 162 /* 163 Performs MPI_Comm_free_keyval() on Petsc_Superlu_dist_keyval but keeps the global variable for 164 users who do not destroy all PETSc objects before PetscFinalize(). 165 166 The value Petsc_Superlu_dist_keyval is retained so that Petsc_Superlu_dist_keyval_Delete_Fn() 167 can still check that the keyval associated with the MPI communicator is correct when the MPI 168 communicator is destroyed. 169 170 This is called in PetscFinalize() 171 */ 172 static PetscErrorCode Petsc_Superlu_dist_keyval_free(void) 173 { 174 PetscMPIInt Petsc_Superlu_dist_keyval_temp = Petsc_Superlu_dist_keyval; 175 176 PetscFunctionBegin; 177 PetscCallMPI(MPI_Comm_free_keyval(&Petsc_Superlu_dist_keyval_temp)); 178 PetscFunctionReturn(PETSC_SUCCESS); 179 } 180 181 static PetscErrorCode MatDestroy_SuperLU_DIST(Mat A) 182 { 183 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data; 184 185 PetscFunctionBegin; 186 PetscCall(PetscFree(lu->sbptr)); 187 if (lu->CleanUpSuperLU_Dist) { 188 /* Deallocate SuperLU_DIST storage */ 189 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist", Destroy_CompRowLoc_Matrix_dist(&lu->A_sup)); 190 if (lu->options.SolveInitialized) { 191 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", sSolveFinalize(&lu->options, &lu->sSOLVEstruct)); 192 else PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct)); 193 } 194 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 195 if (lu->use3d) { 196 if (lu->grid3d.zscp.Iam == 0) { 197 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->sLUstruct)); 198 else PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct)); 199 } else { 200 if (lu->singleprecision) { 201 PetscStackCallExternalVoid("SuperLU_DIST:DeAllocLlu_3d", sDeAllocLlu_3d(lu->A_sup.ncol, &lu->sLUstruct, &lu->grid3d)); 202 PetscStackCallExternalVoid("SuperLU_DIST:DeAllocGlu_3d", sDeAllocGlu_3d(&lu->sLUstruct)); 203 } else { 204 PetscStackCallExternalVoid("SuperLU_DIST:DeAllocLlu_3d", DeAllocLlu_3d(lu->A_sup.ncol, &lu->LUstruct, &lu->grid3d)); 205 PetscStackCallExternalVoid("SuperLU_DIST:DeAllocGlu_3d", DeAllocGlu_3d(&lu->LUstruct)); 206 } 207 } 208 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:Destroy_A3d_gathered_on_2d", sDestroy_A3d_gathered_on_2d(&lu->sSOLVEstruct, &lu->grid3d)); 209 else PetscStackCallExternalVoid("SuperLU_DIST:Destroy_A3d_gathered_on_2d", Destroy_A3d_gathered_on_2d(&lu->SOLVEstruct, &lu->grid3d)); 210 } else 211 #endif 212 if (lu->singleprecision) { 213 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->cmap->N, &lu->grid, &lu->sLUstruct)); 214 PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", sScalePermstructFree(&lu->sScalePermstruct)); 215 PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", sLUstructFree(&lu->sLUstruct)); 216 } else { 217 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct)); 218 PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", ScalePermstructFree(&lu->ScalePermstruct)); 219 PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", LUstructFree(&lu->LUstruct)); 220 } 221 /* Release the SuperLU_DIST process grid only if the matrix has its own copy, that is it is not in the communicator context */ 222 if (lu->comm_superlu) { 223 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 224 if (lu->use3d) { 225 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit3d", superlu_gridexit3d(&lu->grid3d)); 226 } else 227 #endif 228 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit", superlu_gridexit(&lu->grid)); 229 } 230 } 231 /* 232 * We always need to release the communicator that was created in MatGetFactor_aij_superlu_dist. 233 * lu->CleanUpSuperLU_Dist was turned on in MatLUFactorSymbolic_SuperLU_DIST. There are some use 234 * cases where we only create a matrix but do not solve mat. In these cases, lu->CleanUpSuperLU_Dist 235 * is off, and the communicator was not released or marked as "not busy " in the old code. 236 * Here we try to release comm regardless. 237 */ 238 if (lu->comm_superlu) { 239 PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &lu->comm_superlu)); 240 } else { 241 PetscSuperLU_DIST *context; 242 MPI_Comm comm; 243 PetscMPIInt flg; 244 245 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 246 PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &flg)); 247 if (flg) context->busy = PETSC_FALSE; 248 } 249 250 PetscCall(PetscFree(A->data)); 251 /* clear composed functions */ 252 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 253 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSuperluDistGetDiagU_C", NULL)); 254 255 PetscFunctionReturn(PETSC_SUCCESS); 256 } 257 258 static PetscErrorCode MatSolve_SuperLU_DIST(Mat A, Vec b_mpi, Vec x) 259 { 260 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data; 261 PetscInt m = A->rmap->n; 262 SuperLUStat_t stat; 263 PetscReal berr[1]; 264 PetscScalar *bptr = NULL; 265 float sberr[1]; 266 int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */ 267 static PetscBool cite = PETSC_FALSE; 268 269 PetscFunctionBegin; 270 PetscCheck(lu->options.Fact == FACTORED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "SuperLU_DIST options.Fact must equal FACTORED"); 271 PetscCall(PetscCitationsRegister("@article{lidemmel03,\n author = {Xiaoye S. Li and James W. Demmel},\n title = {{SuperLU_DIST}: A Scalable Distributed-Memory Sparse Direct\n Solver for Unsymmetric Linear Systems},\n journal = {ACM " 272 "Trans. Mathematical Software},\n volume = {29},\n number = {2},\n pages = {110-140},\n year = 2003\n}\n", 273 &cite)); 274 275 if (lu->options.SolveInitialized && !lu->matsolve_iscalled) { 276 /* see comments in MatMatSolve() */ 277 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", sSolveFinalize(&lu->options, &lu->sSOLVEstruct)); 278 else PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct)); 279 lu->options.SolveInitialized = NO; 280 } 281 PetscCall(VecCopy(b_mpi, x)); 282 PetscCall(VecGetArray(x, &bptr)); 283 if (lu->singleprecision) { 284 PetscInt n; 285 PetscCall(VecGetLocalSize(x, &n)); 286 if (!lu->sbptr) PetscCall(PetscMalloc1(n, &lu->sbptr)); 287 for (PetscInt i = 0; i < n; i++) lu->sbptr[i] = PetscRealPart(bptr[i]); /* PetscRealPart() is a no-op to allow compiling with complex */ 288 } 289 290 PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */ 291 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE) 292 if (lu->use3d) { 293 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", psgssvx3d(&lu->options, &lu->A_sup, &lu->sScalePermstruct, lu->sbptr, m, 1, &lu->grid3d, &lu->sLUstruct, &lu->sSOLVEstruct, sberr, &stat, &info)); 294 else PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, 1, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info)); 295 } else 296 #endif 297 if (lu->singleprecision) 298 PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", psgssvx(&lu->options, &lu->A_sup, &lu->sScalePermstruct, lu->sbptr, m, 1, &lu->grid, &lu->sLUstruct, &lu->sSOLVEstruct, sberr, &stat, &info)); 299 else PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, 1, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info)); 300 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "pdgssvx fails, info: %d", info); 301 302 if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ 303 PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat)); 304 305 if (lu->singleprecision) { 306 PetscInt n; 307 PetscCall(VecGetLocalSize(x, &n)); 308 for (PetscInt i = 0; i < n; i++) bptr[i] = lu->sbptr[i]; 309 } 310 PetscCall(VecRestoreArray(x, &bptr)); 311 lu->matsolve_iscalled = PETSC_TRUE; 312 lu->matmatsolve_iscalled = PETSC_FALSE; 313 PetscFunctionReturn(PETSC_SUCCESS); 314 } 315 316 static PetscErrorCode MatMatSolve_SuperLU_DIST(Mat A, Mat B_mpi, Mat X) 317 { 318 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data; 319 PetscInt m = A->rmap->n, nrhs; 320 SuperLUStat_t stat; 321 PetscReal berr[1]; 322 PetscScalar *bptr; 323 int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */ 324 PetscBool flg; 325 326 PetscFunctionBegin; 327 PetscCheck(!lu->singleprecision, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for -pc_precision single"); 328 PetscCheck(lu->options.Fact == FACTORED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "SuperLU_DIST options.Fact must equal FACTORED"); 329 PetscCall(PetscObjectTypeCompareAny((PetscObject)B_mpi, &flg, MATSEQDENSE, MATMPIDENSE, NULL)); 330 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix B must be MATDENSE matrix"); 331 if (X != B_mpi) { 332 PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &flg, MATSEQDENSE, MATMPIDENSE, NULL)); 333 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 334 } 335 336 if (lu->options.SolveInitialized && !lu->matmatsolve_iscalled) { 337 /* communication pattern of SOLVEstruct is unlikely created for matmatsolve, 338 thus destroy it and create a new SOLVEstruct. 339 Otherwise it may result in memory corruption or incorrect solution 340 See src/mat/tests/ex125.c */ 341 PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct)); 342 lu->options.SolveInitialized = NO; 343 } 344 if (X != B_mpi) PetscCall(MatCopy(B_mpi, X, SAME_NONZERO_PATTERN)); 345 346 PetscCall(MatGetSize(B_mpi, NULL, &nrhs)); 347 348 PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */ 349 PetscCall(MatDenseGetArray(X, &bptr)); 350 351 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE) 352 if (lu->use3d) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, nrhs, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info)); 353 else 354 #endif 355 PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, nrhs, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info)); 356 357 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "pdgssvx fails, info: %d", info); 358 PetscCall(MatDenseRestoreArray(X, &bptr)); 359 360 if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ 361 PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat)); 362 lu->matsolve_iscalled = PETSC_FALSE; 363 lu->matmatsolve_iscalled = PETSC_TRUE; 364 PetscFunctionReturn(PETSC_SUCCESS); 365 } 366 367 /* 368 input: 369 F: numeric Cholesky factor 370 output: 371 nneg: total number of negative pivots 372 nzero: total number of zero pivots 373 npos: (global dimension of F) - nneg - nzero 374 */ 375 static PetscErrorCode MatGetInertia_SuperLU_DIST(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 376 { 377 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data; 378 PetscScalar *diagU = NULL; 379 PetscInt M, i, neg = 0, zero = 0, pos = 0; 380 PetscReal r; 381 382 PetscFunctionBegin; 383 PetscCheck(!lu->singleprecision, PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Not for -pc_precision single"); 384 PetscCheck(F->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix factor F is not assembled"); 385 PetscCheck(lu->options.RowPerm == NOROWPERM, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must set NOROWPERM"); 386 PetscCall(MatGetSize(F, &M, NULL)); 387 PetscCall(PetscMalloc1(M, &diagU)); 388 PetscCall(MatSuperluDistGetDiagU(F, diagU)); 389 for (i = 0; i < M; i++) { 390 #if defined(PETSC_USE_COMPLEX) 391 r = PetscImaginaryPart(diagU[i]) / 10.0; 392 PetscCheck(r > -PETSC_MACHINE_EPSILON && r < PETSC_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "diagU[%" PetscInt_FMT "]=%g + i %g is non-real", i, (double)PetscRealPart(diagU[i]), (double)(r * 10.0)); 393 r = PetscRealPart(diagU[i]); 394 #else 395 r = diagU[i]; 396 #endif 397 if (r > 0) { 398 pos++; 399 } else if (r < 0) { 400 neg++; 401 } else zero++; 402 } 403 404 PetscCall(PetscFree(diagU)); 405 if (nneg) *nneg = neg; 406 if (nzero) *nzero = zero; 407 if (npos) *npos = pos; 408 PetscFunctionReturn(PETSC_SUCCESS); 409 } 410 411 static PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F, Mat A, const MatFactorInfo *info) 412 { 413 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data; 414 Mat Aloc; 415 const PetscScalar *av; 416 const PetscInt *ai = NULL, *aj = NULL; 417 PetscInt nz, dummy; 418 int sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */ 419 SuperLUStat_t stat; 420 PetscReal *berr = 0; 421 float *sberr = 0; 422 PetscBool ismpiaij, isseqaij, flg; 423 424 PetscFunctionBegin; 425 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij)); 426 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &ismpiaij)); 427 if (ismpiaij) { 428 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &Aloc)); 429 } else if (isseqaij) { 430 PetscCall(PetscObjectReference((PetscObject)A)); 431 Aloc = A; 432 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name); 433 434 PetscCall(MatGetRowIJ(Aloc, 0, PETSC_FALSE, PETSC_FALSE, &dummy, &ai, &aj, &flg)); 435 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "GetRowIJ failed"); 436 PetscCall(MatSeqAIJGetArrayRead(Aloc, &av)); 437 nz = ai[Aloc->rmap->n]; 438 439 /* Allocations for A_sup */ 440 if (lu->options.Fact == DOFACT) { /* first numeric factorization */ 441 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", sallocateA_dist(Aloc->rmap->n, nz, &lu->sval, &lu->col, &lu->row)); 442 else PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR & lu->val, &lu->col, &lu->row)); 443 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 444 if (lu->FactPattern == SamePattern_SameRowPerm) { 445 lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */ 446 } else if (lu->FactPattern == SamePattern) { 447 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 448 if (lu->use3d) { 449 if (lu->grid3d.zscp.Iam == 0) { 450 if (lu->singleprecision) { 451 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->sLUstruct)); 452 PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", sSolveFinalize(&lu->options, &lu->sSOLVEstruct)); 453 } else { 454 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct)); 455 PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct)); 456 } 457 } else { 458 if (lu->singleprecision) { 459 PetscStackCallExternalVoid("SuperLU_DIST:DeAllocLlu_3d", sDeAllocLlu_3d(lu->A_sup.ncol, &lu->sLUstruct, &lu->grid3d)); 460 PetscStackCallExternalVoid("SuperLU_DIST:DeAllocGlu_3d", sDeAllocGlu_3d(&lu->sLUstruct)); 461 } else { 462 PetscStackCallExternalVoid("SuperLU_DIST:DeAllocLlu_3d", DeAllocLlu_3d(lu->A_sup.ncol, &lu->LUstruct, &lu->grid3d)); 463 PetscStackCallExternalVoid("SuperLU_DIST:DeAllocGlu_3d", DeAllocGlu_3d(&lu->LUstruct)); 464 } 465 } 466 } else 467 #endif 468 if (lu->singleprecision) 469 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->rmap->N, &lu->grid, &lu->sLUstruct)); 470 else PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct)); 471 lu->options.Fact = SamePattern; 472 } else if (lu->FactPattern == DOFACT) { 473 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist", Destroy_CompRowLoc_Matrix_dist(&lu->A_sup)); 474 if (lu->singleprecision) { 475 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->rmap->N, &lu->grid, &lu->sLUstruct)); 476 } else { 477 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct)); 478 } 479 lu->options.Fact = DOFACT; 480 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", sallocateA_dist(Aloc->rmap->n, nz, &lu->sval, &lu->col, &lu->row)); 481 else PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR & lu->val, &lu->col, &lu->row)); 482 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "options.Fact must be one of SamePattern SamePattern_SameRowPerm DOFACT"); 483 } 484 485 /* Copy AIJ matrix to superlu_dist matrix */ 486 PetscCall(PetscArraycpy(lu->row, ai, Aloc->rmap->n + 1)); 487 PetscCall(PetscArraycpy(lu->col, aj, nz)); 488 if (lu->singleprecision) 489 for (PetscInt i = 0; i < nz; i++) lu->sval[i] = PetscRealPart(av[i]); /* PetscRealPart() is a no-op to allow compiling with complex */ 490 else PetscCall(PetscArraycpy(lu->val, av, nz)); 491 PetscCall(MatRestoreRowIJ(Aloc, 0, PETSC_FALSE, PETSC_FALSE, &dummy, &ai, &aj, &flg)); 492 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "RestoreRowIJ failed"); 493 PetscCall(MatSeqAIJRestoreArrayRead(Aloc, &av)); 494 PetscCall(MatDestroy(&Aloc)); 495 496 /* Create and setup A_sup */ 497 if (lu->options.Fact == DOFACT) { 498 if (lu->singleprecision) 499 PetscStackCallExternalVoid("SuperLU_DIST:Create_CompRowLoc_Matrix_dist", sCreate_CompRowLoc_Matrix_dist(&lu->A_sup, A->rmap->N, A->cmap->N, nz, A->rmap->n, A->rmap->rstart, lu->sval, lu->col, lu->row, SLU_NR_loc, SLU_S, SLU_GE)); 500 else 501 PetscStackCallExternalVoid("SuperLU_DIST:Create_CompRowLoc_Matrix_dist", Create_CompRowLoc_Matrix_dist(&lu->A_sup, A->rmap->N, A->cmap->N, nz, A->rmap->n, A->rmap->rstart, CASTDOUBLECOMPLEX lu->val, lu->col, lu->row, SLU_NR_loc, SLU, SLU_GE)); 502 } 503 504 /* Factor the matrix. */ 505 PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */ 506 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE) 507 if (lu->use3d) { 508 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", psgssvx3d(&lu->options, &lu->A_sup, &lu->sScalePermstruct, 0, A->rmap->n, 0, &lu->grid3d, &lu->sLUstruct, &lu->sSOLVEstruct, sberr, &stat, &sinfo)); 509 else PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo)); 510 } else 511 #endif 512 if (lu->singleprecision) 513 PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", psgssvx(&lu->options, &lu->A_sup, &lu->sScalePermstruct, 0, A->rmap->n, 0, &lu->grid, &lu->sLUstruct, &lu->sSOLVEstruct, sberr, &stat, &sinfo)); 514 else PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo)); 515 if (sinfo > 0) { 516 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot in row %d", sinfo); 517 if (sinfo <= lu->A_sup.ncol) { 518 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 519 PetscCall(PetscInfo(F, "U(i,i) is exactly zero, i= %d\n", sinfo)); 520 } else if (sinfo > lu->A_sup.ncol) { 521 /* 522 number of bytes allocated when memory allocation 523 failure occurred, plus A->ncol. 524 */ 525 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 526 PetscCall(PetscInfo(F, "Number of bytes allocated when memory allocation fails %d\n", sinfo)); 527 } 528 } else PetscCheck(sinfo >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "info = %d, argument in p*gssvx() had an illegal value", sinfo); 529 530 if (lu->options.PrintStat) { PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ } 531 PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat)); 532 F->assembled = PETSC_TRUE; 533 F->preallocated = PETSC_TRUE; 534 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */ 535 PetscFunctionReturn(PETSC_SUCCESS); 536 } 537 538 /* Note the Petsc r and c permutations are ignored */ 539 static PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 540 { 541 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data; 542 PetscInt M = A->rmap->N, N = A->cmap->N, indx; 543 PetscMPIInt size, mpiflg; 544 PetscBool flg, set; 545 const char *colperm[] = {"NATURAL", "MMD_AT_PLUS_A", "MMD_ATA", "METIS_AT_PLUS_A", "PARMETIS"}; 546 const char *rowperm[] = {"NOROWPERM", "LargeDiag_MC64", "LargeDiag_AWPM", "MY_PERMR"}; 547 const char *factPattern[] = {"SamePattern", "SamePattern_SameRowPerm", "DOFACT"}; 548 MPI_Comm comm; 549 PetscSuperLU_DIST *context = NULL; 550 551 PetscFunctionBegin; 552 /* Set options to F */ 553 PetscCall(PetscObjectGetComm((PetscObject)F, &comm)); 554 PetscCallMPI(MPI_Comm_size(comm, &size)); 555 556 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "SuperLU_Dist Options", "Mat"); 557 PetscCall(PetscOptionsBool("-mat_superlu_dist_equil", "Equilibrate matrix", "None", lu->options.Equil ? PETSC_TRUE : PETSC_FALSE, &flg, &set)); 558 if (set && !flg) lu->options.Equil = NO; 559 560 PetscCall(PetscOptionsEList("-mat_superlu_dist_rowperm", "Row permutation", "None", rowperm, 4, rowperm[1], &indx, &flg)); 561 if (flg) { 562 switch (indx) { 563 case 0: 564 lu->options.RowPerm = NOROWPERM; 565 break; 566 case 1: 567 lu->options.RowPerm = LargeDiag_MC64; 568 break; 569 case 2: 570 lu->options.RowPerm = LargeDiag_AWPM; 571 break; 572 case 3: 573 lu->options.RowPerm = MY_PERMR; 574 break; 575 default: 576 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown row permutation"); 577 } 578 } 579 580 PetscCall(PetscOptionsEList("-mat_superlu_dist_colperm", "Column permutation", "None", colperm, 5, colperm[3], &indx, &flg)); 581 if (flg) { 582 switch (indx) { 583 case 0: 584 lu->options.ColPerm = NATURAL; 585 break; 586 case 1: 587 lu->options.ColPerm = MMD_AT_PLUS_A; 588 break; 589 case 2: 590 lu->options.ColPerm = MMD_ATA; 591 break; 592 case 3: 593 lu->options.ColPerm = METIS_AT_PLUS_A; 594 break; 595 case 4: 596 lu->options.ColPerm = PARMETIS; /* only works for np>1 */ 597 break; 598 default: 599 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation"); 600 } 601 } 602 603 lu->options.ReplaceTinyPivot = NO; 604 PetscCall(PetscOptionsBool("-mat_superlu_dist_replacetinypivot", "Replace tiny pivots", "None", lu->options.ReplaceTinyPivot ? PETSC_TRUE : PETSC_FALSE, &flg, &set)); 605 if (set && flg) lu->options.ReplaceTinyPivot = YES; 606 607 lu->options.ParSymbFact = NO; 608 PetscCall(PetscOptionsBool("-mat_superlu_dist_parsymbfact", "Parallel symbolic factorization", "None", PETSC_FALSE, &flg, &set)); 609 if (set && flg && size > 1) { 610 #if defined(PETSC_HAVE_PARMETIS) 611 lu->options.ParSymbFact = YES; 612 lu->options.ColPerm = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */ 613 #else 614 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "parsymbfact needs PARMETIS"); 615 #endif 616 } 617 618 lu->FactPattern = SamePattern; 619 PetscCall(PetscOptionsEList("-mat_superlu_dist_fact", "Sparsity pattern for repeated matrix factorization", "None", factPattern, 3, factPattern[0], &indx, &flg)); 620 if (flg) { 621 switch (indx) { 622 case 0: 623 lu->FactPattern = SamePattern; 624 break; 625 case 1: 626 lu->FactPattern = SamePattern_SameRowPerm; 627 break; 628 case 2: 629 lu->FactPattern = DOFACT; 630 break; 631 } 632 } 633 634 lu->options.IterRefine = NOREFINE; 635 PetscCall(PetscOptionsBool("-mat_superlu_dist_iterrefine", "Use iterative refinement", "None", lu->options.IterRefine == NOREFINE ? PETSC_FALSE : PETSC_TRUE, &flg, &set)); 636 if (set) { 637 if (flg) lu->options.IterRefine = SLU_DOUBLE; 638 else lu->options.IterRefine = NOREFINE; 639 } 640 641 if (PetscLogPrintInfo) lu->options.PrintStat = YES; 642 else lu->options.PrintStat = NO; 643 PetscCall(PetscOptionsDeprecated("-mat_superlu_dist_statprint", "-mat_superlu_dist_printstat", "3.19", NULL)); 644 PetscCall(PetscOptionsBool("-mat_superlu_dist_printstat", "Print factorization information", "None", (PetscBool)lu->options.PrintStat, (PetscBool *)&lu->options.PrintStat, NULL)); 645 646 PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &mpiflg)); 647 if (!mpiflg || context->busy) { /* additional options */ 648 if (!mpiflg) { 649 PetscCall(PetscNew(&context)); 650 context->busy = PETSC_TRUE; 651 PetscCallMPI(MPI_Comm_dup(comm, &context->comm)); 652 PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_Superlu_dist_keyval, context)); 653 } else { 654 PetscCall(PetscCommGetComm(PetscObjectComm((PetscObject)A), &lu->comm_superlu)); 655 } 656 657 /* Default number of process columns and rows */ 658 lu->nprow = (int_t)(0.5 + PetscSqrtReal((PetscReal)size)); 659 if (!lu->nprow) lu->nprow = 1; 660 while (lu->nprow > 0) { 661 lu->npcol = (int_t)(size / lu->nprow); 662 if (size == lu->nprow * lu->npcol) break; 663 lu->nprow--; 664 } 665 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 666 lu->use3d = PETSC_FALSE; 667 lu->npdep = 1; 668 #endif 669 670 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 671 PetscCall(PetscOptionsBool("-mat_superlu_dist_3d", "Use SuperLU_DIST 3D distribution", "None", lu->use3d, &lu->use3d, NULL)); 672 PetscCheck(!PetscDefined(MISSING_GETLINE) || !lu->use3d, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP_SYS, "-mat_superlu_dist_3d requires a system with a getline() implementation"); 673 if (lu->use3d) { 674 PetscInt t; 675 PetscCall(PetscOptionsInt("-mat_superlu_dist_d", "Number of z entries in processor partition", "None", lu->npdep, (PetscInt *)&lu->npdep, NULL)); 676 t = (PetscInt)PetscLog2Real((PetscReal)lu->npdep); 677 PetscCheck(PetscPowInt(2, t) == lu->npdep, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_OUTOFRANGE, "-mat_superlu_dist_d %lld must be a power of 2", (long long)lu->npdep); 678 if (lu->npdep > 1) { 679 lu->nprow = (int_t)(0.5 + PetscSqrtReal((PetscReal)(size / lu->npdep))); 680 if (!lu->nprow) lu->nprow = 1; 681 while (lu->nprow > 0) { 682 lu->npcol = (int_t)(size / (lu->npdep * lu->nprow)); 683 if (size == lu->nprow * lu->npcol * lu->npdep) break; 684 lu->nprow--; 685 } 686 } 687 } 688 #endif 689 PetscCall(PetscOptionsInt("-mat_superlu_dist_r", "Number rows in processor partition", "None", lu->nprow, (PetscInt *)&lu->nprow, NULL)); 690 PetscCall(PetscOptionsInt("-mat_superlu_dist_c", "Number columns in processor partition", "None", lu->npcol, (PetscInt *)&lu->npcol, NULL)); 691 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 692 PetscCheck(size == lu->nprow * lu->npcol * lu->npdep, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of processes %d must equal to nprow %lld * npcol %lld * npdep %lld", size, (long long)lu->nprow, (long long)lu->npcol, (long long)lu->npdep); 693 #else 694 PetscCheck(size == lu->nprow * lu->npcol, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of processes %d must equal to nprow %lld * npcol %lld", size, (long long)lu->nprow, (long long)lu->npcol); 695 #endif 696 /* end of adding additional options */ 697 698 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 699 if (lu->use3d) { 700 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridinit3d", superlu_gridinit3d(context ? context->comm : lu->comm_superlu, lu->nprow, lu->npcol, lu->npdep, &lu->grid3d)); 701 if (context) { 702 context->grid3d = lu->grid3d; 703 context->use3d = lu->use3d; 704 } 705 } else { 706 #endif 707 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridinit", superlu_gridinit(context ? context->comm : lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid)); 708 if (context) context->grid = lu->grid; 709 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 710 } 711 #endif 712 PetscCall(PetscInfo(NULL, "Duplicating a communicator for SuperLU_DIST and calling superlu_gridinit()\n")); 713 if (mpiflg) { 714 PetscCall(PetscInfo(NULL, "Communicator attribute already in use so not saving communicator and SuperLU_DIST grid in communicator attribute \n")); 715 } else { 716 PetscCall(PetscInfo(NULL, "Storing communicator and SuperLU_DIST grid in communicator attribute\n")); 717 } 718 } else { /* (mpiflg && !context->busy) */ 719 PetscCall(PetscInfo(NULL, "Reusing communicator and superlu_gridinit() for SuperLU_DIST from communicator attribute.")); 720 context->busy = PETSC_TRUE; 721 lu->grid = context->grid; 722 } 723 PetscOptionsEnd(); 724 725 /* Initialize ScalePermstruct and LUstruct. */ 726 if (lu->singleprecision) { 727 PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructInit", sScalePermstructInit(M, N, &lu->sScalePermstruct)); 728 PetscStackCallExternalVoid("SuperLU_DIST:LUstructInit", sLUstructInit(N, &lu->sLUstruct)); 729 } else { 730 PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructInit", ScalePermstructInit(M, N, &lu->ScalePermstruct)); 731 PetscStackCallExternalVoid("SuperLU_DIST:LUstructInit", LUstructInit(N, &lu->LUstruct)); 732 } 733 F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 734 F->ops->solve = MatSolve_SuperLU_DIST; 735 F->ops->matsolve = MatMatSolve_SuperLU_DIST; 736 F->ops->getinertia = NULL; 737 738 if (A->symmetric == PETSC_BOOL3_TRUE || A->hermitian == PETSC_BOOL3_TRUE) F->ops->getinertia = MatGetInertia_SuperLU_DIST; 739 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 740 PetscFunctionReturn(PETSC_SUCCESS); 741 } 742 743 static PetscErrorCode MatCholeskyFactorSymbolic_SuperLU_DIST(Mat F, Mat A, IS r, const MatFactorInfo *info) 744 { 745 PetscFunctionBegin; 746 PetscCall(MatLUFactorSymbolic_SuperLU_DIST(F, A, r, r, info)); 747 F->ops->choleskyfactornumeric = MatLUFactorNumeric_SuperLU_DIST; 748 PetscFunctionReturn(PETSC_SUCCESS); 749 } 750 751 static PetscErrorCode MatFactorGetSolverType_aij_superlu_dist(Mat A, MatSolverType *type) 752 { 753 PetscFunctionBegin; 754 *type = MATSOLVERSUPERLU_DIST; 755 PetscFunctionReturn(PETSC_SUCCESS); 756 } 757 758 static PetscErrorCode MatView_Info_SuperLU_DIST(Mat A, PetscViewer viewer) 759 { 760 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data; 761 superlu_dist_options_t options; 762 763 PetscFunctionBegin; 764 /* check if matrix is superlu_dist type */ 765 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(PETSC_SUCCESS); 766 767 options = lu->options; 768 PetscCall(PetscViewerASCIIPrintf(viewer, "SuperLU_DIST run parameters:\n")); 769 /* would love to use superlu 'IFMT' macro but it looks like it's inconsistently applied, the 770 * format spec for int64_t is set to %d for whatever reason */ 771 PetscCall(PetscViewerASCIIPrintf(viewer, " Process grid nprow %lld x npcol %lld \n", (long long)lu->nprow, (long long)lu->npcol)); 772 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 773 if (lu->use3d) PetscCall(PetscViewerASCIIPrintf(viewer, " Using 3d decomposition with npdep %lld \n", (long long)lu->npdep)); 774 #endif 775 776 PetscCall(PetscViewerASCIIPrintf(viewer, " Equilibrate matrix %s \n", PetscBools[options.Equil != NO])); 777 PetscCall(PetscViewerASCIIPrintf(viewer, " Replace tiny pivots %s \n", PetscBools[options.ReplaceTinyPivot != NO])); 778 PetscCall(PetscViewerASCIIPrintf(viewer, " Use iterative refinement %s \n", PetscBools[options.IterRefine == SLU_DOUBLE])); 779 PetscCall(PetscViewerASCIIPrintf(viewer, " Processors in row %lld col partition %lld \n", (long long)lu->nprow, (long long)lu->npcol)); 780 781 switch (options.RowPerm) { 782 case NOROWPERM: 783 PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation NOROWPERM\n")); 784 break; 785 case LargeDiag_MC64: 786 PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation LargeDiag_MC64\n")); 787 break; 788 case LargeDiag_AWPM: 789 PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation LargeDiag_AWPM\n")); 790 break; 791 case MY_PERMR: 792 PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation MY_PERMR\n")); 793 break; 794 default: 795 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation"); 796 } 797 798 switch (options.ColPerm) { 799 case NATURAL: 800 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation NATURAL\n")); 801 break; 802 case MMD_AT_PLUS_A: 803 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation MMD_AT_PLUS_A\n")); 804 break; 805 case MMD_ATA: 806 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation MMD_ATA\n")); 807 break; 808 /* Even though this is called METIS, the SuperLU_DIST code sets this by default if PARMETIS is defined, not METIS */ 809 case METIS_AT_PLUS_A: 810 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation METIS_AT_PLUS_A\n")); 811 break; 812 case PARMETIS: 813 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation PARMETIS\n")); 814 break; 815 default: 816 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation"); 817 } 818 819 PetscCall(PetscViewerASCIIPrintf(viewer, " Parallel symbolic factorization %s \n", PetscBools[options.ParSymbFact != NO])); 820 821 if (lu->FactPattern == SamePattern) { 822 PetscCall(PetscViewerASCIIPrintf(viewer, " Repeated factorization SamePattern\n")); 823 } else if (lu->FactPattern == SamePattern_SameRowPerm) { 824 PetscCall(PetscViewerASCIIPrintf(viewer, " Repeated factorization SamePattern_SameRowPerm\n")); 825 } else if (lu->FactPattern == DOFACT) { 826 PetscCall(PetscViewerASCIIPrintf(viewer, " Repeated factorization DOFACT\n")); 827 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown factorization pattern"); 828 if (lu->singleprecision) PetscCall(PetscViewerASCIIPrintf(viewer, " Using SuperLU_DIST in single precision\n")); 829 PetscFunctionReturn(PETSC_SUCCESS); 830 } 831 832 static PetscErrorCode MatView_SuperLU_DIST(Mat A, PetscViewer viewer) 833 { 834 PetscBool iascii; 835 PetscViewerFormat format; 836 837 PetscFunctionBegin; 838 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 839 if (iascii) { 840 PetscCall(PetscViewerGetFormat(viewer, &format)); 841 if (format == PETSC_VIEWER_ASCII_INFO) PetscCall(MatView_Info_SuperLU_DIST(A, viewer)); 842 } 843 PetscFunctionReturn(PETSC_SUCCESS); 844 } 845 846 static PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A, MatFactorType ftype, Mat *F) 847 { 848 Mat B; 849 Mat_SuperLU_DIST *lu; 850 PetscInt M = A->rmap->N, N = A->cmap->N; 851 PetscMPIInt size; 852 superlu_dist_options_t options; 853 PetscBool flg; 854 char string[16]; 855 856 PetscFunctionBegin; 857 /* Create the factorization matrix */ 858 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 859 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, M, N)); 860 PetscCall(PetscStrallocpy("superlu_dist", &((PetscObject)B)->type_name)); 861 PetscCall(MatSetUp(B)); 862 B->ops->getinfo = MatGetInfo_External; 863 B->ops->view = MatView_SuperLU_DIST; 864 B->ops->destroy = MatDestroy_SuperLU_DIST; 865 866 /* Set the default input options: 867 options.Fact = DOFACT; 868 options.Equil = YES; 869 options.ParSymbFact = NO; 870 options.ColPerm = METIS_AT_PLUS_A; 871 options.RowPerm = LargeDiag_MC64; 872 options.ReplaceTinyPivot = YES; 873 options.IterRefine = DOUBLE; 874 options.Trans = NOTRANS; 875 options.SolveInitialized = NO; -hold the communication pattern used MatSolve() and MatMatSolve() 876 options.RefineInitialized = NO; 877 options.PrintStat = YES; 878 options.SymPattern = NO; 879 */ 880 set_default_options_dist(&options); 881 882 B->trivialsymbolic = PETSC_TRUE; 883 if (ftype == MAT_FACTOR_LU) { 884 B->factortype = MAT_FACTOR_LU; 885 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 886 } else { 887 B->factortype = MAT_FACTOR_CHOLESKY; 888 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SuperLU_DIST; 889 options.SymPattern = YES; 890 } 891 892 /* set solvertype */ 893 PetscCall(PetscFree(B->solvertype)); 894 PetscCall(PetscStrallocpy(MATSOLVERSUPERLU_DIST, &B->solvertype)); 895 896 PetscCall(PetscNew(&lu)); 897 B->data = lu; 898 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 899 900 lu->options = options; 901 lu->options.Fact = DOFACT; 902 lu->matsolve_iscalled = PETSC_FALSE; 903 lu->matmatsolve_iscalled = PETSC_FALSE; 904 905 PetscCall(PetscOptionsGetString(NULL, NULL, "-pc_precision", string, sizeof(string), &flg)); 906 if (flg) { 907 PetscCall(PetscStrcasecmp(string, "single", &flg)); 908 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_USER_INPUT, "-pc_precision only accepts single as option for SuperLU_DIST"); 909 lu->singleprecision = PETSC_TRUE; 910 #if defined(PETSC_USE_COMPLEX) 911 PetscCheck(!lu->singleprecision, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for -pc_precision single with complex"); 912 #endif 913 } 914 915 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_aij_superlu_dist)); 916 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSuperluDistGetDiagU_C", MatSuperluDistGetDiagU_SuperLU_DIST)); 917 918 *F = B; 919 PetscFunctionReturn(PETSC_SUCCESS); 920 } 921 922 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_SuperLU_DIST(void) 923 { 924 PetscFunctionBegin; 925 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_superlu_dist)); 926 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_superlu_dist)); 927 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_superlu_dist)); 928 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_superlu_dist)); 929 if (Petsc_Superlu_dist_keyval == MPI_KEYVAL_INVALID) { 930 PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_Superlu_dist_keyval_Delete_Fn, &Petsc_Superlu_dist_keyval, NULL)); 931 PetscCall(PetscRegisterFinalize(Petsc_Superlu_dist_keyval_free)); 932 } 933 PetscFunctionReturn(PETSC_SUCCESS); 934 } 935 936 /*MC 937 MATSOLVERSUPERLU_DIST - Parallel direct solver package for LU factorization 938 939 Use `./configure --download-superlu_dist --download-parmetis --download-metis --download-ptscotch` to have PETSc installed with SuperLU_DIST 940 941 Use `-pc_type lu` `-pc_factor_mat_solver_type superlu_dist` to use this direct solver 942 943 Works with `MATAIJ` matrices 944 945 Options Database Keys: 946 + -mat_superlu_dist_r <n> - number of rows in processor partition 947 . -mat_superlu_dist_c <n> - number of columns in processor partition 948 . -mat_superlu_dist_3d - use 3d partition, requires SuperLU_DIST 7.2 or later 949 . -mat_superlu_dist_d <n> - depth in 3d partition (valid only if `-mat_superlu_dist_3d`) is provided 950 . -mat_superlu_dist_equil - equilibrate the matrix 951 . -mat_superlu_dist_rowperm <NOROWPERM,LargeDiag_MC64,LargeDiag_AWPM,MY_PERMR> - row permutation 952 . -mat_superlu_dist_colperm <NATURAL,MMD_AT_PLUS_A,MMD_ATA,METIS_AT_PLUS_A,PARMETIS> - column permutation 953 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 954 . -mat_superlu_dist_fact <SamePattern> - (choose one of) `SamePattern`, `SamePattern_SameRowPerm`, `DOFACT` 955 . -mat_superlu_dist_iterrefine - use iterative refinement 956 . -mat_superlu_dist_printstat - print factorization information 957 - -pc_precision single - use SuperLU_DIST single precision with PETSc double precision. Currently this does not accept an options prefix, so 958 regardless of the `PC` prefix you must use no prefix here 959 960 Level: beginner 961 962 Note: 963 If PETSc was configured with `--with-cuda` then this solver will automatically use the GPUs. 964 965 .seealso: [](ch_matrices), `Mat`, `PCLU`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatGetFactor()` 966 M*/ 967