1 /*
2 Provides an interface to the SuperLU_DIST sparse solver
3 */
4
5 #include <../src/mat/impls/aij/seq/aij.h>
6 #include <../src/mat/impls/aij/mpi/mpiaij.h>
7 #include <petscpkg_version.h>
8
9 PETSC_PRAGMA_DIAGNOSTIC_IGNORED_BEGIN("-Wundef")
10 EXTERN_C_BEGIN
11 // To suppress compiler warnings
12 #define DISABLE_CUSPARSE_DEPRECATED
13 #if defined(PETSC_USE_COMPLEX)
14 #define CASTDOUBLECOMPLEX (doublecomplex *)
15 #define CASTDOUBLECOMPLEXSTAR (doublecomplex **)
16 #include <superlu_zdefs.h>
17 #define LUstructInit zLUstructInit
18 #define ScalePermstructInit zScalePermstructInit
19 #define ScalePermstructFree zScalePermstructFree
20 #define LUstructFree zLUstructFree
21 #define Destroy_LU zDestroy_LU
22 #define ScalePermstruct_t zScalePermstruct_t
23 #define LUstruct_t zLUstruct_t
24 #define SOLVEstruct_t zSOLVEstruct_t
25 #define SolveFinalize zSolveFinalize
26 #define pGetDiagU pzGetDiagU
27 #define pgssvx pzgssvx
28 #define allocateA_dist zallocateA_dist
29 #define Create_CompRowLoc_Matrix_dist zCreate_CompRowLoc_Matrix_dist
30 #define SLU SLU_Z
31 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
32 #define DeAllocLlu_3d zDeAllocLlu_3d
33 #define DeAllocGlu_3d zDeAllocGlu_3d
34 #define Destroy_A3d_gathered_on_2d zDestroy_A3d_gathered_on_2d
35 #define pgssvx3d pzgssvx3d
36 #endif
37 #elif defined(PETSC_USE_REAL_SINGLE)
38 #define CASTDOUBLECOMPLEX
39 #define CASTDOUBLECOMPLEXSTAR
40 #include <superlu_sdefs.h>
41 #define LUstructInit sLUstructInit
42 #define ScalePermstructInit sScalePermstructInit
43 #define ScalePermstructFree sScalePermstructFree
44 #define LUstructFree sLUstructFree
45 #define Destroy_LU sDestroy_LU
46 #define ScalePermstruct_t sScalePermstruct_t
47 #define LUstruct_t sLUstruct_t
48 #define SOLVEstruct_t sSOLVEstruct_t
49 #define SolveFinalize sSolveFinalize
50 #define pGetDiagU psGetDiagU
51 #define pgssvx psgssvx
52 #define allocateA_dist sallocateA_dist
53 #define Create_CompRowLoc_Matrix_dist sCreate_CompRowLoc_Matrix_dist
54 #define SLU SLU_S
55 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
56 #define DeAllocLlu_3d sDeAllocLlu_3d
57 #define DeAllocGlu_3d sDeAllocGlu_3d
58 #define Destroy_A3d_gathered_on_2d sDestroy_A3d_gathered_on_2d
59 #define pgssvx3d psgssvx3d
60 #endif
61 #else
62 #define CASTDOUBLECOMPLEX
63 #define CASTDOUBLECOMPLEXSTAR
64 #include <superlu_ddefs.h>
65 #define LUstructInit dLUstructInit
66 #define ScalePermstructInit dScalePermstructInit
67 #define ScalePermstructFree dScalePermstructFree
68 #define LUstructFree dLUstructFree
69 #define Destroy_LU dDestroy_LU
70 #define ScalePermstruct_t dScalePermstruct_t
71 #define LUstruct_t dLUstruct_t
72 #define SOLVEstruct_t dSOLVEstruct_t
73 #define SolveFinalize dSolveFinalize
74 #define pGetDiagU pdGetDiagU
75 #define pgssvx pdgssvx
76 #define allocateA_dist dallocateA_dist
77 #define Create_CompRowLoc_Matrix_dist dCreate_CompRowLoc_Matrix_dist
78 #define SLU SLU_D
79 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
80 #define DeAllocLlu_3d dDeAllocLlu_3d
81 #define DeAllocGlu_3d dDeAllocGlu_3d
82 #define Destroy_A3d_gathered_on_2d dDestroy_A3d_gathered_on_2d
83 #define pgssvx3d pdgssvx3d
84 #endif
85 #endif
86 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
87 #include <superlu_sdefs.h>
88 #endif
89 EXTERN_C_END
90 PETSC_PRAGMA_DIAGNOSTIC_IGNORED_END()
91
92 typedef struct {
93 int_t nprow, npcol, *row, *col;
94 gridinfo_t grid;
95 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
96 PetscBool use3d;
97 int_t npdep; /* replication factor, must be power of two */
98 gridinfo3d_t grid3d;
99 #endif
100 superlu_dist_options_t options;
101 SuperMatrix A_sup;
102 ScalePermstruct_t ScalePermstruct;
103 LUstruct_t LUstruct;
104 int StatPrint;
105 SOLVEstruct_t SOLVEstruct;
106 fact_t FactPattern;
107 MPI_Comm comm_superlu;
108 PetscScalar *val;
109 PetscBool matsolve_iscalled, matmatsolve_iscalled;
110 PetscBool CleanUpSuperLU_Dist; /* Flag to clean up (non-global) SuperLU objects during Destroy */
111 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
112 sScalePermstruct_t sScalePermstruct;
113 sLUstruct_t sLUstruct;
114 sSOLVEstruct_t sSOLVEstruct;
115 float *sval;
116 PetscBool singleprecision; /* use single precision SuperLU_DIST from double precision PETSc */
117 float *sbptr;
118 #endif
119 } Mat_SuperLU_DIST;
120
MatSuperluDistGetDiagU_SuperLU_DIST(Mat F,PetscScalar * diagU)121 static PetscErrorCode MatSuperluDistGetDiagU_SuperLU_DIST(Mat F, PetscScalar *diagU)
122 {
123 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data;
124
125 PetscFunctionBegin;
126 PetscStackCallExternalVoid("SuperLU_DIST:pGetDiagU", pGetDiagU(F->rmap->N, &lu->LUstruct, &lu->grid, CASTDOUBLECOMPLEX diagU));
127 PetscFunctionReturn(PETSC_SUCCESS);
128 }
129
MatSuperluDistGetDiagU(Mat F,PetscScalar * diagU)130 PetscErrorCode MatSuperluDistGetDiagU(Mat F, PetscScalar *diagU)
131 {
132 PetscFunctionBegin;
133 PetscValidHeaderSpecific(F, MAT_CLASSID, 1);
134 PetscTryMethod(F, "MatSuperluDistGetDiagU_C", (Mat, PetscScalar *), (F, diagU));
135 PetscFunctionReturn(PETSC_SUCCESS);
136 }
137
138 /* This allows reusing the Superlu_DIST communicator and grid when only a single SuperLU_DIST matrix is used at a time */
139 typedef struct {
140 MPI_Comm comm;
141 PetscBool busy;
142 gridinfo_t grid;
143 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
144 PetscBool use3d;
145 gridinfo3d_t grid3d;
146 #endif
147 } PetscSuperLU_DIST;
148
149 static PetscMPIInt Petsc_Superlu_dist_keyval = MPI_KEYVAL_INVALID;
150
Petsc_Superlu_dist_keyval_DeleteFn(MPI_Comm comm,PetscMPIInt keyval,void * attr_val,void * extra_state)151 PETSC_EXTERN PetscMPIInt MPIAPI Petsc_Superlu_dist_keyval_DeleteFn(MPI_Comm comm, PetscMPIInt keyval, void *attr_val, void *extra_state)
152 {
153 PetscSuperLU_DIST *context = (PetscSuperLU_DIST *)attr_val;
154
155 PetscFunctionBegin;
156 PetscCheckReturnMPI(keyval == Petsc_Superlu_dist_keyval, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Unexpected keyval");
157 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
158 if (context->use3d) {
159 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit3d", superlu_gridexit3d(&context->grid3d));
160 } else
161 #endif
162 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit", superlu_gridexit(&context->grid));
163 PetscCallMPIReturnMPI(MPI_Comm_free(&context->comm));
164 PetscCallReturnMPI(PetscFree(context));
165 PetscFunctionReturn(MPI_SUCCESS);
166 }
167
168 /*
169 Performs MPI_Comm_free_keyval() on Petsc_Superlu_dist_keyval but keeps the global variable for
170 users who do not destroy all PETSc objects before PetscFinalize().
171
172 The value Petsc_Superlu_dist_keyval is retained so that Petsc_Superlu_dist_keyval_DeleteFn()
173 can still check that the keyval associated with the MPI communicator is correct when the MPI
174 communicator is destroyed.
175
176 This is called in PetscFinalize()
177 */
Petsc_Superlu_dist_keyval_free(void)178 static PetscErrorCode Petsc_Superlu_dist_keyval_free(void)
179 {
180 PetscMPIInt Petsc_Superlu_dist_keyval_temp = Petsc_Superlu_dist_keyval;
181
182 PetscFunctionBegin;
183 PetscCallMPI(MPI_Comm_free_keyval(&Petsc_Superlu_dist_keyval_temp));
184 PetscFunctionReturn(PETSC_SUCCESS);
185 }
186
MatDestroy_SuperLU_DIST(Mat A)187 static PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
188 {
189 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data;
190
191 PetscFunctionBegin;
192 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
193 PetscCall(PetscFree(lu->sbptr));
194 #endif
195 if (lu->CleanUpSuperLU_Dist) {
196 /* Deallocate SuperLU_DIST storage */
197 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist", Destroy_CompRowLoc_Matrix_dist(&lu->A_sup));
198 if (lu->options.SolveInitialized) {
199 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
200 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", sSolveFinalize(&lu->options, &lu->sSOLVEstruct));
201 else
202 #endif
203 PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
204 }
205 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
206 if (lu->use3d) {
207 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
208 if (lu->singleprecision) {
209 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->sLUstruct));
210 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_A3d_gathered_on_2d", sDestroy_A3d_gathered_on_2d(&lu->sSOLVEstruct, &lu->grid3d));
211 PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", sScalePermstructFree(&lu->sScalePermstruct));
212 PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", sLUstructFree(&lu->sLUstruct));
213 } else
214 #endif
215 {
216 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct));
217 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_A3d_gathered_on_2d", Destroy_A3d_gathered_on_2d(&lu->SOLVEstruct, &lu->grid3d));
218 PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", ScalePermstructFree(&lu->ScalePermstruct));
219 PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", LUstructFree(&lu->LUstruct));
220 }
221 } else
222 #endif
223 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
224 if (lu->singleprecision) {
225 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->cmap->N, &lu->grid, &lu->sLUstruct));
226 PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", sScalePermstructFree(&lu->sScalePermstruct));
227 PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", sLUstructFree(&lu->sLUstruct));
228 } else
229 #endif
230 {
231 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct));
232 PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", ScalePermstructFree(&lu->ScalePermstruct));
233 PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", LUstructFree(&lu->LUstruct));
234 }
235 /* Release the SuperLU_DIST process grid only if the matrix has its own copy, that is it is not in the communicator context */
236 if (lu->comm_superlu) {
237 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
238 if (lu->use3d) {
239 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit3d", superlu_gridexit3d(&lu->grid3d));
240 } else
241 #endif
242 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit", superlu_gridexit(&lu->grid));
243 }
244 }
245 /*
246 * We always need to release the communicator that was created in MatGetFactor_aij_superlu_dist.
247 * lu->CleanUpSuperLU_Dist was turned on in MatLUFactorSymbolic_SuperLU_DIST. There are some use
248 * cases where we only create a matrix but do not solve mat. In these cases, lu->CleanUpSuperLU_Dist
249 * is off, and the communicator was not released or marked as "not busy " in the old code.
250 * Here we try to release comm regardless.
251 */
252 if (lu->comm_superlu) {
253 PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &lu->comm_superlu));
254 } else {
255 PetscSuperLU_DIST *context;
256 MPI_Comm comm;
257 PetscMPIInt iflg;
258
259 PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
260 PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &iflg));
261 if (iflg) context->busy = PETSC_FALSE;
262 }
263
264 PetscCall(PetscFree(A->data));
265 /* clear composed functions */
266 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
267 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSuperluDistGetDiagU_C", NULL));
268 PetscFunctionReturn(PETSC_SUCCESS);
269 }
270
MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)271 static PetscErrorCode MatSolve_SuperLU_DIST(Mat A, Vec b_mpi, Vec x)
272 {
273 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data;
274 PetscInt m = A->rmap->n;
275 SuperLUStat_t stat;
276 PetscReal berr[1];
277 PetscScalar *bptr = NULL;
278 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
279 float sberr[1];
280 #endif
281 int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
282 static PetscBool cite = PETSC_FALSE;
283
284 PetscFunctionBegin;
285 PetscCheck(lu->options.Fact == FACTORED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "SuperLU_DIST options.Fact must equal FACTORED");
286 PetscCall(PetscCitationsRegister("@article{lidemmel03,\n author = {Xiaoye S. Li and James W. Demmel},\n title = {{SuperLU_DIST}: A Scalable Distributed-Memory Sparse Direct\n Solver for Unsymmetric Linear Systems},\n journal = {ACM "
287 "Trans. Mathematical Software},\n volume = {29},\n number = {2},\n pages = {110-140},\n year = 2003\n}\n",
288 &cite));
289
290 if (lu->options.SolveInitialized && !lu->matsolve_iscalled) {
291 /* see comments in MatMatSolve() */
292 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
293 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", sSolveFinalize(&lu->options, &lu->sSOLVEstruct));
294 else
295 #endif
296 PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
297 lu->options.SolveInitialized = NO;
298 }
299 PetscCall(VecCopy(b_mpi, x));
300 PetscCall(VecGetArray(x, &bptr));
301 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
302 if (lu->singleprecision) {
303 PetscInt n;
304 PetscCall(VecGetLocalSize(x, &n));
305 if (!lu->sbptr) PetscCall(PetscMalloc1(n, &lu->sbptr));
306 for (PetscInt i = 0; i < n; i++) lu->sbptr[i] = PetscRealPart(bptr[i]); /* PetscRealPart() is a no-op to allow compiling with complex */
307 }
308 #endif
309
310 PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */
311 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
312 if (lu->use3d) {
313 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
314 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", psgssvx3d(&lu->options, &lu->A_sup, &lu->sScalePermstruct, lu->sbptr, (int)m, 1, &lu->grid3d, &lu->sLUstruct, &lu->sSOLVEstruct, sberr, &stat, &info));
315 else
316 #endif
317 PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, (int)m, 1, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
318 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "pgssvx3d fails, info: %d", info);
319 } else
320 #endif
321 {
322 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
323 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", psgssvx(&lu->options, &lu->A_sup, &lu->sScalePermstruct, lu->sbptr, (int)m, 1, &lu->grid, &lu->sLUstruct, &lu->sSOLVEstruct, sberr, &stat, &info));
324 else
325 #endif
326 PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, (int)m, 1, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
327 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "pgssvx fails, info: %d", info);
328 }
329 if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */
330 PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat));
331 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
332 if (lu->singleprecision) {
333 PetscInt n;
334 PetscCall(VecGetLocalSize(x, &n));
335 for (PetscInt i = 0; i < n; i++) bptr[i] = lu->sbptr[i];
336 }
337 #endif
338 PetscCall(VecRestoreArray(x, &bptr));
339 lu->matsolve_iscalled = PETSC_TRUE;
340 lu->matmatsolve_iscalled = PETSC_FALSE;
341 PetscFunctionReturn(PETSC_SUCCESS);
342 }
343
MatMatSolve_SuperLU_DIST(Mat A,Mat B_mpi,Mat X)344 static PetscErrorCode MatMatSolve_SuperLU_DIST(Mat A, Mat B_mpi, Mat X)
345 {
346 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data;
347 PetscInt m = A->rmap->n, nrhs;
348 SuperLUStat_t stat;
349 PetscReal berr[1];
350 PetscScalar *bptr;
351 int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
352 PetscBool flg;
353
354 PetscFunctionBegin;
355 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
356 PetscCheck(!lu->singleprecision, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for -pc_precision single");
357 #endif
358 PetscCheck(lu->options.Fact == FACTORED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "SuperLU_DIST options.Fact must equal FACTORED");
359 PetscCall(PetscObjectTypeCompareAny((PetscObject)B_mpi, &flg, MATSEQDENSE, MATMPIDENSE, NULL));
360 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix B must be MATDENSE matrix");
361 if (X != B_mpi) {
362 PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &flg, MATSEQDENSE, MATMPIDENSE, NULL));
363 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");
364 }
365
366 if (lu->options.SolveInitialized && !lu->matmatsolve_iscalled) {
367 /* communication pattern of SOLVEstruct is unlikely created for matmatsolve,
368 thus destroy it and create a new SOLVEstruct.
369 Otherwise it may result in memory corruption or incorrect solution
370 See src/mat/tests/ex125.c */
371 PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
372 lu->options.SolveInitialized = NO;
373 }
374 if (X != B_mpi) PetscCall(MatCopy(B_mpi, X, SAME_NONZERO_PATTERN));
375
376 PetscCall(MatGetSize(B_mpi, NULL, &nrhs));
377
378 PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */
379 PetscCall(MatDenseGetArray(X, &bptr));
380
381 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
382 if (lu->use3d) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, (int)m, (int)nrhs, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
383 else
384 #endif
385 PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, (int)m, (int)nrhs, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
386 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "pdgssvx fails, info: %d", info);
387 PetscCall(MatDenseRestoreArray(X, &bptr));
388
389 if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */
390 PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat));
391 lu->matsolve_iscalled = PETSC_FALSE;
392 lu->matmatsolve_iscalled = PETSC_TRUE;
393 PetscFunctionReturn(PETSC_SUCCESS);
394 }
395
396 /*
397 input:
398 F: numeric Cholesky factor
399 output:
400 nneg: total number of negative pivots
401 nzero: total number of zero pivots
402 npos: (global dimension of F) - nneg - nzero
403 */
MatGetInertia_SuperLU_DIST(Mat F,PetscInt * nneg,PetscInt * nzero,PetscInt * npos)404 static PetscErrorCode MatGetInertia_SuperLU_DIST(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
405 {
406 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data;
407 PetscScalar *diagU = NULL;
408 PetscInt M, i, neg = 0, zero = 0, pos = 0;
409 PetscReal r;
410
411 PetscFunctionBegin;
412 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
413 PetscCheck(!lu->singleprecision, PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Not for -pc_precision single");
414 #endif
415 PetscCheck(F->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix factor F is not assembled");
416 PetscCheck(lu->options.RowPerm == NOROWPERM, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must set NOROWPERM");
417 PetscCall(MatGetSize(F, &M, NULL));
418 PetscCall(PetscMalloc1(M, &diagU));
419 PetscCall(MatSuperluDistGetDiagU(F, diagU));
420 for (i = 0; i < M; i++) {
421 #if defined(PETSC_USE_COMPLEX)
422 r = PetscAbsReal(PetscImaginaryPart(diagU[i]));
423 PetscCheck(r < 1000 * PETSC_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "diagU[%" PetscInt_FMT "]=%g + i %g is non-real", i, (double)PetscRealPart(diagU[i]), (double)PetscImaginaryPart(diagU[i]));
424 r = PetscRealPart(diagU[i]);
425 #else
426 r = diagU[i];
427 #endif
428 if (r > 0) {
429 pos++;
430 } else if (r < 0) {
431 neg++;
432 } else zero++;
433 }
434
435 PetscCall(PetscFree(diagU));
436 if (nneg) *nneg = neg;
437 if (nzero) *nzero = zero;
438 if (npos) *npos = pos;
439 PetscFunctionReturn(PETSC_SUCCESS);
440 }
441
MatLUFactorNumeric_SuperLU_DIST(Mat F,Mat A,const MatFactorInfo * info)442 static PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F, Mat A, const MatFactorInfo *info)
443 {
444 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data;
445 Mat Aloc;
446 const PetscScalar *av;
447 const PetscInt *ai = NULL, *aj = NULL;
448 PetscInt nz, unused;
449 int sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */
450 SuperLUStat_t stat;
451 PetscReal *berr = 0;
452 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
453 float *sberr = 0;
454 #endif
455 PetscBool ismpiaij, isseqaij, flg;
456
457 PetscFunctionBegin;
458 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij));
459 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &ismpiaij));
460 if (ismpiaij) {
461 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &Aloc));
462 } else if (isseqaij) {
463 PetscCall(PetscObjectReference((PetscObject)A));
464 Aloc = A;
465 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
466
467 PetscCall(MatGetRowIJ(Aloc, 0, PETSC_FALSE, PETSC_FALSE, &unused, &ai, &aj, &flg));
468 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "GetRowIJ failed");
469 PetscCall(MatSeqAIJGetArrayRead(Aloc, &av));
470 nz = ai[Aloc->rmap->n];
471
472 /* Allocations for A_sup */
473 if (lu->options.Fact == DOFACT) { /* first numeric factorization */
474 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
475 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", sallocateA_dist(Aloc->rmap->n, nz, &lu->sval, &lu->col, &lu->row));
476 else
477 #endif
478 PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR & lu->val, &lu->col, &lu->row));
479 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
480 if (lu->FactPattern == SamePattern_SameRowPerm) {
481 lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
482 } else if (lu->FactPattern == SamePattern) {
483 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
484 if (lu->use3d) {
485 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
486 if (lu->singleprecision) {
487 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->sLUstruct));
488 PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", sSolveFinalize(&lu->options, &lu->sSOLVEstruct));
489 } else
490 #endif
491 {
492 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct));
493 PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
494 }
495 } else
496 #endif
497 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
498 if (lu->singleprecision)
499 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->rmap->N, &lu->grid, &lu->sLUstruct));
500 else
501 #endif
502 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct));
503 lu->options.Fact = SamePattern;
504 } else if (lu->FactPattern == DOFACT) {
505 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist", Destroy_CompRowLoc_Matrix_dist(&lu->A_sup));
506 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
507 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->rmap->N, &lu->grid, &lu->sLUstruct));
508 else
509 #endif
510 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct));
511 lu->options.Fact = DOFACT;
512 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
513 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", sallocateA_dist(Aloc->rmap->n, nz, &lu->sval, &lu->col, &lu->row));
514 else
515 #endif
516 PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR & lu->val, &lu->col, &lu->row));
517 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "options.Fact must be one of SamePattern SamePattern_SameRowPerm DOFACT");
518 }
519
520 /* Copy AIJ matrix to superlu_dist matrix */
521 PetscCall(PetscArraycpy(lu->row, ai, Aloc->rmap->n + 1));
522 PetscCall(PetscArraycpy(lu->col, aj, nz));
523 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
524 if (lu->singleprecision)
525 for (PetscInt i = 0; i < nz; i++) lu->sval[i] = PetscRealPart(av[i]); /* PetscRealPart() is a no-op to allow compiling with complex */
526 else
527 #endif
528 PetscCall(PetscArraycpy(lu->val, av, nz));
529 PetscCall(MatRestoreRowIJ(Aloc, 0, PETSC_FALSE, PETSC_FALSE, &unused, &ai, &aj, &flg));
530 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "RestoreRowIJ failed");
531 PetscCall(MatSeqAIJRestoreArrayRead(Aloc, &av));
532 PetscCall(MatDestroy(&Aloc));
533
534 /* Create and setup A_sup */
535 if (lu->options.Fact == DOFACT) {
536 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
537 if (lu->singleprecision)
538 PetscStackCallExternalVoid("SuperLU_DIST:Create_CompRowLoc_Matrix_dist", sCreate_CompRowLoc_Matrix_dist(&lu->A_sup, A->rmap->N, A->cmap->N, nz, A->rmap->n, A->rmap->rstart, lu->sval, lu->col, lu->row, SLU_NR_loc, SLU_S, SLU_GE));
539 else
540 #endif
541 PetscStackCallExternalVoid("SuperLU_DIST:Create_CompRowLoc_Matrix_dist", Create_CompRowLoc_Matrix_dist(&lu->A_sup, A->rmap->N, A->cmap->N, nz, A->rmap->n, A->rmap->rstart, CASTDOUBLECOMPLEX lu->val, lu->col, lu->row, SLU_NR_loc, SLU, SLU_GE));
542 }
543
544 /* Factor the matrix. */
545 PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */
546 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
547 if (lu->use3d) {
548 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
549 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", psgssvx3d(&lu->options, &lu->A_sup, &lu->sScalePermstruct, 0, (int)A->rmap->n, 0, &lu->grid3d, &lu->sLUstruct, &lu->sSOLVEstruct, sberr, &stat, &sinfo));
550 else
551 #endif
552 PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, (int)A->rmap->n, 0, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo));
553 } else
554 #endif
555 {
556 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
557 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", psgssvx(&lu->options, &lu->A_sup, &lu->sScalePermstruct, 0, (int)A->rmap->n, 0, &lu->grid, &lu->sLUstruct, &lu->sSOLVEstruct, sberr, &stat, &sinfo));
558 else
559 #endif
560 PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, (int)A->rmap->n, 0, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo));
561 }
562 if (sinfo > 0) {
563 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot in row %d", sinfo);
564 if (sinfo <= lu->A_sup.ncol) {
565 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
566 PetscCall(PetscInfo(F, "U(i,i) is exactly zero, i= %d\n", sinfo));
567 } else if (sinfo > lu->A_sup.ncol) {
568 /*
569 number of bytes allocated when memory allocation
570 failure occurred, plus A->ncol.
571 */
572 F->factorerrortype = MAT_FACTOR_OUTMEMORY;
573 PetscCall(PetscInfo(F, "Number of bytes allocated when memory allocation fails %d\n", sinfo));
574 }
575 } else PetscCheck(sinfo >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "info = %d, argument in p*gssvx() had an illegal value", sinfo);
576
577 if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */
578 PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat));
579 F->assembled = PETSC_TRUE;
580 F->preallocated = PETSC_TRUE;
581 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
582 PetscFunctionReturn(PETSC_SUCCESS);
583 }
584
585 /* Note the PETSc r and c permutations are ignored */
MatLUFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,IS c,const MatFactorInfo * info)586 static PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
587 {
588 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data;
589 PetscInt M = A->rmap->N, N = A->cmap->N, indx;
590 PetscMPIInt size, iflg;
591 PetscBool flg, set;
592 const char *colperm[] = {"NATURAL", "MMD_AT_PLUS_A", "MMD_ATA", "METIS_AT_PLUS_A", "PARMETIS"};
593 const char *rowperm[] = {"NOROWPERM", "LargeDiag_MC64", "LargeDiag_AWPM", "MY_PERMR"};
594 const char *factPattern[] = {"SamePattern", "SamePattern_SameRowPerm", "DOFACT"};
595 MPI_Comm comm;
596 PetscSuperLU_DIST *context = NULL;
597
598 PetscFunctionBegin;
599 /* Set options to F */
600 PetscCall(PetscObjectGetComm((PetscObject)F, &comm));
601 PetscCallMPI(MPI_Comm_size(comm, &size));
602
603 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "SuperLU_Dist Options", "Mat");
604 PetscCall(PetscOptionsBool("-mat_superlu_dist_equil", "Equilibrate matrix", "None", lu->options.Equil ? PETSC_TRUE : PETSC_FALSE, &flg, &set));
605 if (set && !flg) lu->options.Equil = NO;
606
607 PetscCall(PetscOptionsEList("-mat_superlu_dist_rowperm", "Row permutation", "None", rowperm, 4, rowperm[1], &indx, &flg));
608 if (flg) {
609 switch (indx) {
610 case 0:
611 lu->options.RowPerm = NOROWPERM;
612 break;
613 case 1:
614 lu->options.RowPerm = LargeDiag_MC64;
615 break;
616 case 2:
617 lu->options.RowPerm = LargeDiag_AWPM;
618 break;
619 case 3:
620 lu->options.RowPerm = MY_PERMR;
621 break;
622 default:
623 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown row permutation");
624 }
625 }
626
627 PetscCall(PetscOptionsEList("-mat_superlu_dist_colperm", "Column permutation", "None", colperm, 5, colperm[3], &indx, &flg));
628 if (flg) {
629 switch (indx) {
630 case 0:
631 lu->options.ColPerm = NATURAL;
632 break;
633 case 1:
634 lu->options.ColPerm = MMD_AT_PLUS_A;
635 break;
636 case 2:
637 lu->options.ColPerm = MMD_ATA;
638 break;
639 case 3:
640 lu->options.ColPerm = METIS_AT_PLUS_A;
641 break;
642 case 4:
643 lu->options.ColPerm = PARMETIS; /* only works for np>1 */
644 break;
645 default:
646 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation");
647 }
648 }
649
650 lu->options.ReplaceTinyPivot = NO;
651 PetscCall(PetscOptionsBool("-mat_superlu_dist_replacetinypivot", "Replace tiny pivots", "None", lu->options.ReplaceTinyPivot ? PETSC_TRUE : PETSC_FALSE, &flg, &set));
652 if (set && flg) lu->options.ReplaceTinyPivot = YES;
653
654 lu->options.ParSymbFact = NO;
655 PetscCall(PetscOptionsBool("-mat_superlu_dist_parsymbfact", "Parallel symbolic factorization", "None", PETSC_FALSE, &flg, &set));
656 if (set && flg && size > 1) {
657 #if defined(PETSC_HAVE_PARMETIS)
658 lu->options.ParSymbFact = YES;
659 lu->options.ColPerm = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */
660 #else
661 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "parsymbfact needs PARMETIS");
662 #endif
663 }
664
665 lu->FactPattern = SamePattern;
666 PetscCall(PetscOptionsEList("-mat_superlu_dist_fact", "Sparsity pattern for repeated matrix factorization", "None", factPattern, 3, factPattern[0], &indx, &flg));
667 if (flg) {
668 switch (indx) {
669 case 0:
670 lu->FactPattern = SamePattern;
671 break;
672 case 1:
673 lu->FactPattern = SamePattern_SameRowPerm;
674 break;
675 case 2:
676 lu->FactPattern = DOFACT;
677 break;
678 }
679 }
680
681 lu->options.IterRefine = NOREFINE;
682 PetscCall(PetscOptionsBool("-mat_superlu_dist_iterrefine", "Use iterative refinement", "None", lu->options.IterRefine == NOREFINE ? PETSC_FALSE : PETSC_TRUE, &flg, &set));
683 if (set && flg) lu->options.IterRefine = SLU_DOUBLE;
684
685 if (PetscLogPrintInfo) lu->options.PrintStat = YES;
686 else lu->options.PrintStat = NO;
687 PetscCall(PetscOptionsDeprecated("-mat_superlu_dist_statprint", "-mat_superlu_dist_printstat", "3.19", NULL));
688 PetscCall(PetscOptionsBool("-mat_superlu_dist_printstat", "Print factorization information", "None", (PetscBool)lu->options.PrintStat, (PetscBool *)&lu->options.PrintStat, NULL));
689
690 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(8, 0, 0)
691 lu->options.superlu_acc_offload = 1;
692 PetscCall(PetscOptionsBool("-mat_superlu_dist_gpuoffload", "Offload factorization onto the GPUs", "None", (PetscBool)lu->options.superlu_acc_offload, (PetscBool *)&lu->options.superlu_acc_offload, NULL));
693 #endif
694
695 PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &iflg));
696 if (!iflg || context->busy) { /* additional options */
697 if (!iflg) {
698 PetscCall(PetscNew(&context));
699 context->busy = PETSC_TRUE;
700 PetscCallMPI(MPI_Comm_dup(comm, &context->comm));
701 PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_Superlu_dist_keyval, context));
702 } else {
703 PetscCall(PetscCommGetComm(PetscObjectComm((PetscObject)A), &lu->comm_superlu));
704 }
705
706 /* Default number of process columns and rows */
707 lu->nprow = (int_t)(0.5 + PetscSqrtReal((PetscReal)size));
708 if (!lu->nprow) lu->nprow = 1;
709 while (lu->nprow > 0) {
710 lu->npcol = (int_t)(size / lu->nprow);
711 if (size == lu->nprow * lu->npcol) break;
712 lu->nprow--;
713 }
714 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
715 lu->use3d = PETSC_FALSE;
716 lu->npdep = 1;
717 #endif
718
719 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
720 PetscCall(PetscOptionsBool("-mat_superlu_dist_3d", "Use SuperLU_DIST 3D distribution", "None", lu->use3d, &lu->use3d, NULL));
721 if (lu->use3d) {
722 PetscInt t;
723 PetscCall(PetscOptionsInt("-mat_superlu_dist_d", "Number of z entries in processor partition", "None", lu->npdep, (PetscInt *)&lu->npdep, NULL));
724 t = (PetscInt)PetscLog2Real((PetscReal)lu->npdep);
725 PetscCheck(PetscPowInt(2, t) == lu->npdep, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_OUTOFRANGE, "-mat_superlu_dist_d %lld must be a power of 2", (long long)lu->npdep);
726 if (lu->npdep > 1) {
727 lu->nprow = (int_t)(0.5 + PetscSqrtReal((PetscReal)(size / lu->npdep)));
728 if (!lu->nprow) lu->nprow = 1;
729 while (lu->nprow > 0) {
730 lu->npcol = (int_t)(size / (lu->npdep * lu->nprow));
731 if (size == lu->nprow * lu->npcol * lu->npdep) break;
732 lu->nprow--;
733 }
734 }
735 }
736 #endif
737 PetscCall(PetscOptionsInt("-mat_superlu_dist_r", "Number rows in processor partition", "None", lu->nprow, (PetscInt *)&lu->nprow, NULL));
738 PetscCall(PetscOptionsInt("-mat_superlu_dist_c", "Number columns in processor partition", "None", lu->npcol, (PetscInt *)&lu->npcol, NULL));
739 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
740 PetscCheck(size == lu->nprow * lu->npcol * lu->npdep, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of processes %d must equal to nprow %lld * npcol %lld * npdep %lld", size, (long long)lu->nprow, (long long)lu->npcol, (long long)lu->npdep);
741 #else
742 PetscCheck(size == lu->nprow * lu->npcol, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of processes %d must equal to nprow %lld * npcol %lld", size, (long long)lu->nprow, (long long)lu->npcol);
743 #endif
744 /* end of adding additional options */
745
746 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
747 if (lu->use3d) {
748 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridinit3d", superlu_gridinit3d(context ? context->comm : lu->comm_superlu, (int)lu->nprow, (int)lu->npcol, (int)lu->npdep, &lu->grid3d));
749 if (context) {
750 context->grid3d = lu->grid3d;
751 context->use3d = lu->use3d;
752 }
753 } else {
754 #endif
755 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridinit", superlu_gridinit(context ? context->comm : lu->comm_superlu, (int)lu->nprow, (int)lu->npcol, &lu->grid));
756 if (context) context->grid = lu->grid;
757 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
758 }
759 #endif
760 PetscCall(PetscInfo(NULL, "Duplicating a communicator for SuperLU_DIST and calling superlu_gridinit()\n"));
761 if (iflg) {
762 PetscCall(PetscInfo(NULL, "Communicator attribute already in use so not saving communicator and SuperLU_DIST grid in communicator attribute \n"));
763 } else {
764 PetscCall(PetscInfo(NULL, "Storing communicator and SuperLU_DIST grid in communicator attribute\n"));
765 }
766 } else { /* (iflg && !context->busy) */
767 PetscCall(PetscInfo(NULL, "Reusing communicator and superlu_gridinit() for SuperLU_DIST from communicator attribute.\n"));
768 context->busy = PETSC_TRUE;
769 lu->grid = context->grid;
770 }
771 PetscOptionsEnd();
772
773 /* Initialize ScalePermstruct and LUstruct. */
774 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
775 if (lu->singleprecision) {
776 PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructInit", sScalePermstructInit(M, N, &lu->sScalePermstruct));
777 PetscStackCallExternalVoid("SuperLU_DIST:LUstructInit", sLUstructInit(N, &lu->sLUstruct));
778 } else
779 #endif
780 {
781 PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructInit", ScalePermstructInit(M, N, &lu->ScalePermstruct));
782 PetscStackCallExternalVoid("SuperLU_DIST:LUstructInit", LUstructInit(N, &lu->LUstruct));
783 }
784 F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST;
785 F->ops->solve = MatSolve_SuperLU_DIST;
786 F->ops->matsolve = MatMatSolve_SuperLU_DIST;
787 F->ops->getinertia = NULL;
788
789 if (A->symmetric == PETSC_BOOL3_TRUE || A->hermitian == PETSC_BOOL3_TRUE) F->ops->getinertia = MatGetInertia_SuperLU_DIST;
790 lu->CleanUpSuperLU_Dist = PETSC_TRUE;
791 PetscFunctionReturn(PETSC_SUCCESS);
792 }
793
MatCholeskyFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,const MatFactorInfo * info)794 static PetscErrorCode MatCholeskyFactorSymbolic_SuperLU_DIST(Mat F, Mat A, IS r, const MatFactorInfo *info)
795 {
796 PetscFunctionBegin;
797 PetscCall(MatLUFactorSymbolic_SuperLU_DIST(F, A, r, r, info));
798 F->ops->choleskyfactornumeric = MatLUFactorNumeric_SuperLU_DIST;
799 PetscFunctionReturn(PETSC_SUCCESS);
800 }
801
MatFactorGetSolverType_aij_superlu_dist(Mat A,MatSolverType * type)802 static PetscErrorCode MatFactorGetSolverType_aij_superlu_dist(Mat A, MatSolverType *type)
803 {
804 PetscFunctionBegin;
805 *type = MATSOLVERSUPERLU_DIST;
806 PetscFunctionReturn(PETSC_SUCCESS);
807 }
808
MatView_Info_SuperLU_DIST(Mat A,PetscViewer viewer)809 static PetscErrorCode MatView_Info_SuperLU_DIST(Mat A, PetscViewer viewer)
810 {
811 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data;
812 superlu_dist_options_t options;
813
814 PetscFunctionBegin;
815 /* check if matrix is superlu_dist type */
816 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(PETSC_SUCCESS);
817
818 options = lu->options;
819 PetscCall(PetscViewerASCIIPrintf(viewer, "SuperLU_DIST run parameters:\n"));
820 /* would love to use superlu 'IFMT' macro but it looks like it's inconsistently applied, the
821 * format spec for int64_t is set to %d for whatever reason */
822 PetscCall(PetscViewerASCIIPrintf(viewer, " Process grid nprow %lld x npcol %lld \n", (long long)lu->nprow, (long long)lu->npcol));
823 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
824 if (lu->use3d) PetscCall(PetscViewerASCIIPrintf(viewer, " Using 3d decomposition with npdep %lld \n", (long long)lu->npdep));
825 #endif
826
827 PetscCall(PetscViewerASCIIPrintf(viewer, " Equilibrate matrix %s \n", PetscBools[options.Equil != NO]));
828 PetscCall(PetscViewerASCIIPrintf(viewer, " Replace tiny pivots %s \n", PetscBools[options.ReplaceTinyPivot != NO]));
829 PetscCall(PetscViewerASCIIPrintf(viewer, " Use iterative refinement %s \n", PetscBools[options.IterRefine == SLU_DOUBLE]));
830 PetscCall(PetscViewerASCIIPrintf(viewer, " Processors in row %lld col partition %lld \n", (long long)lu->nprow, (long long)lu->npcol));
831
832 switch (options.RowPerm) {
833 case NOROWPERM:
834 PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation NOROWPERM\n"));
835 break;
836 case LargeDiag_MC64:
837 PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation LargeDiag_MC64\n"));
838 break;
839 case LargeDiag_AWPM:
840 PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation LargeDiag_AWPM\n"));
841 break;
842 case MY_PERMR:
843 PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation MY_PERMR\n"));
844 break;
845 default:
846 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation");
847 }
848
849 switch (options.ColPerm) {
850 case NATURAL:
851 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation NATURAL\n"));
852 break;
853 case MMD_AT_PLUS_A:
854 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation MMD_AT_PLUS_A\n"));
855 break;
856 case MMD_ATA:
857 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation MMD_ATA\n"));
858 break;
859 /* Even though this is called METIS, the SuperLU_DIST code sets this by default if PARMETIS is defined, not METIS */
860 case METIS_AT_PLUS_A:
861 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation METIS_AT_PLUS_A\n"));
862 break;
863 case PARMETIS:
864 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation PARMETIS\n"));
865 break;
866 default:
867 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation");
868 }
869
870 PetscCall(PetscViewerASCIIPrintf(viewer, " Parallel symbolic factorization %s \n", PetscBools[options.ParSymbFact != NO]));
871
872 if (lu->FactPattern == SamePattern) {
873 PetscCall(PetscViewerASCIIPrintf(viewer, " Repeated factorization SamePattern\n"));
874 } else if (lu->FactPattern == SamePattern_SameRowPerm) {
875 PetscCall(PetscViewerASCIIPrintf(viewer, " Repeated factorization SamePattern_SameRowPerm\n"));
876 } else if (lu->FactPattern == DOFACT) {
877 PetscCall(PetscViewerASCIIPrintf(viewer, " Repeated factorization DOFACT\n"));
878 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown factorization pattern");
879 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
880 if (lu->singleprecision) PetscCall(PetscViewerASCIIPrintf(viewer, " Using SuperLU_DIST in single precision\n"));
881 #endif
882 PetscFunctionReturn(PETSC_SUCCESS);
883 }
884
MatView_SuperLU_DIST(Mat A,PetscViewer viewer)885 static PetscErrorCode MatView_SuperLU_DIST(Mat A, PetscViewer viewer)
886 {
887 PetscBool isascii;
888 PetscViewerFormat format;
889
890 PetscFunctionBegin;
891 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
892 if (isascii) {
893 PetscCall(PetscViewerGetFormat(viewer, &format));
894 if (format == PETSC_VIEWER_ASCII_INFO) PetscCall(MatView_Info_SuperLU_DIST(A, viewer));
895 }
896 PetscFunctionReturn(PETSC_SUCCESS);
897 }
898
MatGetFactor_aij_superlu_dist(Mat A,MatFactorType ftype,Mat * F)899 static PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A, MatFactorType ftype, Mat *F)
900 {
901 Mat B;
902 Mat_SuperLU_DIST *lu;
903 PetscInt M = A->rmap->N, N = A->cmap->N;
904 PetscMPIInt size;
905 superlu_dist_options_t options;
906 PetscBool flg;
907 PetscPrecision precision = PETSC_PRECISION_INVALID;
908
909 PetscFunctionBegin;
910 /* Create the factorization matrix */
911 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
912 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, M, N));
913 PetscCall(PetscStrallocpy("superlu_dist", &((PetscObject)B)->type_name));
914 PetscCall(MatSetUp(B));
915 B->ops->getinfo = MatGetInfo_External;
916 B->ops->view = MatView_SuperLU_DIST;
917 B->ops->destroy = MatDestroy_SuperLU_DIST;
918
919 /* set_default_options_dist() sets the default input options to the following values:
920 options.Fact = DOFACT;
921 options.Equil = YES;
922 options.ParSymbFact = NO;
923 options.ColPerm = METIS_AT_PLUS_A;
924 options.RowPerm = LargeDiag_MC64;
925 options.ReplaceTinyPivot = NO;
926 options.IterRefine = SLU_DOUBLE;
927 options.Trans = NOTRANS;
928 options.SolveInitialized = NO; -hold the communication pattern used MatSolve() and MatMatSolve()
929 options.RefineInitialized = NO;
930 options.PrintStat = YES;
931 options.superlu_acc_offload = 1;
932 options.SymPattern = NO;
933 */
934 set_default_options_dist(&options);
935
936 B->trivialsymbolic = PETSC_TRUE;
937 if (ftype == MAT_FACTOR_LU) {
938 B->factortype = MAT_FACTOR_LU;
939 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
940 } else {
941 B->factortype = MAT_FACTOR_CHOLESKY;
942 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SuperLU_DIST;
943 options.SymPattern = YES;
944 }
945
946 /* set solvertype */
947 PetscCall(PetscFree(B->solvertype));
948 PetscCall(PetscStrallocpy(MATSOLVERSUPERLU_DIST, &B->solvertype));
949
950 PetscCall(PetscNew(&lu));
951 B->data = lu;
952 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
953
954 lu->options = options;
955 lu->options.Fact = DOFACT;
956 lu->matsolve_iscalled = PETSC_FALSE;
957 lu->matmatsolve_iscalled = PETSC_FALSE;
958
959 PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "SuperLU_DIST Options", "Mat");
960 PetscCall(PetscOptionsEnum("-pc_precision", "Precision used by SuperLU_DIST", "MATSOLVERSUPERLU_DIST", PetscPrecisionTypes, (PetscEnum)precision, (PetscEnum *)&precision, &flg));
961 PetscOptionsEnd();
962 if (flg) {
963 PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PetscObjectComm((PetscObject)A), PETSC_ERR_USER_INPUT, "-pc_precision only accepts single or double as option for SuperLU_DIST");
964 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
965 lu->singleprecision = (PetscBool)(precision == PETSC_PRECISION_SINGLE); // It also implies PetscReal is not single; not merely SuperLU_DIST is running in single
966 #endif
967 }
968
969 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_aij_superlu_dist));
970 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSuperluDistGetDiagU_C", MatSuperluDistGetDiagU_SuperLU_DIST));
971
972 *F = B;
973 PetscFunctionReturn(PETSC_SUCCESS);
974 }
975
MatSolverTypeRegister_SuperLU_DIST(void)976 PETSC_INTERN PetscErrorCode MatSolverTypeRegister_SuperLU_DIST(void)
977 {
978 PetscFunctionBegin;
979 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_superlu_dist));
980 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_superlu_dist));
981 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_superlu_dist));
982 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_superlu_dist));
983 if (Petsc_Superlu_dist_keyval == MPI_KEYVAL_INVALID) {
984 PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_Superlu_dist_keyval_DeleteFn, &Petsc_Superlu_dist_keyval, NULL));
985 PetscCall(PetscRegisterFinalize(Petsc_Superlu_dist_keyval_free));
986 }
987 PetscFunctionReturn(PETSC_SUCCESS);
988 }
989
990 /*MC
991 MATSOLVERSUPERLU_DIST - Parallel direct solver package for LU factorization
992
993 Use `./configure --download-superlu_dist --download-parmetis --download-metis --download-ptscotch` to have PETSc installed with SuperLU_DIST
994
995 Use `-pc_type lu` `-pc_factor_mat_solver_type superlu_dist` to use this direct solver
996
997 Works with `MATAIJ` matrices
998
999 Options Database Keys:
1000 + -mat_superlu_dist_r <n> - number of rows in processor partition
1001 . -mat_superlu_dist_c <n> - number of columns in processor partition
1002 . -mat_superlu_dist_3d - use 3d partition, requires SuperLU_DIST 7.2 or later
1003 . -mat_superlu_dist_d <n> - depth in 3d partition (valid only if `-mat_superlu_dist_3d`) is provided
1004 . -mat_superlu_dist_equil - equilibrate the matrix
1005 . -mat_superlu_dist_rowperm <NOROWPERM,LargeDiag_MC64,LargeDiag_AWPM,MY_PERMR> - row permutation
1006 . -mat_superlu_dist_colperm <NATURAL,MMD_AT_PLUS_A,MMD_ATA,METIS_AT_PLUS_A,PARMETIS> - column permutation
1007 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
1008 . -mat_superlu_dist_fact <SamePattern> - (choose one of) `SamePattern`, `SamePattern_SameRowPerm`, `DOFACT`
1009 . -mat_superlu_dist_iterrefine - use iterative refinement
1010 . -mat_superlu_dist_printstat - print factorization information
1011 . -mat_superlu_dist_gpuoffload - offload factorization onto the GPUs, requires SuperLU_DIST 8.0.0 or later
1012 - -pc_precision single - use SuperLU_DIST single precision with PETSc double precision
1013
1014 Level: beginner
1015
1016 Note:
1017 If PETSc was configured with `--with-cuda` then this solver will use the GPUs by default.
1018
1019 .seealso: [](ch_matrices), `Mat`, `PCLU`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatGetFactor()`
1020 M*/
1021