1448de63eSKris Buschelman /*
298dbe763SHong Zhang Provides an interface to the SuperLU_DIST sparse solver
3448de63eSKris Buschelman */
4448de63eSKris Buschelman
5c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h>
6c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h>
7a7b55187SBarry Smith #include <petscpkg_version.h>
8448de63eSKris Buschelman
9a51cd166SPierre Jolivet PETSC_PRAGMA_DIAGNOSTIC_IGNORED_BEGIN("-Wundef")
10448de63eSKris Buschelman EXTERN_C_BEGIN
112695cf96SNuno Nobre // To suppress compiler warnings
122695cf96SNuno Nobre #define DISABLE_CUSPARSE_DEPRECATED
13448de63eSKris Buschelman #if defined(PETSC_USE_COMPLEX)
143e558968SBarry Smith #define CASTDOUBLECOMPLEX (doublecomplex *)
15d4783600SBarry Smith #define CASTDOUBLECOMPLEXSTAR (doublecomplex **)
16c6db04a5SJed Brown #include <superlu_zdefs.h>
17a7b55187SBarry Smith #define LUstructInit zLUstructInit
18a7b55187SBarry Smith #define ScalePermstructInit zScalePermstructInit
19a7b55187SBarry Smith #define ScalePermstructFree zScalePermstructFree
20a7b55187SBarry Smith #define LUstructFree zLUstructFree
21a7b55187SBarry Smith #define Destroy_LU zDestroy_LU
22a7b55187SBarry Smith #define ScalePermstruct_t zScalePermstruct_t
23a7b55187SBarry Smith #define LUstruct_t zLUstruct_t
24a7b55187SBarry Smith #define SOLVEstruct_t zSOLVEstruct_t
253e558968SBarry Smith #define SolveFinalize zSolveFinalize
263e558968SBarry Smith #define pGetDiagU pzGetDiagU
27d4783600SBarry Smith #define pgssvx pzgssvx
283e558968SBarry Smith #define allocateA_dist zallocateA_dist
29d4783600SBarry Smith #define Create_CompRowLoc_Matrix_dist zCreate_CompRowLoc_Matrix_dist
30d4783600SBarry Smith #define SLU SLU_Z
319e48eb9fSliuyangzhuan #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
323e558968SBarry Smith #define DeAllocLlu_3d zDeAllocLlu_3d
333e558968SBarry Smith #define DeAllocGlu_3d zDeAllocGlu_3d
343e558968SBarry Smith #define Destroy_A3d_gathered_on_2d zDestroy_A3d_gathered_on_2d
353e558968SBarry Smith #define pgssvx3d pzgssvx3d
36a7b55187SBarry Smith #endif
37d4783600SBarry Smith #elif defined(PETSC_USE_REAL_SINGLE)
38d4783600SBarry Smith #define CASTDOUBLECOMPLEX
39d4783600SBarry Smith #define CASTDOUBLECOMPLEXSTAR
40d4783600SBarry Smith #include <superlu_sdefs.h>
41d4783600SBarry Smith #define LUstructInit sLUstructInit
42d4783600SBarry Smith #define ScalePermstructInit sScalePermstructInit
43d4783600SBarry Smith #define ScalePermstructFree sScalePermstructFree
44d4783600SBarry Smith #define LUstructFree sLUstructFree
45d4783600SBarry Smith #define Destroy_LU sDestroy_LU
46d4783600SBarry Smith #define ScalePermstruct_t sScalePermstruct_t
47d4783600SBarry Smith #define LUstruct_t sLUstruct_t
48d4783600SBarry Smith #define SOLVEstruct_t sSOLVEstruct_t
49d4783600SBarry Smith #define SolveFinalize sSolveFinalize
50d4783600SBarry Smith #define pGetDiagU psGetDiagU
51d4783600SBarry Smith #define pgssvx psgssvx
52d4783600SBarry Smith #define allocateA_dist sallocateA_dist
53d4783600SBarry Smith #define Create_CompRowLoc_Matrix_dist sCreate_CompRowLoc_Matrix_dist
54d4783600SBarry Smith #define SLU SLU_S
559e48eb9fSliuyangzhuan #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
56d4783600SBarry Smith #define DeAllocLlu_3d sDeAllocLlu_3d
57d4783600SBarry Smith #define DeAllocGlu_3d sDeAllocGlu_3d
58d4783600SBarry Smith #define Destroy_A3d_gathered_on_2d sDestroy_A3d_gathered_on_2d
59d4783600SBarry Smith #define pgssvx3d psgssvx3d
60d4783600SBarry Smith #endif
61448de63eSKris Buschelman #else
623e558968SBarry Smith #define CASTDOUBLECOMPLEX
63d4783600SBarry Smith #define CASTDOUBLECOMPLEXSTAR
64c6db04a5SJed Brown #include <superlu_ddefs.h>
65a7b55187SBarry Smith #define LUstructInit dLUstructInit
66a7b55187SBarry Smith #define ScalePermstructInit dScalePermstructInit
67a7b55187SBarry Smith #define ScalePermstructFree dScalePermstructFree
68a7b55187SBarry Smith #define LUstructFree dLUstructFree
69a7b55187SBarry Smith #define Destroy_LU dDestroy_LU
70a7b55187SBarry Smith #define ScalePermstruct_t dScalePermstruct_t
71a7b55187SBarry Smith #define LUstruct_t dLUstruct_t
72a7b55187SBarry Smith #define SOLVEstruct_t dSOLVEstruct_t
733e558968SBarry Smith #define SolveFinalize dSolveFinalize
743e558968SBarry Smith #define pGetDiagU pdGetDiagU
75d4783600SBarry Smith #define pgssvx pdgssvx
763e558968SBarry Smith #define allocateA_dist dallocateA_dist
77d4783600SBarry Smith #define Create_CompRowLoc_Matrix_dist dCreate_CompRowLoc_Matrix_dist
78d4783600SBarry Smith #define SLU SLU_D
799e48eb9fSliuyangzhuan #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
803e558968SBarry Smith #define DeAllocLlu_3d dDeAllocLlu_3d
813e558968SBarry Smith #define DeAllocGlu_3d dDeAllocGlu_3d
823e558968SBarry Smith #define Destroy_A3d_gathered_on_2d dDestroy_A3d_gathered_on_2d
833e558968SBarry Smith #define pgssvx3d pdgssvx3d
84a7b55187SBarry Smith #endif
85448de63eSKris Buschelman #endif
86a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
87b2d1094fSBarry Smith #include <superlu_sdefs.h>
88a6ad605dSBarry Smith #endif
89448de63eSKris Buschelman EXTERN_C_END
90a51cd166SPierre Jolivet PETSC_PRAGMA_DIAGNOSTIC_IGNORED_END()
91448de63eSKris Buschelman
92448de63eSKris Buschelman typedef struct {
93448de63eSKris Buschelman int_t nprow, npcol, *row, *col;
94448de63eSKris Buschelman gridinfo_t grid;
959e48eb9fSliuyangzhuan #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
963e558968SBarry Smith PetscBool use3d;
973e558968SBarry Smith int_t npdep; /* replication factor, must be power of two */
983e558968SBarry Smith gridinfo3d_t grid3d;
993e558968SBarry Smith #endif
10072210a05SBarry Smith superlu_dist_options_t options;
101448de63eSKris Buschelman SuperMatrix A_sup;
102448de63eSKris Buschelman ScalePermstruct_t ScalePermstruct;
103448de63eSKris Buschelman LUstruct_t LUstruct;
104448de63eSKris Buschelman int StatPrint;
105448de63eSKris Buschelman SOLVEstruct_t SOLVEstruct;
10619660a1cSHong Zhang fact_t FactPattern;
107448de63eSKris Buschelman MPI_Comm comm_superlu;
108d4783600SBarry Smith PetscScalar *val;
109ace3abfcSBarry Smith PetscBool matsolve_iscalled, matmatsolve_iscalled;
110ace3abfcSBarry Smith PetscBool CleanUpSuperLU_Dist; /* Flag to clean up (non-global) SuperLU objects during Destroy */
111a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
112a6ad605dSBarry Smith sScalePermstruct_t sScalePermstruct;
113a6ad605dSBarry Smith sLUstruct_t sLUstruct;
114a6ad605dSBarry Smith sSOLVEstruct_t sSOLVEstruct;
115a6ad605dSBarry Smith float *sval;
116b2d1094fSBarry Smith PetscBool singleprecision; /* use single precision SuperLU_DIST from double precision PETSc */
117a6ad605dSBarry Smith float *sbptr;
118a6ad605dSBarry Smith #endif
119f0c56d0fSKris Buschelman } Mat_SuperLU_DIST;
120f0c56d0fSKris Buschelman
MatSuperluDistGetDiagU_SuperLU_DIST(Mat F,PetscScalar * diagU)12166976f2fSJacob Faibussowitsch static PetscErrorCode MatSuperluDistGetDiagU_SuperLU_DIST(Mat F, PetscScalar *diagU)
122d71ae5a4SJacob Faibussowitsch {
12398dbe763SHong Zhang Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data;
124fb785984SHong Zhang
125fb785984SHong Zhang PetscFunctionBegin;
126e77caa6dSBarry Smith PetscStackCallExternalVoid("SuperLU_DIST:pGetDiagU", pGetDiagU(F->rmap->N, &lu->LUstruct, &lu->grid, CASTDOUBLECOMPLEX diagU));
1273ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
128fb785984SHong Zhang }
129fb785984SHong Zhang
MatSuperluDistGetDiagU(Mat F,PetscScalar * diagU)130d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSuperluDistGetDiagU(Mat F, PetscScalar *diagU)
131d71ae5a4SJacob Faibussowitsch {
132fb785984SHong Zhang PetscFunctionBegin;
133fb785984SHong Zhang PetscValidHeaderSpecific(F, MAT_CLASSID, 1);
134cac4c232SBarry Smith PetscTryMethod(F, "MatSuperluDistGetDiagU_C", (Mat, PetscScalar *), (F, diagU));
1353ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
13698dbe763SHong Zhang }
13798dbe763SHong Zhang
138ed876474SBarry Smith /* This allows reusing the Superlu_DIST communicator and grid when only a single SuperLU_DIST matrix is used at a time */
139ed876474SBarry Smith typedef struct {
140ed876474SBarry Smith MPI_Comm comm;
141ed876474SBarry Smith PetscBool busy;
142ed876474SBarry Smith gridinfo_t grid;
1439e48eb9fSliuyangzhuan #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
1443e558968SBarry Smith PetscBool use3d;
1453e558968SBarry Smith gridinfo3d_t grid3d;
1463e558968SBarry Smith #endif
147ed876474SBarry Smith } PetscSuperLU_DIST;
1487def7855SStefano Zampini
149ed876474SBarry Smith static PetscMPIInt Petsc_Superlu_dist_keyval = MPI_KEYVAL_INVALID;
150ed876474SBarry Smith
Petsc_Superlu_dist_keyval_DeleteFn(MPI_Comm comm,PetscMPIInt keyval,void * attr_val,void * extra_state)1518434afd1SBarry Smith PETSC_EXTERN PetscMPIInt MPIAPI Petsc_Superlu_dist_keyval_DeleteFn(MPI_Comm comm, PetscMPIInt keyval, void *attr_val, void *extra_state)
152d71ae5a4SJacob Faibussowitsch {
153ed876474SBarry Smith PetscSuperLU_DIST *context = (PetscSuperLU_DIST *)attr_val;
154ed876474SBarry Smith
155ed876474SBarry Smith PetscFunctionBegin;
1567c5b2466SBarry Smith PetscCheckReturnMPI(keyval == Petsc_Superlu_dist_keyval, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Unexpected keyval");
1579e48eb9fSliuyangzhuan #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
1583e558968SBarry Smith if (context->use3d) {
159e77caa6dSBarry Smith PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit3d", superlu_gridexit3d(&context->grid3d));
1603e558968SBarry Smith } else
1613e558968SBarry Smith #endif
162e77caa6dSBarry Smith PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit", superlu_gridexit(&context->grid));
1637c5b2466SBarry Smith PetscCallMPIReturnMPI(MPI_Comm_free(&context->comm));
1647c5b2466SBarry Smith PetscCallReturnMPI(PetscFree(context));
165ed876474SBarry Smith PetscFunctionReturn(MPI_SUCCESS);
166ed876474SBarry Smith }
167ed876474SBarry Smith
16895a31119SBarry Smith /*
16995a31119SBarry Smith Performs MPI_Comm_free_keyval() on Petsc_Superlu_dist_keyval but keeps the global variable for
17095a31119SBarry Smith users who do not destroy all PETSc objects before PetscFinalize().
17195a31119SBarry Smith
1728434afd1SBarry Smith The value Petsc_Superlu_dist_keyval is retained so that Petsc_Superlu_dist_keyval_DeleteFn()
17395a31119SBarry Smith can still check that the keyval associated with the MPI communicator is correct when the MPI
17495a31119SBarry Smith communicator is destroyed.
17595a31119SBarry Smith
17695a31119SBarry Smith This is called in PetscFinalize()
17795a31119SBarry Smith */
Petsc_Superlu_dist_keyval_free(void)178d71ae5a4SJacob Faibussowitsch static PetscErrorCode Petsc_Superlu_dist_keyval_free(void)
179d71ae5a4SJacob Faibussowitsch {
18095a31119SBarry Smith PetscMPIInt Petsc_Superlu_dist_keyval_temp = Petsc_Superlu_dist_keyval;
181ed876474SBarry Smith
182ed876474SBarry Smith PetscFunctionBegin;
1839566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_free_keyval(&Petsc_Superlu_dist_keyval_temp));
1843ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
185ed876474SBarry Smith }
186ed876474SBarry Smith
MatDestroy_SuperLU_DIST(Mat A)187d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
188d71ae5a4SJacob Faibussowitsch {
189a305db0bSBarry Smith Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data;
190448de63eSKris Buschelman
191448de63eSKris Buschelman PetscFunctionBegin;
192a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
193b2d1094fSBarry Smith PetscCall(PetscFree(lu->sbptr));
194a6ad605dSBarry Smith #endif
195a305db0bSBarry Smith if (lu->CleanUpSuperLU_Dist) {
196448de63eSKris Buschelman /* Deallocate SuperLU_DIST storage */
197e77caa6dSBarry Smith PetscStackCallExternalVoid("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist", Destroy_CompRowLoc_Matrix_dist(&lu->A_sup));
198b2d1094fSBarry Smith if (lu->options.SolveInitialized) {
199a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
200b2d1094fSBarry Smith if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", sSolveFinalize(&lu->options, &lu->sSOLVEstruct));
201a6ad605dSBarry Smith else
202a6ad605dSBarry Smith #endif
203a6ad605dSBarry Smith PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
204b2d1094fSBarry Smith }
2059e48eb9fSliuyangzhuan #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
2063e558968SBarry Smith if (lu->use3d) {
207a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
208b2d1094fSBarry Smith if (lu->singleprecision) {
2099e48eb9fSliuyangzhuan PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->sLUstruct));
2109e48eb9fSliuyangzhuan PetscStackCallExternalVoid("SuperLU_DIST:Destroy_A3d_gathered_on_2d", sDestroy_A3d_gathered_on_2d(&lu->sSOLVEstruct, &lu->grid3d));
2119e48eb9fSliuyangzhuan PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", sScalePermstructFree(&lu->sScalePermstruct));
2129e48eb9fSliuyangzhuan PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", sLUstructFree(&lu->sLUstruct));
213a6ad605dSBarry Smith } else
214a6ad605dSBarry Smith #endif
215a6ad605dSBarry Smith {
2169e48eb9fSliuyangzhuan PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct));
217a6ad605dSBarry Smith PetscStackCallExternalVoid("SuperLU_DIST:Destroy_A3d_gathered_on_2d", Destroy_A3d_gathered_on_2d(&lu->SOLVEstruct, &lu->grid3d));
2189e48eb9fSliuyangzhuan PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", ScalePermstructFree(&lu->ScalePermstruct));
2199e48eb9fSliuyangzhuan PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", LUstructFree(&lu->LUstruct));
2209e48eb9fSliuyangzhuan }
2213e558968SBarry Smith } else
2223e558968SBarry Smith #endif
223a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
224b2d1094fSBarry Smith if (lu->singleprecision) {
225b2d1094fSBarry Smith PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->cmap->N, &lu->grid, &lu->sLUstruct));
226b2d1094fSBarry Smith PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", sScalePermstructFree(&lu->sScalePermstruct));
227b2d1094fSBarry Smith PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", sLUstructFree(&lu->sLUstruct));
228a6ad605dSBarry Smith } else
229a6ad605dSBarry Smith #endif
230a6ad605dSBarry Smith {
231e77caa6dSBarry Smith PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct));
232e77caa6dSBarry Smith PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", ScalePermstructFree(&lu->ScalePermstruct));
233e77caa6dSBarry Smith PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", LUstructFree(&lu->LUstruct));
234b2d1094fSBarry Smith }
2353e558968SBarry Smith /* Release the SuperLU_DIST process grid only if the matrix has its own copy, that is it is not in the communicator context */
236ed876474SBarry Smith if (lu->comm_superlu) {
2379e48eb9fSliuyangzhuan #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
2383e558968SBarry Smith if (lu->use3d) {
239e77caa6dSBarry Smith PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit3d", superlu_gridexit3d(&lu->grid3d));
2403e558968SBarry Smith } else
2413e558968SBarry Smith #endif
242e77caa6dSBarry Smith PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit", superlu_gridexit(&lu->grid));
243c866f80eSFande Kong }
244c866f80eSFande Kong }
245c866f80eSFande Kong /*
246c866f80eSFande Kong * We always need to release the communicator that was created in MatGetFactor_aij_superlu_dist.
247c866f80eSFande Kong * lu->CleanUpSuperLU_Dist was turned on in MatLUFactorSymbolic_SuperLU_DIST. There are some use
248c866f80eSFande Kong * cases where we only create a matrix but do not solve mat. In these cases, lu->CleanUpSuperLU_Dist
249c866f80eSFande Kong * is off, and the communicator was not released or marked as "not busy " in the old code.
250c866f80eSFande Kong * Here we try to release comm regardless.
251c866f80eSFande Kong */
252c866f80eSFande Kong if (lu->comm_superlu) {
2539566063dSJacob Faibussowitsch PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &lu->comm_superlu));
254ed876474SBarry Smith } else {
255ed876474SBarry Smith PetscSuperLU_DIST *context;
256ed876474SBarry Smith MPI_Comm comm;
257b8b5be36SMartin Diehl PetscMPIInt iflg;
258ed876474SBarry Smith
2599566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
260b8b5be36SMartin Diehl PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &iflg));
261b8b5be36SMartin Diehl if (iflg) context->busy = PETSC_FALSE;
262ed876474SBarry Smith }
263c866f80eSFande Kong
2649566063dSJacob Faibussowitsch PetscCall(PetscFree(A->data));
265fb785984SHong Zhang /* clear composed functions */
2669566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
2679566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSuperluDistGetDiagU_C", NULL));
2683ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
269448de63eSKris Buschelman }
270448de63eSKris Buschelman
MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)271d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSolve_SuperLU_DIST(Mat A, Vec b_mpi, Vec x)
272d71ae5a4SJacob Faibussowitsch {
273a305db0bSBarry Smith Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data;
2741e8514b5SHong Zhang PetscInt m = A->rmap->n;
275448de63eSKris Buschelman SuperLUStat_t stat;
276d4783600SBarry Smith PetscReal berr[1];
2777cac11d5SHong Zhang PetscScalar *bptr = NULL;
278a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
279b2d1094fSBarry Smith float sberr[1];
280a6ad605dSBarry Smith #endif
28115082402SBarry Smith int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
282dff31646SBarry Smith static PetscBool cite = PETSC_FALSE;
283448de63eSKris Buschelman
284448de63eSKris Buschelman PetscFunctionBegin;
28595ca0fccSBarry Smith PetscCheck(lu->options.Fact == FACTORED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "SuperLU_DIST options.Fact must equal FACTORED");
2869371c9d4SSatish Balay PetscCall(PetscCitationsRegister("@article{lidemmel03,\n author = {Xiaoye S. Li and James W. Demmel},\n title = {{SuperLU_DIST}: A Scalable Distributed-Memory Sparse Direct\n Solver for Unsymmetric Linear Systems},\n journal = {ACM "
2879371c9d4SSatish Balay "Trans. Mathematical Software},\n volume = {29},\n number = {2},\n pages = {110-140},\n year = 2003\n}\n",
2889371c9d4SSatish Balay &cite));
289b7d2c110SHong Zhang
29049d41e62SHong Zhang if (lu->options.SolveInitialized && !lu->matsolve_iscalled) {
29149d41e62SHong Zhang /* see comments in MatMatSolve() */
292a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
293b2d1094fSBarry Smith if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", sSolveFinalize(&lu->options, &lu->sSOLVEstruct));
294a6ad605dSBarry Smith else
295a6ad605dSBarry Smith #endif
296a6ad605dSBarry Smith PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
29749d41e62SHong Zhang lu->options.SolveInitialized = NO;
298448de63eSKris Buschelman }
2999566063dSJacob Faibussowitsch PetscCall(VecCopy(b_mpi, x));
3009566063dSJacob Faibussowitsch PetscCall(VecGetArray(x, &bptr));
301a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
302b2d1094fSBarry Smith if (lu->singleprecision) {
303b2d1094fSBarry Smith PetscInt n;
304b2d1094fSBarry Smith PetscCall(VecGetLocalSize(x, &n));
305b2d1094fSBarry Smith if (!lu->sbptr) PetscCall(PetscMalloc1(n, &lu->sbptr));
306b2d1094fSBarry Smith for (PetscInt i = 0; i < n; i++) lu->sbptr[i] = PetscRealPart(bptr[i]); /* PetscRealPart() is a no-op to allow compiling with complex */
307b2d1094fSBarry Smith }
308a6ad605dSBarry Smith #endif
309448de63eSKris Buschelman
310e77caa6dSBarry Smith PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */
3119e48eb9fSliuyangzhuan #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
312b2d1094fSBarry Smith if (lu->use3d) {
313a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
3149ad2cedaSPierre Jolivet if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", psgssvx3d(&lu->options, &lu->A_sup, &lu->sScalePermstruct, lu->sbptr, (int)m, 1, &lu->grid3d, &lu->sLUstruct, &lu->sSOLVEstruct, sberr, &stat, &info));
315a6ad605dSBarry Smith else
316a6ad605dSBarry Smith #endif
3179ad2cedaSPierre Jolivet PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, (int)m, 1, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
3189e48eb9fSliuyangzhuan PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "pgssvx3d fails, info: %d", info);
319b2d1094fSBarry Smith } else
320448de63eSKris Buschelman #endif
3219e48eb9fSliuyangzhuan {
322a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
3239ad2cedaSPierre Jolivet if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", psgssvx(&lu->options, &lu->A_sup, &lu->sScalePermstruct, lu->sbptr, (int)m, 1, &lu->grid, &lu->sLUstruct, &lu->sSOLVEstruct, sberr, &stat, &info));
324a6ad605dSBarry Smith else
325a6ad605dSBarry Smith #endif
3269ad2cedaSPierre Jolivet PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, (int)m, 1, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
3279e48eb9fSliuyangzhuan PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "pgssvx fails, info: %d", info);
3289e48eb9fSliuyangzhuan }
329e77caa6dSBarry Smith if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */
330e77caa6dSBarry Smith PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat));
331a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
332b2d1094fSBarry Smith if (lu->singleprecision) {
333b2d1094fSBarry Smith PetscInt n;
334b2d1094fSBarry Smith PetscCall(VecGetLocalSize(x, &n));
335b2d1094fSBarry Smith for (PetscInt i = 0; i < n; i++) bptr[i] = lu->sbptr[i];
336b2d1094fSBarry Smith }
337a6ad605dSBarry Smith #endif
3389566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(x, &bptr));
33949d41e62SHong Zhang lu->matsolve_iscalled = PETSC_TRUE;
34049d41e62SHong Zhang lu->matmatsolve_iscalled = PETSC_FALSE;
3413ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
342448de63eSKris Buschelman }
343448de63eSKris Buschelman
MatMatSolve_SuperLU_DIST(Mat A,Mat B_mpi,Mat X)344d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatSolve_SuperLU_DIST(Mat A, Mat B_mpi, Mat X)
345d71ae5a4SJacob Faibussowitsch {
346a305db0bSBarry Smith Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data;
3471e8514b5SHong Zhang PetscInt m = A->rmap->n, nrhs;
34849d41e62SHong Zhang SuperLUStat_t stat;
349d4783600SBarry Smith PetscReal berr[1];
35049d41e62SHong Zhang PetscScalar *bptr;
35149d41e62SHong Zhang int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
352bda8bf91SBarry Smith PetscBool flg;
35349d41e62SHong Zhang
35449d41e62SHong Zhang PetscFunctionBegin;
355a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
356b2d1094fSBarry Smith PetscCheck(!lu->singleprecision, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for -pc_precision single");
357a6ad605dSBarry Smith #endif
35895ca0fccSBarry Smith PetscCheck(lu->options.Fact == FACTORED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "SuperLU_DIST options.Fact must equal FACTORED");
3599566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)B_mpi, &flg, MATSEQDENSE, MATMPIDENSE, NULL));
36095ca0fccSBarry Smith PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix B must be MATDENSE matrix");
361f9fb9879SHong Zhang if (X != B_mpi) {
3629566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &flg, MATSEQDENSE, MATMPIDENSE, NULL));
36328b400f6SJacob Faibussowitsch PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");
364f9fb9879SHong Zhang }
36549d41e62SHong Zhang
36649d41e62SHong Zhang if (lu->options.SolveInitialized && !lu->matmatsolve_iscalled) {
36749d41e62SHong Zhang /* communication pattern of SOLVEstruct is unlikely created for matmatsolve,
36849d41e62SHong Zhang thus destroy it and create a new SOLVEstruct.
36949d41e62SHong Zhang Otherwise it may result in memory corruption or incorrect solution
370c4762a1bSJed Brown See src/mat/tests/ex125.c */
371e77caa6dSBarry Smith PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
37249d41e62SHong Zhang lu->options.SolveInitialized = NO;
37349d41e62SHong Zhang }
37448a46eb9SPierre Jolivet if (X != B_mpi) PetscCall(MatCopy(B_mpi, X, SAME_NONZERO_PATTERN));
37549d41e62SHong Zhang
3769566063dSJacob Faibussowitsch PetscCall(MatGetSize(B_mpi, NULL, &nrhs));
37749d41e62SHong Zhang
378e77caa6dSBarry Smith PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */
3799566063dSJacob Faibussowitsch PetscCall(MatDenseGetArray(X, &bptr));
3801e8514b5SHong Zhang
3819e48eb9fSliuyangzhuan #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
3829ad2cedaSPierre Jolivet if (lu->use3d) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, (int)m, (int)nrhs, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
3833e558968SBarry Smith else
38449d41e62SHong Zhang #endif
3859ad2cedaSPierre Jolivet PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, (int)m, (int)nrhs, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
38695ca0fccSBarry Smith PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "pdgssvx fails, info: %d", info);
3879566063dSJacob Faibussowitsch PetscCall(MatDenseRestoreArray(X, &bptr));
38849d41e62SHong Zhang
389e77caa6dSBarry Smith if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */
390e77caa6dSBarry Smith PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat));
39149d41e62SHong Zhang lu->matsolve_iscalled = PETSC_FALSE;
39249d41e62SHong Zhang lu->matmatsolve_iscalled = PETSC_TRUE;
3933ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
39449d41e62SHong Zhang }
39549d41e62SHong Zhang
3963d4d7ae9SHong Zhang /*
3973d4d7ae9SHong Zhang input:
3983d4d7ae9SHong Zhang F: numeric Cholesky factor
3993d4d7ae9SHong Zhang output:
4003d4d7ae9SHong Zhang nneg: total number of negative pivots
4013d4d7ae9SHong Zhang nzero: total number of zero pivots
4023d4d7ae9SHong Zhang npos: (global dimension of F) - nneg - nzero
4033d4d7ae9SHong Zhang */
MatGetInertia_SuperLU_DIST(Mat F,PetscInt * nneg,PetscInt * nzero,PetscInt * npos)404d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetInertia_SuperLU_DIST(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
405d71ae5a4SJacob Faibussowitsch {
4063d4d7ae9SHong Zhang Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data;
4073d4d7ae9SHong Zhang PetscScalar *diagU = NULL;
4083d4d7ae9SHong Zhang PetscInt M, i, neg = 0, zero = 0, pos = 0;
40909d31890SHong Zhang PetscReal r;
4103d4d7ae9SHong Zhang
4113d4d7ae9SHong Zhang PetscFunctionBegin;
412a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
413b2d1094fSBarry Smith PetscCheck(!lu->singleprecision, PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Not for -pc_precision single");
414a6ad605dSBarry Smith #endif
41595ca0fccSBarry Smith PetscCheck(F->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix factor F is not assembled");
41695ca0fccSBarry Smith PetscCheck(lu->options.RowPerm == NOROWPERM, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must set NOROWPERM");
4179566063dSJacob Faibussowitsch PetscCall(MatGetSize(F, &M, NULL));
4189566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(M, &diagU));
4199566063dSJacob Faibussowitsch PetscCall(MatSuperluDistGetDiagU(F, diagU));
4203d4d7ae9SHong Zhang for (i = 0; i < M; i++) {
42109d31890SHong Zhang #if defined(PETSC_USE_COMPLEX)
422b1fc5afdSJose E. Roman r = PetscAbsReal(PetscImaginaryPart(diagU[i]));
423b1fc5afdSJose E. Roman PetscCheck(r < 1000 * PETSC_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "diagU[%" PetscInt_FMT "]=%g + i %g is non-real", i, (double)PetscRealPart(diagU[i]), (double)PetscImaginaryPart(diagU[i]));
42409d31890SHong Zhang r = PetscRealPart(diagU[i]);
42509d31890SHong Zhang #else
42609d31890SHong Zhang r = diagU[i];
42709d31890SHong Zhang #endif
42809d31890SHong Zhang if (r > 0) {
4293d4d7ae9SHong Zhang pos++;
43009d31890SHong Zhang } else if (r < 0) {
4313d4d7ae9SHong Zhang neg++;
4323d4d7ae9SHong Zhang } else zero++;
4333d4d7ae9SHong Zhang }
43409d31890SHong Zhang
4359566063dSJacob Faibussowitsch PetscCall(PetscFree(diagU));
4362e38f1a5SHong Zhang if (nneg) *nneg = neg;
4372e38f1a5SHong Zhang if (nzero) *nzero = zero;
4382e38f1a5SHong Zhang if (npos) *npos = pos;
4393ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
4403d4d7ae9SHong Zhang }
44149d41e62SHong Zhang
MatLUFactorNumeric_SuperLU_DIST(Mat F,Mat A,const MatFactorInfo * info)442d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F, Mat A, const MatFactorInfo *info)
443d71ae5a4SJacob Faibussowitsch {
4443d4d7ae9SHong Zhang Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data;
445c8f76b2fSStefano Zampini Mat Aloc;
446c8f76b2fSStefano Zampini const PetscScalar *av;
447c8f76b2fSStefano Zampini const PetscInt *ai = NULL, *aj = NULL;
448*2a8381b2SBarry Smith PetscInt nz, unused;
44915082402SBarry Smith int sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */
450448de63eSKris Buschelman SuperLUStat_t stat;
451d4783600SBarry Smith PetscReal *berr = 0;
452a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
453b2d1094fSBarry Smith float *sberr = 0;
454a6ad605dSBarry Smith #endif
455c8f76b2fSStefano Zampini PetscBool ismpiaij, isseqaij, flg;
456448de63eSKris Buschelman
457448de63eSKris Buschelman PetscFunctionBegin;
4589566063dSJacob Faibussowitsch PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij));
4599566063dSJacob Faibussowitsch PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &ismpiaij));
460c8f76b2fSStefano Zampini if (ismpiaij) {
4619566063dSJacob Faibussowitsch PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &Aloc));
462c8f76b2fSStefano Zampini } else if (isseqaij) {
4639566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)A));
464c8f76b2fSStefano Zampini Aloc = A;
46598921bdaSJacob Faibussowitsch } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
466401c3357SHong Zhang
467*2a8381b2SBarry Smith PetscCall(MatGetRowIJ(Aloc, 0, PETSC_FALSE, PETSC_FALSE, &unused, &ai, &aj, &flg));
46895ca0fccSBarry Smith PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "GetRowIJ failed");
4699566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(Aloc, &av));
470c8f76b2fSStefano Zampini nz = ai[Aloc->rmap->n];
471448de63eSKris Buschelman
4721e8514b5SHong Zhang /* Allocations for A_sup */
47319660a1cSHong Zhang if (lu->options.Fact == DOFACT) { /* first numeric factorization */
474a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
475b2d1094fSBarry Smith if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", sallocateA_dist(Aloc->rmap->n, nz, &lu->sval, &lu->col, &lu->row));
476a6ad605dSBarry Smith else
477a6ad605dSBarry Smith #endif
478a6ad605dSBarry Smith PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR & lu->val, &lu->col, &lu->row));
479448de63eSKris Buschelman } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
48019660a1cSHong Zhang if (lu->FactPattern == SamePattern_SameRowPerm) {
48119660a1cSHong Zhang lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
48228865de0SHong Zhang } else if (lu->FactPattern == SamePattern) {
4839e48eb9fSliuyangzhuan #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
4843e558968SBarry Smith if (lu->use3d) {
485a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
486b2d1094fSBarry Smith if (lu->singleprecision) {
487b2d1094fSBarry Smith PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->sLUstruct));
488b2d1094fSBarry Smith PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", sSolveFinalize(&lu->options, &lu->sSOLVEstruct));
489a6ad605dSBarry Smith } else
490a6ad605dSBarry Smith #endif
491a6ad605dSBarry Smith {
492e77caa6dSBarry Smith PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct));
493e77caa6dSBarry Smith PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
494b2d1094fSBarry Smith }
4953e558968SBarry Smith } else
4963e558968SBarry Smith #endif
497a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
498b2d1094fSBarry Smith if (lu->singleprecision)
499b2d1094fSBarry Smith PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->rmap->N, &lu->grid, &lu->sLUstruct));
500a6ad605dSBarry Smith else
501a6ad605dSBarry Smith #endif
502a6ad605dSBarry Smith PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct));
503448de63eSKris Buschelman lu->options.Fact = SamePattern;
504f1869c40SHong Zhang } else if (lu->FactPattern == DOFACT) {
505e77caa6dSBarry Smith PetscStackCallExternalVoid("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist", Destroy_CompRowLoc_Matrix_dist(&lu->A_sup));
506a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
507a6ad605dSBarry Smith if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->rmap->N, &lu->grid, &lu->sLUstruct));
508a6ad605dSBarry Smith else
509a6ad605dSBarry Smith #endif
510e77caa6dSBarry Smith PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct));
511f1869c40SHong Zhang lu->options.Fact = DOFACT;
512a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
513b2d1094fSBarry Smith if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", sallocateA_dist(Aloc->rmap->n, nz, &lu->sval, &lu->col, &lu->row));
514a6ad605dSBarry Smith else
515a6ad605dSBarry Smith #endif
516a6ad605dSBarry Smith PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR & lu->val, &lu->col, &lu->row));
517ed876474SBarry Smith } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "options.Fact must be one of SamePattern SamePattern_SameRowPerm DOFACT");
51819660a1cSHong Zhang }
5191e8514b5SHong Zhang
5201e8514b5SHong Zhang /* Copy AIJ matrix to superlu_dist matrix */
5219566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(lu->row, ai, Aloc->rmap->n + 1));
5229566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(lu->col, aj, nz));
523a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
524b2d1094fSBarry Smith if (lu->singleprecision)
525b2d1094fSBarry Smith for (PetscInt i = 0; i < nz; i++) lu->sval[i] = PetscRealPart(av[i]); /* PetscRealPart() is a no-op to allow compiling with complex */
526a6ad605dSBarry Smith else
527a6ad605dSBarry Smith #endif
528a6ad605dSBarry Smith PetscCall(PetscArraycpy(lu->val, av, nz));
529*2a8381b2SBarry Smith PetscCall(MatRestoreRowIJ(Aloc, 0, PETSC_FALSE, PETSC_FALSE, &unused, &ai, &aj, &flg));
53095ca0fccSBarry Smith PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "RestoreRowIJ failed");
5319566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(Aloc, &av));
5329566063dSJacob Faibussowitsch PetscCall(MatDestroy(&Aloc));
53328865de0SHong Zhang
5341e8514b5SHong Zhang /* Create and setup A_sup */
53528865de0SHong Zhang if (lu->options.Fact == DOFACT) {
536a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
537b2d1094fSBarry Smith if (lu->singleprecision)
538b2d1094fSBarry Smith PetscStackCallExternalVoid("SuperLU_DIST:Create_CompRowLoc_Matrix_dist", sCreate_CompRowLoc_Matrix_dist(&lu->A_sup, A->rmap->N, A->cmap->N, nz, A->rmap->n, A->rmap->rstart, lu->sval, lu->col, lu->row, SLU_NR_loc, SLU_S, SLU_GE));
539b2d1094fSBarry Smith else
540a6ad605dSBarry Smith #endif
541e77caa6dSBarry Smith PetscStackCallExternalVoid("SuperLU_DIST:Create_CompRowLoc_Matrix_dist", Create_CompRowLoc_Matrix_dist(&lu->A_sup, A->rmap->N, A->cmap->N, nz, A->rmap->n, A->rmap->rstart, CASTDOUBLECOMPLEX lu->val, lu->col, lu->row, SLU_NR_loc, SLU, SLU_GE));
542448de63eSKris Buschelman }
543448de63eSKris Buschelman
544448de63eSKris Buschelman /* Factor the matrix. */
545e77caa6dSBarry Smith PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */
5469e48eb9fSliuyangzhuan #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
5473e558968SBarry Smith if (lu->use3d) {
548a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
5499ad2cedaSPierre Jolivet if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", psgssvx3d(&lu->options, &lu->A_sup, &lu->sScalePermstruct, 0, (int)A->rmap->n, 0, &lu->grid3d, &lu->sLUstruct, &lu->sSOLVEstruct, sberr, &stat, &sinfo));
550a6ad605dSBarry Smith else
551a6ad605dSBarry Smith #endif
5529ad2cedaSPierre Jolivet PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, (int)A->rmap->n, 0, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo));
5533e558968SBarry Smith } else
554448de63eSKris Buschelman #endif
5559e48eb9fSliuyangzhuan {
556a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
5579ad2cedaSPierre Jolivet if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", psgssvx(&lu->options, &lu->A_sup, &lu->sScalePermstruct, 0, (int)A->rmap->n, 0, &lu->grid, &lu->sLUstruct, &lu->sSOLVEstruct, sberr, &stat, &sinfo));
558a6ad605dSBarry Smith else
559a6ad605dSBarry Smith #endif
5609ad2cedaSPierre Jolivet PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, (int)A->rmap->n, 0, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo));
5619e48eb9fSliuyangzhuan }
5621b4120e5SHong Zhang if (sinfo > 0) {
56395ca0fccSBarry Smith PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot in row %d", sinfo);
5641b4120e5SHong Zhang if (sinfo <= lu->A_sup.ncol) {
565603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
5669566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "U(i,i) is exactly zero, i= %d\n", sinfo));
5671b4120e5SHong Zhang } else if (sinfo > lu->A_sup.ncol) {
5681b4120e5SHong Zhang /*
5691b4120e5SHong Zhang number of bytes allocated when memory allocation
5701b4120e5SHong Zhang failure occurred, plus A->ncol.
5711b4120e5SHong Zhang */
572603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_OUTMEMORY;
5739566063dSJacob Faibussowitsch PetscCall(PetscInfo(F, "Number of bytes allocated when memory allocation fails %d\n", sinfo));
5741b4120e5SHong Zhang }
57595ca0fccSBarry Smith } else PetscCheck(sinfo >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "info = %d, argument in p*gssvx() had an illegal value", sinfo);
5761b4120e5SHong Zhang
577ac530a7eSPierre Jolivet if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */
578e77caa6dSBarry Smith PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat));
5793d4d7ae9SHong Zhang F->assembled = PETSC_TRUE;
5803d4d7ae9SHong Zhang F->preallocated = PETSC_TRUE;
58119660a1cSHong Zhang lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
5823ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
583448de63eSKris Buschelman }
584448de63eSKris Buschelman
585f0b74427SPierre Jolivet /* Note the PETSc r and c permutations are ignored */
MatLUFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,IS c,const MatFactorInfo * info)586d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
587d71ae5a4SJacob Faibussowitsch {
588a305db0bSBarry Smith Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data;
58926cc229bSBarry Smith PetscInt M = A->rmap->N, N = A->cmap->N, indx;
590b8b5be36SMartin Diehl PetscMPIInt size, iflg;
59126cc229bSBarry Smith PetscBool flg, set;
59226cc229bSBarry Smith const char *colperm[] = {"NATURAL", "MMD_AT_PLUS_A", "MMD_ATA", "METIS_AT_PLUS_A", "PARMETIS"};
59326cc229bSBarry Smith const char *rowperm[] = {"NOROWPERM", "LargeDiag_MC64", "LargeDiag_AWPM", "MY_PERMR"};
59426cc229bSBarry Smith const char *factPattern[] = {"SamePattern", "SamePattern_SameRowPerm", "DOFACT"};
59526cc229bSBarry Smith MPI_Comm comm;
59626cc229bSBarry Smith PetscSuperLU_DIST *context = NULL;
597b24902e0SBarry Smith
598b24902e0SBarry Smith PetscFunctionBegin;
59926cc229bSBarry Smith /* Set options to F */
60026cc229bSBarry Smith PetscCall(PetscObjectGetComm((PetscObject)F, &comm));
60126cc229bSBarry Smith PetscCallMPI(MPI_Comm_size(comm, &size));
60226cc229bSBarry Smith
60326cc229bSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "SuperLU_Dist Options", "Mat");
60426cc229bSBarry Smith PetscCall(PetscOptionsBool("-mat_superlu_dist_equil", "Equilibrate matrix", "None", lu->options.Equil ? PETSC_TRUE : PETSC_FALSE, &flg, &set));
60526cc229bSBarry Smith if (set && !flg) lu->options.Equil = NO;
60626cc229bSBarry Smith
60726cc229bSBarry Smith PetscCall(PetscOptionsEList("-mat_superlu_dist_rowperm", "Row permutation", "None", rowperm, 4, rowperm[1], &indx, &flg));
60826cc229bSBarry Smith if (flg) {
60926cc229bSBarry Smith switch (indx) {
610d71ae5a4SJacob Faibussowitsch case 0:
611d71ae5a4SJacob Faibussowitsch lu->options.RowPerm = NOROWPERM;
612d71ae5a4SJacob Faibussowitsch break;
613d71ae5a4SJacob Faibussowitsch case 1:
614d71ae5a4SJacob Faibussowitsch lu->options.RowPerm = LargeDiag_MC64;
615d71ae5a4SJacob Faibussowitsch break;
616d71ae5a4SJacob Faibussowitsch case 2:
617d71ae5a4SJacob Faibussowitsch lu->options.RowPerm = LargeDiag_AWPM;
618d71ae5a4SJacob Faibussowitsch break;
619d71ae5a4SJacob Faibussowitsch case 3:
620d71ae5a4SJacob Faibussowitsch lu->options.RowPerm = MY_PERMR;
621d71ae5a4SJacob Faibussowitsch break;
622d71ae5a4SJacob Faibussowitsch default:
623d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown row permutation");
62426cc229bSBarry Smith }
62526cc229bSBarry Smith }
62626cc229bSBarry Smith
62726cc229bSBarry Smith PetscCall(PetscOptionsEList("-mat_superlu_dist_colperm", "Column permutation", "None", colperm, 5, colperm[3], &indx, &flg));
62826cc229bSBarry Smith if (flg) {
62926cc229bSBarry Smith switch (indx) {
630d71ae5a4SJacob Faibussowitsch case 0:
631d71ae5a4SJacob Faibussowitsch lu->options.ColPerm = NATURAL;
632d71ae5a4SJacob Faibussowitsch break;
633d71ae5a4SJacob Faibussowitsch case 1:
634d71ae5a4SJacob Faibussowitsch lu->options.ColPerm = MMD_AT_PLUS_A;
635d71ae5a4SJacob Faibussowitsch break;
636d71ae5a4SJacob Faibussowitsch case 2:
637d71ae5a4SJacob Faibussowitsch lu->options.ColPerm = MMD_ATA;
638d71ae5a4SJacob Faibussowitsch break;
639d71ae5a4SJacob Faibussowitsch case 3:
640d71ae5a4SJacob Faibussowitsch lu->options.ColPerm = METIS_AT_PLUS_A;
641d71ae5a4SJacob Faibussowitsch break;
64226cc229bSBarry Smith case 4:
64326cc229bSBarry Smith lu->options.ColPerm = PARMETIS; /* only works for np>1 */
64426cc229bSBarry Smith break;
645d71ae5a4SJacob Faibussowitsch default:
646d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation");
64726cc229bSBarry Smith }
64826cc229bSBarry Smith }
64926cc229bSBarry Smith
65026cc229bSBarry Smith lu->options.ReplaceTinyPivot = NO;
65126cc229bSBarry Smith PetscCall(PetscOptionsBool("-mat_superlu_dist_replacetinypivot", "Replace tiny pivots", "None", lu->options.ReplaceTinyPivot ? PETSC_TRUE : PETSC_FALSE, &flg, &set));
65226cc229bSBarry Smith if (set && flg) lu->options.ReplaceTinyPivot = YES;
65326cc229bSBarry Smith
65426cc229bSBarry Smith lu->options.ParSymbFact = NO;
65526cc229bSBarry Smith PetscCall(PetscOptionsBool("-mat_superlu_dist_parsymbfact", "Parallel symbolic factorization", "None", PETSC_FALSE, &flg, &set));
65626cc229bSBarry Smith if (set && flg && size > 1) {
65726cc229bSBarry Smith #if defined(PETSC_HAVE_PARMETIS)
65826cc229bSBarry Smith lu->options.ParSymbFact = YES;
65926cc229bSBarry Smith lu->options.ColPerm = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */
66026cc229bSBarry Smith #else
66126cc229bSBarry Smith SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "parsymbfact needs PARMETIS");
66226cc229bSBarry Smith #endif
66326cc229bSBarry Smith }
66426cc229bSBarry Smith
66526cc229bSBarry Smith lu->FactPattern = SamePattern;
66626cc229bSBarry Smith PetscCall(PetscOptionsEList("-mat_superlu_dist_fact", "Sparsity pattern for repeated matrix factorization", "None", factPattern, 3, factPattern[0], &indx, &flg));
66726cc229bSBarry Smith if (flg) {
66826cc229bSBarry Smith switch (indx) {
669d71ae5a4SJacob Faibussowitsch case 0:
670d71ae5a4SJacob Faibussowitsch lu->FactPattern = SamePattern;
671d71ae5a4SJacob Faibussowitsch break;
672d71ae5a4SJacob Faibussowitsch case 1:
673d71ae5a4SJacob Faibussowitsch lu->FactPattern = SamePattern_SameRowPerm;
674d71ae5a4SJacob Faibussowitsch break;
675d71ae5a4SJacob Faibussowitsch case 2:
676d71ae5a4SJacob Faibussowitsch lu->FactPattern = DOFACT;
677d71ae5a4SJacob Faibussowitsch break;
67826cc229bSBarry Smith }
67926cc229bSBarry Smith }
68026cc229bSBarry Smith
68126cc229bSBarry Smith lu->options.IterRefine = NOREFINE;
68226cc229bSBarry Smith PetscCall(PetscOptionsBool("-mat_superlu_dist_iterrefine", "Use iterative refinement", "None", lu->options.IterRefine == NOREFINE ? PETSC_FALSE : PETSC_TRUE, &flg, &set));
6833e56d16dSNuno Nobre if (set && flg) lu->options.IterRefine = SLU_DOUBLE;
68426cc229bSBarry Smith
68526cc229bSBarry Smith if (PetscLogPrintInfo) lu->options.PrintStat = YES;
68626cc229bSBarry Smith else lu->options.PrintStat = NO;
687e9b55caeSJunchao Zhang PetscCall(PetscOptionsDeprecated("-mat_superlu_dist_statprint", "-mat_superlu_dist_printstat", "3.19", NULL));
688e9b55caeSJunchao Zhang PetscCall(PetscOptionsBool("-mat_superlu_dist_printstat", "Print factorization information", "None", (PetscBool)lu->options.PrintStat, (PetscBool *)&lu->options.PrintStat, NULL));
68926cc229bSBarry Smith
690a0940d2aSSatish Balay #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(8, 0, 0)
6913e56d16dSNuno Nobre lu->options.superlu_acc_offload = 1;
6923e56d16dSNuno Nobre PetscCall(PetscOptionsBool("-mat_superlu_dist_gpuoffload", "Offload factorization onto the GPUs", "None", (PetscBool)lu->options.superlu_acc_offload, (PetscBool *)&lu->options.superlu_acc_offload, NULL));
693a0940d2aSSatish Balay #endif
6943e56d16dSNuno Nobre
695b8b5be36SMartin Diehl PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &iflg));
696b8b5be36SMartin Diehl if (!iflg || context->busy) { /* additional options */
697b8b5be36SMartin Diehl if (!iflg) {
69826cc229bSBarry Smith PetscCall(PetscNew(&context));
69926cc229bSBarry Smith context->busy = PETSC_TRUE;
70026cc229bSBarry Smith PetscCallMPI(MPI_Comm_dup(comm, &context->comm));
70126cc229bSBarry Smith PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_Superlu_dist_keyval, context));
70226cc229bSBarry Smith } else {
70326cc229bSBarry Smith PetscCall(PetscCommGetComm(PetscObjectComm((PetscObject)A), &lu->comm_superlu));
70426cc229bSBarry Smith }
70526cc229bSBarry Smith
70626cc229bSBarry Smith /* Default number of process columns and rows */
70726cc229bSBarry Smith lu->nprow = (int_t)(0.5 + PetscSqrtReal((PetscReal)size));
70826cc229bSBarry Smith if (!lu->nprow) lu->nprow = 1;
70926cc229bSBarry Smith while (lu->nprow > 0) {
71026cc229bSBarry Smith lu->npcol = (int_t)(size / lu->nprow);
71126cc229bSBarry Smith if (size == lu->nprow * lu->npcol) break;
71226cc229bSBarry Smith lu->nprow--;
71326cc229bSBarry Smith }
7149e48eb9fSliuyangzhuan #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
71526cc229bSBarry Smith lu->use3d = PETSC_FALSE;
71626cc229bSBarry Smith lu->npdep = 1;
71726cc229bSBarry Smith #endif
71826cc229bSBarry Smith
7199e48eb9fSliuyangzhuan #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
72026cc229bSBarry Smith PetscCall(PetscOptionsBool("-mat_superlu_dist_3d", "Use SuperLU_DIST 3D distribution", "None", lu->use3d, &lu->use3d, NULL));
72126cc229bSBarry Smith if (lu->use3d) {
72226cc229bSBarry Smith PetscInt t;
72326cc229bSBarry Smith PetscCall(PetscOptionsInt("-mat_superlu_dist_d", "Number of z entries in processor partition", "None", lu->npdep, (PetscInt *)&lu->npdep, NULL));
72426cc229bSBarry Smith t = (PetscInt)PetscLog2Real((PetscReal)lu->npdep);
72526cc229bSBarry Smith PetscCheck(PetscPowInt(2, t) == lu->npdep, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_OUTOFRANGE, "-mat_superlu_dist_d %lld must be a power of 2", (long long)lu->npdep);
72626cc229bSBarry Smith if (lu->npdep > 1) {
72726cc229bSBarry Smith lu->nprow = (int_t)(0.5 + PetscSqrtReal((PetscReal)(size / lu->npdep)));
72826cc229bSBarry Smith if (!lu->nprow) lu->nprow = 1;
72926cc229bSBarry Smith while (lu->nprow > 0) {
73026cc229bSBarry Smith lu->npcol = (int_t)(size / (lu->npdep * lu->nprow));
73126cc229bSBarry Smith if (size == lu->nprow * lu->npcol * lu->npdep) break;
73226cc229bSBarry Smith lu->nprow--;
73326cc229bSBarry Smith }
73426cc229bSBarry Smith }
73526cc229bSBarry Smith }
73626cc229bSBarry Smith #endif
73726cc229bSBarry Smith PetscCall(PetscOptionsInt("-mat_superlu_dist_r", "Number rows in processor partition", "None", lu->nprow, (PetscInt *)&lu->nprow, NULL));
73826cc229bSBarry Smith PetscCall(PetscOptionsInt("-mat_superlu_dist_c", "Number columns in processor partition", "None", lu->npcol, (PetscInt *)&lu->npcol, NULL));
7399e48eb9fSliuyangzhuan #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
74026cc229bSBarry Smith PetscCheck(size == lu->nprow * lu->npcol * lu->npdep, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of processes %d must equal to nprow %lld * npcol %lld * npdep %lld", size, (long long)lu->nprow, (long long)lu->npcol, (long long)lu->npdep);
74126cc229bSBarry Smith #else
74226cc229bSBarry Smith PetscCheck(size == lu->nprow * lu->npcol, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of processes %d must equal to nprow %lld * npcol %lld", size, (long long)lu->nprow, (long long)lu->npcol);
74326cc229bSBarry Smith #endif
74426cc229bSBarry Smith /* end of adding additional options */
74526cc229bSBarry Smith
7469e48eb9fSliuyangzhuan #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
74726cc229bSBarry Smith if (lu->use3d) {
7489ad2cedaSPierre Jolivet PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridinit3d", superlu_gridinit3d(context ? context->comm : lu->comm_superlu, (int)lu->nprow, (int)lu->npcol, (int)lu->npdep, &lu->grid3d));
7499371c9d4SSatish Balay if (context) {
7509371c9d4SSatish Balay context->grid3d = lu->grid3d;
7519371c9d4SSatish Balay context->use3d = lu->use3d;
7529371c9d4SSatish Balay }
75326cc229bSBarry Smith } else {
75426cc229bSBarry Smith #endif
7559ad2cedaSPierre Jolivet PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridinit", superlu_gridinit(context ? context->comm : lu->comm_superlu, (int)lu->nprow, (int)lu->npcol, &lu->grid));
75626cc229bSBarry Smith if (context) context->grid = lu->grid;
7579e48eb9fSliuyangzhuan #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
75826cc229bSBarry Smith }
75926cc229bSBarry Smith #endif
76026cc229bSBarry Smith PetscCall(PetscInfo(NULL, "Duplicating a communicator for SuperLU_DIST and calling superlu_gridinit()\n"));
761b8b5be36SMartin Diehl if (iflg) {
76226cc229bSBarry Smith PetscCall(PetscInfo(NULL, "Communicator attribute already in use so not saving communicator and SuperLU_DIST grid in communicator attribute \n"));
76326cc229bSBarry Smith } else {
76426cc229bSBarry Smith PetscCall(PetscInfo(NULL, "Storing communicator and SuperLU_DIST grid in communicator attribute\n"));
76526cc229bSBarry Smith }
766b8b5be36SMartin Diehl } else { /* (iflg && !context->busy) */
7679d3446b2SPierre Jolivet PetscCall(PetscInfo(NULL, "Reusing communicator and superlu_gridinit() for SuperLU_DIST from communicator attribute.\n"));
76826cc229bSBarry Smith context->busy = PETSC_TRUE;
76926cc229bSBarry Smith lu->grid = context->grid;
77026cc229bSBarry Smith }
77126cc229bSBarry Smith PetscOptionsEnd();
77226cc229bSBarry Smith
773b24902e0SBarry Smith /* Initialize ScalePermstruct and LUstruct. */
774a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
775b2d1094fSBarry Smith if (lu->singleprecision) {
776b2d1094fSBarry Smith PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructInit", sScalePermstructInit(M, N, &lu->sScalePermstruct));
777b2d1094fSBarry Smith PetscStackCallExternalVoid("SuperLU_DIST:LUstructInit", sLUstructInit(N, &lu->sLUstruct));
778a6ad605dSBarry Smith } else
779a6ad605dSBarry Smith #endif
780a6ad605dSBarry Smith {
781e77caa6dSBarry Smith PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructInit", ScalePermstructInit(M, N, &lu->ScalePermstruct));
782e77caa6dSBarry Smith PetscStackCallExternalVoid("SuperLU_DIST:LUstructInit", LUstructInit(N, &lu->LUstruct));
783b2d1094fSBarry Smith }
7848c5e8961SHong Zhang F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST;
7858c5e8961SHong Zhang F->ops->solve = MatSolve_SuperLU_DIST;
7868c5e8961SHong Zhang F->ops->matsolve = MatMatSolve_SuperLU_DIST;
7873d4d7ae9SHong Zhang F->ops->getinertia = NULL;
78809d31890SHong Zhang
789b94d7dedSBarry Smith if (A->symmetric == PETSC_BOOL3_TRUE || A->hermitian == PETSC_BOOL3_TRUE) F->ops->getinertia = MatGetInertia_SuperLU_DIST;
7908c5e8961SHong Zhang lu->CleanUpSuperLU_Dist = PETSC_TRUE;
7913ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
792b24902e0SBarry Smith }
793b24902e0SBarry Smith
MatCholeskyFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,const MatFactorInfo * info)794d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCholeskyFactorSymbolic_SuperLU_DIST(Mat F, Mat A, IS r, const MatFactorInfo *info)
795d71ae5a4SJacob Faibussowitsch {
796bf99a652SHong Zhang PetscFunctionBegin;
7979566063dSJacob Faibussowitsch PetscCall(MatLUFactorSymbolic_SuperLU_DIST(F, A, r, r, info));
798bf99a652SHong Zhang F->ops->choleskyfactornumeric = MatLUFactorNumeric_SuperLU_DIST;
7993ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
800bf99a652SHong Zhang }
801bf99a652SHong Zhang
MatFactorGetSolverType_aij_superlu_dist(Mat A,MatSolverType * type)802d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatFactorGetSolverType_aij_superlu_dist(Mat A, MatSolverType *type)
803d71ae5a4SJacob Faibussowitsch {
80435bd34faSBarry Smith PetscFunctionBegin;
8052692d6eeSBarry Smith *type = MATSOLVERSUPERLU_DIST;
8063ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
80735bd34faSBarry Smith }
80835bd34faSBarry Smith
MatView_Info_SuperLU_DIST(Mat A,PetscViewer viewer)809d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_Info_SuperLU_DIST(Mat A, PetscViewer viewer)
810d71ae5a4SJacob Faibussowitsch {
811a305db0bSBarry Smith Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data;
812a305db0bSBarry Smith superlu_dist_options_t options;
813a305db0bSBarry Smith
814a305db0bSBarry Smith PetscFunctionBegin;
815a305db0bSBarry Smith /* check if matrix is superlu_dist type */
8163ba16761SJacob Faibussowitsch if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(PETSC_SUCCESS);
817a305db0bSBarry Smith
818a305db0bSBarry Smith options = lu->options;
8199566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "SuperLU_DIST run parameters:\n"));
820c0aa6a63SJacob Faibussowitsch /* would love to use superlu 'IFMT' macro but it looks like it's inconsistently applied, the
821c0aa6a63SJacob Faibussowitsch * format spec for int64_t is set to %d for whatever reason */
8229566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " Process grid nprow %lld x npcol %lld \n", (long long)lu->nprow, (long long)lu->npcol));
8239e48eb9fSliuyangzhuan #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
82448a46eb9SPierre Jolivet if (lu->use3d) PetscCall(PetscViewerASCIIPrintf(viewer, " Using 3d decomposition with npdep %lld \n", (long long)lu->npdep));
8253e558968SBarry Smith #endif
8263e558968SBarry Smith
8279566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " Equilibrate matrix %s \n", PetscBools[options.Equil != NO]));
8289566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " Replace tiny pivots %s \n", PetscBools[options.ReplaceTinyPivot != NO]));
8299566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " Use iterative refinement %s \n", PetscBools[options.IterRefine == SLU_DOUBLE]));
8309566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " Processors in row %lld col partition %lld \n", (long long)lu->nprow, (long long)lu->npcol));
8310425e358SSatish Balay
8320425e358SSatish Balay switch (options.RowPerm) {
833d71ae5a4SJacob Faibussowitsch case NOROWPERM:
834d71ae5a4SJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation NOROWPERM\n"));
835d71ae5a4SJacob Faibussowitsch break;
836d71ae5a4SJacob Faibussowitsch case LargeDiag_MC64:
837d71ae5a4SJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation LargeDiag_MC64\n"));
838d71ae5a4SJacob Faibussowitsch break;
839d71ae5a4SJacob Faibussowitsch case LargeDiag_AWPM:
840d71ae5a4SJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation LargeDiag_AWPM\n"));
841d71ae5a4SJacob Faibussowitsch break;
842d71ae5a4SJacob Faibussowitsch case MY_PERMR:
843d71ae5a4SJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation MY_PERMR\n"));
844d71ae5a4SJacob Faibussowitsch break;
845d71ae5a4SJacob Faibussowitsch default:
846d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation");
8470425e358SSatish Balay }
848a305db0bSBarry Smith
849a305db0bSBarry Smith switch (options.ColPerm) {
850d71ae5a4SJacob Faibussowitsch case NATURAL:
851d71ae5a4SJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation NATURAL\n"));
852d71ae5a4SJacob Faibussowitsch break;
853d71ae5a4SJacob Faibussowitsch case MMD_AT_PLUS_A:
854d71ae5a4SJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation MMD_AT_PLUS_A\n"));
855d71ae5a4SJacob Faibussowitsch break;
856d71ae5a4SJacob Faibussowitsch case MMD_ATA:
857d71ae5a4SJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation MMD_ATA\n"));
858d71ae5a4SJacob Faibussowitsch break;
859bf3e94a3SBarry Smith /* Even though this is called METIS, the SuperLU_DIST code sets this by default if PARMETIS is defined, not METIS */
860d71ae5a4SJacob Faibussowitsch case METIS_AT_PLUS_A:
861d71ae5a4SJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation METIS_AT_PLUS_A\n"));
862d71ae5a4SJacob Faibussowitsch break;
863d71ae5a4SJacob Faibussowitsch case PARMETIS:
864d71ae5a4SJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation PARMETIS\n"));
865d71ae5a4SJacob Faibussowitsch break;
866d71ae5a4SJacob Faibussowitsch default:
867d71ae5a4SJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation");
868a305db0bSBarry Smith }
869a305db0bSBarry Smith
8709566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " Parallel symbolic factorization %s \n", PetscBools[options.ParSymbFact != NO]));
871a305db0bSBarry Smith
872a305db0bSBarry Smith if (lu->FactPattern == SamePattern) {
8739566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " Repeated factorization SamePattern\n"));
8741639e5c1SHong Zhang } else if (lu->FactPattern == SamePattern_SameRowPerm) {
8759566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " Repeated factorization SamePattern_SameRowPerm\n"));
8761639e5c1SHong Zhang } else if (lu->FactPattern == DOFACT) {
8779566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " Repeated factorization DOFACT\n"));
878b2d1094fSBarry Smith } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown factorization pattern");
879a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
880b2d1094fSBarry Smith if (lu->singleprecision) PetscCall(PetscViewerASCIIPrintf(viewer, " Using SuperLU_DIST in single precision\n"));
881a6ad605dSBarry Smith #endif
8823ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
883a305db0bSBarry Smith }
884a305db0bSBarry Smith
MatView_SuperLU_DIST(Mat A,PetscViewer viewer)885d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SuperLU_DIST(Mat A, PetscViewer viewer)
886d71ae5a4SJacob Faibussowitsch {
8879f196a02SMartin Diehl PetscBool isascii;
888a305db0bSBarry Smith PetscViewerFormat format;
889a305db0bSBarry Smith
890a305db0bSBarry Smith PetscFunctionBegin;
8919f196a02SMartin Diehl PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
8929f196a02SMartin Diehl if (isascii) {
8939566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format));
89448a46eb9SPierre Jolivet if (format == PETSC_VIEWER_ASCII_INFO) PetscCall(MatView_Info_SuperLU_DIST(A, viewer));
895a305db0bSBarry Smith }
8963ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
897a305db0bSBarry Smith }
898a305db0bSBarry Smith
MatGetFactor_aij_superlu_dist(Mat A,MatFactorType ftype,Mat * F)899d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A, MatFactorType ftype, Mat *F)
900d71ae5a4SJacob Faibussowitsch {
901b24902e0SBarry Smith Mat B;
902f0c56d0fSKris Buschelman Mat_SuperLU_DIST *lu;
90326cc229bSBarry Smith PetscInt M = A->rmap->N, N = A->cmap->N;
904ea2994c3SHong Zhang PetscMPIInt size;
90572210a05SBarry Smith superlu_dist_options_t options;
906b2d1094fSBarry Smith PetscBool flg;
9076e4037fdSJunchao Zhang PetscPrecision precision = PETSC_PRECISION_INVALID;
908448de63eSKris Buschelman
909f0c56d0fSKris Buschelman PetscFunctionBegin;
910448de63eSKris Buschelman /* Create the factorization matrix */
9119566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
9129566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, M, N));
9139566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy("superlu_dist", &((PetscObject)B)->type_name));
9149566063dSJacob Faibussowitsch PetscCall(MatSetUp(B));
915ecddaf3cSBarry Smith B->ops->getinfo = MatGetInfo_External;
9162877fffaSHong Zhang B->ops->view = MatView_SuperLU_DIST;
917bacf2223SBarry Smith B->ops->destroy = MatDestroy_SuperLU_DIST;
9182205254eSKarl Rupp
9193e56d16dSNuno Nobre /* set_default_options_dist() sets the default input options to the following values:
92019660a1cSHong Zhang options.Fact = DOFACT;
92119660a1cSHong Zhang options.Equil = YES;
9223348698aSHong Zhang options.ParSymbFact = NO;
9236b508d82SHong Zhang options.ColPerm = METIS_AT_PLUS_A;
9240425e358SSatish Balay options.RowPerm = LargeDiag_MC64;
9253e56d16dSNuno Nobre options.ReplaceTinyPivot = NO;
9263e56d16dSNuno Nobre options.IterRefine = SLU_DOUBLE;
9273348698aSHong Zhang options.Trans = NOTRANS;
92849d41e62SHong Zhang options.SolveInitialized = NO; -hold the communication pattern used MatSolve() and MatMatSolve()
92919660a1cSHong Zhang options.RefineInitialized = NO;
93019660a1cSHong Zhang options.PrintStat = YES;
9313e56d16dSNuno Nobre options.superlu_acc_offload = 1;
932c8f76b2fSStefano Zampini options.SymPattern = NO;
93319660a1cSHong Zhang */
934ba45f930SHong Zhang set_default_options_dist(&options);
935e95ce8a1SHong Zhang
93666e17bc3SBarry Smith B->trivialsymbolic = PETSC_TRUE;
937c8f76b2fSStefano Zampini if (ftype == MAT_FACTOR_LU) {
938c8f76b2fSStefano Zampini B->factortype = MAT_FACTOR_LU;
939c8f76b2fSStefano Zampini B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
940c8f76b2fSStefano Zampini } else {
941c8f76b2fSStefano Zampini B->factortype = MAT_FACTOR_CHOLESKY;
942c8f76b2fSStefano Zampini B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SuperLU_DIST;
943c8f76b2fSStefano Zampini options.SymPattern = YES;
944c8f76b2fSStefano Zampini }
945c8f76b2fSStefano Zampini
946c8f76b2fSStefano Zampini /* set solvertype */
9479566063dSJacob Faibussowitsch PetscCall(PetscFree(B->solvertype));
9489566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERSUPERLU_DIST, &B->solvertype));
949c8f76b2fSStefano Zampini
9504dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&lu));
951c8f76b2fSStefano Zampini B->data = lu;
9529566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
953ed876474SBarry Smith
954448de63eSKris Buschelman lu->options = options;
95519660a1cSHong Zhang lu->options.Fact = DOFACT;
95649d41e62SHong Zhang lu->matsolve_iscalled = PETSC_FALSE;
95749d41e62SHong Zhang lu->matmatsolve_iscalled = PETSC_FALSE;
9582205254eSKarl Rupp
959cd0670b2SPierre Jolivet PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "SuperLU_DIST Options", "Mat");
9606e4037fdSJunchao Zhang PetscCall(PetscOptionsEnum("-pc_precision", "Precision used by SuperLU_DIST", "MATSOLVERSUPERLU_DIST", PetscPrecisionTypes, (PetscEnum)precision, (PetscEnum *)&precision, &flg));
9616e4037fdSJunchao Zhang PetscOptionsEnd();
962b2d1094fSBarry Smith if (flg) {
9636e4037fdSJunchao Zhang PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PetscObjectComm((PetscObject)A), PETSC_ERR_USER_INPUT, "-pc_precision only accepts single or double as option for SuperLU_DIST");
964a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
9656e4037fdSJunchao Zhang lu->singleprecision = (PetscBool)(precision == PETSC_PRECISION_SINGLE); // It also implies PetscReal is not single; not merely SuperLU_DIST is running in single
966b2d1094fSBarry Smith #endif
967b2d1094fSBarry Smith }
968b2d1094fSBarry Smith
9699566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_aij_superlu_dist));
9709566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSuperluDistGetDiagU_C", MatSuperluDistGetDiagU_SuperLU_DIST));
971fb785984SHong Zhang
972448de63eSKris Buschelman *F = B;
9733ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
974448de63eSKris Buschelman }
975f3c0ef26SHong Zhang
MatSolverTypeRegister_SuperLU_DIST(void)976d1f0640dSPierre Jolivet PETSC_INTERN PetscErrorCode MatSolverTypeRegister_SuperLU_DIST(void)
977d71ae5a4SJacob Faibussowitsch {
978f3c0ef26SHong Zhang PetscFunctionBegin;
9799566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_superlu_dist));
9809566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_superlu_dist));
9819566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_superlu_dist));
9829566063dSJacob Faibussowitsch PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_superlu_dist));
9837def7855SStefano Zampini if (Petsc_Superlu_dist_keyval == MPI_KEYVAL_INVALID) {
9848434afd1SBarry Smith PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_Superlu_dist_keyval_DeleteFn, &Petsc_Superlu_dist_keyval, NULL));
9857def7855SStefano Zampini PetscCall(PetscRegisterFinalize(Petsc_Superlu_dist_keyval_free));
9867def7855SStefano Zampini }
9873ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
988f3c0ef26SHong Zhang }
989448de63eSKris Buschelman
99024b6179bSKris Buschelman /*MC
9912692d6eeSBarry Smith MATSOLVERSUPERLU_DIST - Parallel direct solver package for LU factorization
99224b6179bSKris Buschelman
9932ef1f0ffSBarry Smith Use `./configure --download-superlu_dist --download-parmetis --download-metis --download-ptscotch` to have PETSc installed with SuperLU_DIST
994c2b89b5dSBarry Smith
9952ef1f0ffSBarry Smith Use `-pc_type lu` `-pc_factor_mat_solver_type superlu_dist` to use this direct solver
996c2b89b5dSBarry Smith
99711a5261eSBarry Smith Works with `MATAIJ` matrices
99824b6179bSKris Buschelman
99924b6179bSKris Buschelman Options Database Keys:
100041c8de11SBarry Smith + -mat_superlu_dist_r <n> - number of rows in processor partition
100124b6179bSKris Buschelman . -mat_superlu_dist_c <n> - number of columns in processor partition
10023e558968SBarry Smith . -mat_superlu_dist_3d - use 3d partition, requires SuperLU_DIST 7.2 or later
10032ef1f0ffSBarry Smith . -mat_superlu_dist_d <n> - depth in 3d partition (valid only if `-mat_superlu_dist_3d`) is provided
100424b6179bSKris Buschelman . -mat_superlu_dist_equil - equilibrate the matrix
10050425e358SSatish Balay . -mat_superlu_dist_rowperm <NOROWPERM,LargeDiag_MC64,LargeDiag_AWPM,MY_PERMR> - row permutation
1006f7374848SSatish Balay . -mat_superlu_dist_colperm <NATURAL,MMD_AT_PLUS_A,MMD_ATA,METIS_AT_PLUS_A,PARMETIS> - column permutation
100724b6179bSKris Buschelman . -mat_superlu_dist_replacetinypivot - replace tiny pivots
10082ef1f0ffSBarry Smith . -mat_superlu_dist_fact <SamePattern> - (choose one of) `SamePattern`, `SamePattern_SameRowPerm`, `DOFACT`
100924b6179bSKris Buschelman . -mat_superlu_dist_iterrefine - use iterative refinement
1010b2d1094fSBarry Smith . -mat_superlu_dist_printstat - print factorization information
1011a0940d2aSSatish Balay . -mat_superlu_dist_gpuoffload - offload factorization onto the GPUs, requires SuperLU_DIST 8.0.0 or later
1012dfe00d7bSSatish Balay - -pc_precision single - use SuperLU_DIST single precision with PETSc double precision
101324b6179bSKris Buschelman
101424b6179bSKris Buschelman Level: beginner
101524b6179bSKris Buschelman
10162ef1f0ffSBarry Smith Note:
10173e56d16dSNuno Nobre If PETSc was configured with `--with-cuda` then this solver will use the GPUs by default.
10182ef1f0ffSBarry Smith
10191cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `PCLU`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatGetFactor()`
102024b6179bSKris Buschelman M*/
1021