xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision 4e8208cbcbc709572b8abe32f33c78b69c819375)
1448de63eSKris Buschelman /*
298dbe763SHong Zhang         Provides an interface to the SuperLU_DIST sparse solver
3448de63eSKris Buschelman */
4448de63eSKris Buschelman 
5c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h>
6c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h>
7a7b55187SBarry Smith #include <petscpkg_version.h>
8448de63eSKris Buschelman 
9a51cd166SPierre Jolivet PETSC_PRAGMA_DIAGNOSTIC_IGNORED_BEGIN("-Wundef")
10448de63eSKris Buschelman EXTERN_C_BEGIN
112695cf96SNuno Nobre // To suppress compiler warnings
122695cf96SNuno Nobre #define DISABLE_CUSPARSE_DEPRECATED
13448de63eSKris Buschelman #if defined(PETSC_USE_COMPLEX)
143e558968SBarry Smith   #define CASTDOUBLECOMPLEX     (doublecomplex *)
15d4783600SBarry Smith   #define CASTDOUBLECOMPLEXSTAR (doublecomplex **)
16c6db04a5SJed Brown   #include <superlu_zdefs.h>
17a7b55187SBarry Smith   #define LUstructInit                  zLUstructInit
18a7b55187SBarry Smith   #define ScalePermstructInit           zScalePermstructInit
19a7b55187SBarry Smith   #define ScalePermstructFree           zScalePermstructFree
20a7b55187SBarry Smith   #define LUstructFree                  zLUstructFree
21a7b55187SBarry Smith   #define Destroy_LU                    zDestroy_LU
22a7b55187SBarry Smith   #define ScalePermstruct_t             zScalePermstruct_t
23a7b55187SBarry Smith   #define LUstruct_t                    zLUstruct_t
24a7b55187SBarry Smith   #define SOLVEstruct_t                 zSOLVEstruct_t
253e558968SBarry Smith   #define SolveFinalize                 zSolveFinalize
263e558968SBarry Smith   #define pGetDiagU                     pzGetDiagU
27d4783600SBarry Smith   #define pgssvx                        pzgssvx
283e558968SBarry Smith   #define allocateA_dist                zallocateA_dist
29d4783600SBarry Smith   #define Create_CompRowLoc_Matrix_dist zCreate_CompRowLoc_Matrix_dist
30d4783600SBarry Smith   #define SLU                           SLU_Z
319e48eb9fSliuyangzhuan   #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
323e558968SBarry Smith     #define DeAllocLlu_3d              zDeAllocLlu_3d
333e558968SBarry Smith     #define DeAllocGlu_3d              zDeAllocGlu_3d
343e558968SBarry Smith     #define Destroy_A3d_gathered_on_2d zDestroy_A3d_gathered_on_2d
353e558968SBarry Smith     #define pgssvx3d                   pzgssvx3d
36a7b55187SBarry Smith   #endif
37d4783600SBarry Smith #elif defined(PETSC_USE_REAL_SINGLE)
38d4783600SBarry Smith   #define CASTDOUBLECOMPLEX
39d4783600SBarry Smith   #define CASTDOUBLECOMPLEXSTAR
40d4783600SBarry Smith   #include <superlu_sdefs.h>
41d4783600SBarry Smith   #define LUstructInit                  sLUstructInit
42d4783600SBarry Smith   #define ScalePermstructInit           sScalePermstructInit
43d4783600SBarry Smith   #define ScalePermstructFree           sScalePermstructFree
44d4783600SBarry Smith   #define LUstructFree                  sLUstructFree
45d4783600SBarry Smith   #define Destroy_LU                    sDestroy_LU
46d4783600SBarry Smith   #define ScalePermstruct_t             sScalePermstruct_t
47d4783600SBarry Smith   #define LUstruct_t                    sLUstruct_t
48d4783600SBarry Smith   #define SOLVEstruct_t                 sSOLVEstruct_t
49d4783600SBarry Smith   #define SolveFinalize                 sSolveFinalize
50d4783600SBarry Smith   #define pGetDiagU                     psGetDiagU
51d4783600SBarry Smith   #define pgssvx                        psgssvx
52d4783600SBarry Smith   #define allocateA_dist                sallocateA_dist
53d4783600SBarry Smith   #define Create_CompRowLoc_Matrix_dist sCreate_CompRowLoc_Matrix_dist
54d4783600SBarry Smith   #define SLU                           SLU_S
559e48eb9fSliuyangzhuan   #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
56d4783600SBarry Smith     #define DeAllocLlu_3d              sDeAllocLlu_3d
57d4783600SBarry Smith     #define DeAllocGlu_3d              sDeAllocGlu_3d
58d4783600SBarry Smith     #define Destroy_A3d_gathered_on_2d sDestroy_A3d_gathered_on_2d
59d4783600SBarry Smith     #define pgssvx3d                   psgssvx3d
60d4783600SBarry Smith   #endif
61448de63eSKris Buschelman #else
623e558968SBarry Smith   #define CASTDOUBLECOMPLEX
63d4783600SBarry Smith   #define CASTDOUBLECOMPLEXSTAR
64c6db04a5SJed Brown   #include <superlu_ddefs.h>
65a7b55187SBarry Smith   #define LUstructInit                  dLUstructInit
66a7b55187SBarry Smith   #define ScalePermstructInit           dScalePermstructInit
67a7b55187SBarry Smith   #define ScalePermstructFree           dScalePermstructFree
68a7b55187SBarry Smith   #define LUstructFree                  dLUstructFree
69a7b55187SBarry Smith   #define Destroy_LU                    dDestroy_LU
70a7b55187SBarry Smith   #define ScalePermstruct_t             dScalePermstruct_t
71a7b55187SBarry Smith   #define LUstruct_t                    dLUstruct_t
72a7b55187SBarry Smith   #define SOLVEstruct_t                 dSOLVEstruct_t
733e558968SBarry Smith   #define SolveFinalize                 dSolveFinalize
743e558968SBarry Smith   #define pGetDiagU                     pdGetDiagU
75d4783600SBarry Smith   #define pgssvx                        pdgssvx
763e558968SBarry Smith   #define allocateA_dist                dallocateA_dist
77d4783600SBarry Smith   #define Create_CompRowLoc_Matrix_dist dCreate_CompRowLoc_Matrix_dist
78d4783600SBarry Smith   #define SLU                           SLU_D
799e48eb9fSliuyangzhuan   #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
803e558968SBarry Smith     #define DeAllocLlu_3d              dDeAllocLlu_3d
813e558968SBarry Smith     #define DeAllocGlu_3d              dDeAllocGlu_3d
823e558968SBarry Smith     #define Destroy_A3d_gathered_on_2d dDestroy_A3d_gathered_on_2d
833e558968SBarry Smith     #define pgssvx3d                   pdgssvx3d
84a7b55187SBarry Smith   #endif
85448de63eSKris Buschelman #endif
86a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
87b2d1094fSBarry Smith   #include <superlu_sdefs.h>
88a6ad605dSBarry Smith #endif
89448de63eSKris Buschelman EXTERN_C_END
90a51cd166SPierre Jolivet PETSC_PRAGMA_DIAGNOSTIC_IGNORED_END()
91448de63eSKris Buschelman 
92448de63eSKris Buschelman typedef struct {
93448de63eSKris Buschelman   int_t      nprow, npcol, *row, *col;
94448de63eSKris Buschelman   gridinfo_t grid;
959e48eb9fSliuyangzhuan #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
963e558968SBarry Smith   PetscBool    use3d;
973e558968SBarry Smith   int_t        npdep; /* replication factor, must be power of two */
983e558968SBarry Smith   gridinfo3d_t grid3d;
993e558968SBarry Smith #endif
10072210a05SBarry Smith   superlu_dist_options_t options;
101448de63eSKris Buschelman   SuperMatrix            A_sup;
102448de63eSKris Buschelman   ScalePermstruct_t      ScalePermstruct;
103448de63eSKris Buschelman   LUstruct_t             LUstruct;
104448de63eSKris Buschelman   int                    StatPrint;
105448de63eSKris Buschelman   SOLVEstruct_t          SOLVEstruct;
10619660a1cSHong Zhang   fact_t                 FactPattern;
107448de63eSKris Buschelman   MPI_Comm               comm_superlu;
108d4783600SBarry Smith   PetscScalar           *val;
109ace3abfcSBarry Smith   PetscBool              matsolve_iscalled, matmatsolve_iscalled;
110ace3abfcSBarry Smith   PetscBool              CleanUpSuperLU_Dist; /* Flag to clean up (non-global) SuperLU objects during Destroy */
111a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
112a6ad605dSBarry Smith   sScalePermstruct_t sScalePermstruct;
113a6ad605dSBarry Smith   sLUstruct_t        sLUstruct;
114a6ad605dSBarry Smith   sSOLVEstruct_t     sSOLVEstruct;
115a6ad605dSBarry Smith   float             *sval;
116b2d1094fSBarry Smith   PetscBool          singleprecision; /* use single precision SuperLU_DIST from double precision PETSc */
117a6ad605dSBarry Smith   float             *sbptr;
118a6ad605dSBarry Smith #endif
119f0c56d0fSKris Buschelman } Mat_SuperLU_DIST;
120f0c56d0fSKris Buschelman 
MatSuperluDistGetDiagU_SuperLU_DIST(Mat F,PetscScalar * diagU)12166976f2fSJacob Faibussowitsch static PetscErrorCode MatSuperluDistGetDiagU_SuperLU_DIST(Mat F, PetscScalar *diagU)
122d71ae5a4SJacob Faibussowitsch {
12398dbe763SHong Zhang   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data;
124fb785984SHong Zhang 
125fb785984SHong Zhang   PetscFunctionBegin;
126e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU_DIST:pGetDiagU", pGetDiagU(F->rmap->N, &lu->LUstruct, &lu->grid, CASTDOUBLECOMPLEX diagU));
1273ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
128fb785984SHong Zhang }
129fb785984SHong Zhang 
MatSuperluDistGetDiagU(Mat F,PetscScalar * diagU)130d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSuperluDistGetDiagU(Mat F, PetscScalar *diagU)
131d71ae5a4SJacob Faibussowitsch {
132fb785984SHong Zhang   PetscFunctionBegin;
133fb785984SHong Zhang   PetscValidHeaderSpecific(F, MAT_CLASSID, 1);
134cac4c232SBarry Smith   PetscTryMethod(F, "MatSuperluDistGetDiagU_C", (Mat, PetscScalar *), (F, diagU));
1353ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
13698dbe763SHong Zhang }
13798dbe763SHong Zhang 
138ed876474SBarry Smith /*  This allows reusing the Superlu_DIST communicator and grid when only a single SuperLU_DIST matrix is used at a time */
139ed876474SBarry Smith typedef struct {
140ed876474SBarry Smith   MPI_Comm   comm;
141ed876474SBarry Smith   PetscBool  busy;
142ed876474SBarry Smith   gridinfo_t grid;
1439e48eb9fSliuyangzhuan #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
1443e558968SBarry Smith   PetscBool    use3d;
1453e558968SBarry Smith   gridinfo3d_t grid3d;
1463e558968SBarry Smith #endif
147ed876474SBarry Smith } PetscSuperLU_DIST;
1487def7855SStefano Zampini 
149ed876474SBarry Smith static PetscMPIInt Petsc_Superlu_dist_keyval = MPI_KEYVAL_INVALID;
150ed876474SBarry Smith 
Petsc_Superlu_dist_keyval_DeleteFn(MPI_Comm comm,PetscMPIInt keyval,void * attr_val,void * extra_state)1518434afd1SBarry Smith PETSC_EXTERN PetscMPIInt MPIAPI Petsc_Superlu_dist_keyval_DeleteFn(MPI_Comm comm, PetscMPIInt keyval, void *attr_val, void *extra_state)
152d71ae5a4SJacob Faibussowitsch {
153ed876474SBarry Smith   PetscSuperLU_DIST *context = (PetscSuperLU_DIST *)attr_val;
154ed876474SBarry Smith 
155ed876474SBarry Smith   PetscFunctionBegin;
1567c5b2466SBarry Smith   PetscCheckReturnMPI(keyval == Petsc_Superlu_dist_keyval, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Unexpected keyval");
1579e48eb9fSliuyangzhuan #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
1583e558968SBarry Smith   if (context->use3d) {
159e77caa6dSBarry Smith     PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit3d", superlu_gridexit3d(&context->grid3d));
1603e558968SBarry Smith   } else
1613e558968SBarry Smith #endif
162e77caa6dSBarry Smith     PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit", superlu_gridexit(&context->grid));
1637c5b2466SBarry Smith   PetscCallMPIReturnMPI(MPI_Comm_free(&context->comm));
1647c5b2466SBarry Smith   PetscCallReturnMPI(PetscFree(context));
165ed876474SBarry Smith   PetscFunctionReturn(MPI_SUCCESS);
166ed876474SBarry Smith }
167ed876474SBarry Smith 
16895a31119SBarry Smith /*
16995a31119SBarry Smith    Performs MPI_Comm_free_keyval() on Petsc_Superlu_dist_keyval but keeps the global variable for
17095a31119SBarry Smith    users who do not destroy all PETSc objects before PetscFinalize().
17195a31119SBarry Smith 
1728434afd1SBarry Smith    The value Petsc_Superlu_dist_keyval is retained so that Petsc_Superlu_dist_keyval_DeleteFn()
17395a31119SBarry Smith    can still check that the keyval associated with the MPI communicator is correct when the MPI
17495a31119SBarry Smith    communicator is destroyed.
17595a31119SBarry Smith 
17695a31119SBarry Smith    This is called in PetscFinalize()
17795a31119SBarry Smith */
Petsc_Superlu_dist_keyval_free(void)178d71ae5a4SJacob Faibussowitsch static PetscErrorCode Petsc_Superlu_dist_keyval_free(void)
179d71ae5a4SJacob Faibussowitsch {
18095a31119SBarry Smith   PetscMPIInt Petsc_Superlu_dist_keyval_temp = Petsc_Superlu_dist_keyval;
181ed876474SBarry Smith 
182ed876474SBarry Smith   PetscFunctionBegin;
1839566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_free_keyval(&Petsc_Superlu_dist_keyval_temp));
1843ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
185ed876474SBarry Smith }
186ed876474SBarry Smith 
MatDestroy_SuperLU_DIST(Mat A)187d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
188d71ae5a4SJacob Faibussowitsch {
189a305db0bSBarry Smith   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data;
190448de63eSKris Buschelman 
191448de63eSKris Buschelman   PetscFunctionBegin;
192a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
193b2d1094fSBarry Smith   PetscCall(PetscFree(lu->sbptr));
194a6ad605dSBarry Smith #endif
195a305db0bSBarry Smith   if (lu->CleanUpSuperLU_Dist) {
196448de63eSKris Buschelman     /* Deallocate SuperLU_DIST storage */
197e77caa6dSBarry Smith     PetscStackCallExternalVoid("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist", Destroy_CompRowLoc_Matrix_dist(&lu->A_sup));
198b2d1094fSBarry Smith     if (lu->options.SolveInitialized) {
199a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
200b2d1094fSBarry Smith       if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", sSolveFinalize(&lu->options, &lu->sSOLVEstruct));
201a6ad605dSBarry Smith       else
202a6ad605dSBarry Smith #endif
203a6ad605dSBarry Smith         PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
204b2d1094fSBarry Smith     }
2059e48eb9fSliuyangzhuan #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
2063e558968SBarry Smith     if (lu->use3d) {
207a6ad605dSBarry Smith   #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
208b2d1094fSBarry Smith       if (lu->singleprecision) {
2099e48eb9fSliuyangzhuan         PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->sLUstruct));
2109e48eb9fSliuyangzhuan         PetscStackCallExternalVoid("SuperLU_DIST:Destroy_A3d_gathered_on_2d", sDestroy_A3d_gathered_on_2d(&lu->sSOLVEstruct, &lu->grid3d));
2119e48eb9fSliuyangzhuan         PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", sScalePermstructFree(&lu->sScalePermstruct));
2129e48eb9fSliuyangzhuan         PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", sLUstructFree(&lu->sLUstruct));
213a6ad605dSBarry Smith       } else
214a6ad605dSBarry Smith   #endif
215a6ad605dSBarry Smith       {
2169e48eb9fSliuyangzhuan         PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct));
217a6ad605dSBarry Smith         PetscStackCallExternalVoid("SuperLU_DIST:Destroy_A3d_gathered_on_2d", Destroy_A3d_gathered_on_2d(&lu->SOLVEstruct, &lu->grid3d));
2189e48eb9fSliuyangzhuan         PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", ScalePermstructFree(&lu->ScalePermstruct));
2199e48eb9fSliuyangzhuan         PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", LUstructFree(&lu->LUstruct));
2209e48eb9fSliuyangzhuan       }
2213e558968SBarry Smith     } else
2223e558968SBarry Smith #endif
223a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
224b2d1094fSBarry Smith       if (lu->singleprecision) {
225b2d1094fSBarry Smith       PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->cmap->N, &lu->grid, &lu->sLUstruct));
226b2d1094fSBarry Smith       PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", sScalePermstructFree(&lu->sScalePermstruct));
227b2d1094fSBarry Smith       PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", sLUstructFree(&lu->sLUstruct));
228a6ad605dSBarry Smith     } else
229a6ad605dSBarry Smith #endif
230a6ad605dSBarry Smith     {
231e77caa6dSBarry Smith       PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct));
232e77caa6dSBarry Smith       PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", ScalePermstructFree(&lu->ScalePermstruct));
233e77caa6dSBarry Smith       PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", LUstructFree(&lu->LUstruct));
234b2d1094fSBarry Smith     }
2353e558968SBarry Smith     /* Release the SuperLU_DIST process grid only if the matrix has its own copy, that is it is not in the communicator context */
236ed876474SBarry Smith     if (lu->comm_superlu) {
2379e48eb9fSliuyangzhuan #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
2383e558968SBarry Smith       if (lu->use3d) {
239e77caa6dSBarry Smith         PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit3d", superlu_gridexit3d(&lu->grid3d));
2403e558968SBarry Smith       } else
2413e558968SBarry Smith #endif
242e77caa6dSBarry Smith         PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit", superlu_gridexit(&lu->grid));
243c866f80eSFande Kong     }
244c866f80eSFande Kong   }
245c866f80eSFande Kong   /*
246c866f80eSFande Kong    * We always need to release the communicator that was created in MatGetFactor_aij_superlu_dist.
247c866f80eSFande Kong    * lu->CleanUpSuperLU_Dist was turned on in MatLUFactorSymbolic_SuperLU_DIST. There are some use
248c866f80eSFande Kong    * cases where we only create a matrix but do not solve mat. In these cases, lu->CleanUpSuperLU_Dist
249c866f80eSFande Kong    * is off, and the communicator was not released or marked as "not busy " in the old code.
250c866f80eSFande Kong    * Here we try to release comm regardless.
251c866f80eSFande Kong   */
252c866f80eSFande Kong   if (lu->comm_superlu) {
2539566063dSJacob Faibussowitsch     PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &lu->comm_superlu));
254ed876474SBarry Smith   } else {
255ed876474SBarry Smith     PetscSuperLU_DIST *context;
256ed876474SBarry Smith     MPI_Comm           comm;
257b8b5be36SMartin Diehl     PetscMPIInt        iflg;
258ed876474SBarry Smith 
2599566063dSJacob Faibussowitsch     PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
260b8b5be36SMartin Diehl     PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &iflg));
261b8b5be36SMartin Diehl     if (iflg) context->busy = PETSC_FALSE;
262ed876474SBarry Smith   }
263c866f80eSFande Kong 
2649566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->data));
265fb785984SHong Zhang   /* clear composed functions */
2669566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
2679566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSuperluDistGetDiagU_C", NULL));
2683ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
269448de63eSKris Buschelman }
270448de63eSKris Buschelman 
MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)271d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSolve_SuperLU_DIST(Mat A, Vec b_mpi, Vec x)
272d71ae5a4SJacob Faibussowitsch {
273a305db0bSBarry Smith   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data;
2741e8514b5SHong Zhang   PetscInt          m  = A->rmap->n;
275448de63eSKris Buschelman   SuperLUStat_t     stat;
276d4783600SBarry Smith   PetscReal         berr[1];
2777cac11d5SHong Zhang   PetscScalar      *bptr = NULL;
278a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
279b2d1094fSBarry Smith   float sberr[1];
280a6ad605dSBarry Smith #endif
28115082402SBarry Smith   int              info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
282dff31646SBarry Smith   static PetscBool cite = PETSC_FALSE;
283448de63eSKris Buschelman 
284448de63eSKris Buschelman   PetscFunctionBegin;
28595ca0fccSBarry Smith   PetscCheck(lu->options.Fact == FACTORED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "SuperLU_DIST options.Fact must equal FACTORED");
2869371c9d4SSatish Balay   PetscCall(PetscCitationsRegister("@article{lidemmel03,\n  author = {Xiaoye S. Li and James W. Demmel},\n  title = {{SuperLU_DIST}: A Scalable Distributed-Memory Sparse Direct\n           Solver for Unsymmetric Linear Systems},\n  journal = {ACM "
2879371c9d4SSatish Balay                                    "Trans. Mathematical Software},\n  volume = {29},\n  number = {2},\n  pages = {110-140},\n  year = 2003\n}\n",
2889371c9d4SSatish Balay                                    &cite));
289b7d2c110SHong Zhang 
29049d41e62SHong Zhang   if (lu->options.SolveInitialized && !lu->matsolve_iscalled) {
29149d41e62SHong Zhang     /* see comments in MatMatSolve() */
292a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
293b2d1094fSBarry Smith     if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", sSolveFinalize(&lu->options, &lu->sSOLVEstruct));
294a6ad605dSBarry Smith     else
295a6ad605dSBarry Smith #endif
296a6ad605dSBarry Smith       PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
29749d41e62SHong Zhang     lu->options.SolveInitialized = NO;
298448de63eSKris Buschelman   }
2999566063dSJacob Faibussowitsch   PetscCall(VecCopy(b_mpi, x));
3009566063dSJacob Faibussowitsch   PetscCall(VecGetArray(x, &bptr));
301a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
302b2d1094fSBarry Smith   if (lu->singleprecision) {
303b2d1094fSBarry Smith     PetscInt n;
304b2d1094fSBarry Smith     PetscCall(VecGetLocalSize(x, &n));
305b2d1094fSBarry Smith     if (!lu->sbptr) PetscCall(PetscMalloc1(n, &lu->sbptr));
306b2d1094fSBarry Smith     for (PetscInt i = 0; i < n; i++) lu->sbptr[i] = PetscRealPart(bptr[i]); /* PetscRealPart() is a no-op to allow compiling with complex */
307b2d1094fSBarry Smith   }
308a6ad605dSBarry Smith #endif
309448de63eSKris Buschelman 
310e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */
3119e48eb9fSliuyangzhuan #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
312b2d1094fSBarry Smith   if (lu->use3d) {
313a6ad605dSBarry Smith   #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
3149ad2cedaSPierre Jolivet     if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", psgssvx3d(&lu->options, &lu->A_sup, &lu->sScalePermstruct, lu->sbptr, (int)m, 1, &lu->grid3d, &lu->sLUstruct, &lu->sSOLVEstruct, sberr, &stat, &info));
315a6ad605dSBarry Smith     else
316a6ad605dSBarry Smith   #endif
3179ad2cedaSPierre Jolivet       PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, (int)m, 1, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
3189e48eb9fSliuyangzhuan     PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "pgssvx3d fails, info: %d", info);
319b2d1094fSBarry Smith   } else
320448de63eSKris Buschelman #endif
3219e48eb9fSliuyangzhuan   {
322a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
3239ad2cedaSPierre Jolivet     if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", psgssvx(&lu->options, &lu->A_sup, &lu->sScalePermstruct, lu->sbptr, (int)m, 1, &lu->grid, &lu->sLUstruct, &lu->sSOLVEstruct, sberr, &stat, &info));
324a6ad605dSBarry Smith     else
325a6ad605dSBarry Smith #endif
3269ad2cedaSPierre Jolivet       PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, (int)m, 1, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
3279e48eb9fSliuyangzhuan     PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "pgssvx fails, info: %d", info);
3289e48eb9fSliuyangzhuan   }
329e77caa6dSBarry Smith   if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */
330e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat));
331a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
332b2d1094fSBarry Smith   if (lu->singleprecision) {
333b2d1094fSBarry Smith     PetscInt n;
334b2d1094fSBarry Smith     PetscCall(VecGetLocalSize(x, &n));
335b2d1094fSBarry Smith     for (PetscInt i = 0; i < n; i++) bptr[i] = lu->sbptr[i];
336b2d1094fSBarry Smith   }
337a6ad605dSBarry Smith #endif
3389566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(x, &bptr));
33949d41e62SHong Zhang   lu->matsolve_iscalled    = PETSC_TRUE;
34049d41e62SHong Zhang   lu->matmatsolve_iscalled = PETSC_FALSE;
3413ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
342448de63eSKris Buschelman }
343448de63eSKris Buschelman 
MatMatSolve_SuperLU_DIST(Mat A,Mat B_mpi,Mat X)344d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatSolve_SuperLU_DIST(Mat A, Mat B_mpi, Mat X)
345d71ae5a4SJacob Faibussowitsch {
346a305db0bSBarry Smith   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data;
3471e8514b5SHong Zhang   PetscInt          m  = A->rmap->n, nrhs;
34849d41e62SHong Zhang   SuperLUStat_t     stat;
349d4783600SBarry Smith   PetscReal         berr[1];
35049d41e62SHong Zhang   PetscScalar      *bptr;
35149d41e62SHong Zhang   int               info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
352bda8bf91SBarry Smith   PetscBool         flg;
35349d41e62SHong Zhang 
35449d41e62SHong Zhang   PetscFunctionBegin;
355a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
356b2d1094fSBarry Smith   PetscCheck(!lu->singleprecision, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for -pc_precision single");
357a6ad605dSBarry Smith #endif
35895ca0fccSBarry Smith   PetscCheck(lu->options.Fact == FACTORED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "SuperLU_DIST options.Fact must equal FACTORED");
3599566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompareAny((PetscObject)B_mpi, &flg, MATSEQDENSE, MATMPIDENSE, NULL));
36095ca0fccSBarry Smith   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix B must be MATDENSE matrix");
361f9fb9879SHong Zhang   if (X != B_mpi) {
3629566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &flg, MATSEQDENSE, MATMPIDENSE, NULL));
36328b400f6SJacob Faibussowitsch     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");
364f9fb9879SHong Zhang   }
36549d41e62SHong Zhang 
36649d41e62SHong Zhang   if (lu->options.SolveInitialized && !lu->matmatsolve_iscalled) {
36749d41e62SHong Zhang     /* communication pattern of SOLVEstruct is unlikely created for matmatsolve,
36849d41e62SHong Zhang        thus destroy it and create a new SOLVEstruct.
36949d41e62SHong Zhang        Otherwise it may result in memory corruption or incorrect solution
370c4762a1bSJed Brown        See src/mat/tests/ex125.c */
371e77caa6dSBarry Smith     PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
37249d41e62SHong Zhang     lu->options.SolveInitialized = NO;
37349d41e62SHong Zhang   }
37448a46eb9SPierre Jolivet   if (X != B_mpi) PetscCall(MatCopy(B_mpi, X, SAME_NONZERO_PATTERN));
37549d41e62SHong Zhang 
3769566063dSJacob Faibussowitsch   PetscCall(MatGetSize(B_mpi, NULL, &nrhs));
37749d41e62SHong Zhang 
378e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */
3799566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArray(X, &bptr));
3801e8514b5SHong Zhang 
3819e48eb9fSliuyangzhuan #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
3829ad2cedaSPierre Jolivet   if (lu->use3d) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, (int)m, (int)nrhs, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
3833e558968SBarry Smith   else
38449d41e62SHong Zhang #endif
3859ad2cedaSPierre Jolivet     PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, (int)m, (int)nrhs, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
38695ca0fccSBarry Smith   PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "pdgssvx fails, info: %d", info);
3879566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(X, &bptr));
38849d41e62SHong Zhang 
389e77caa6dSBarry Smith   if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */
390e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat));
39149d41e62SHong Zhang   lu->matsolve_iscalled    = PETSC_FALSE;
39249d41e62SHong Zhang   lu->matmatsolve_iscalled = PETSC_TRUE;
3933ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
39449d41e62SHong Zhang }
39549d41e62SHong Zhang 
3963d4d7ae9SHong Zhang /*
3973d4d7ae9SHong Zhang   input:
3983d4d7ae9SHong Zhang    F:        numeric Cholesky factor
3993d4d7ae9SHong Zhang   output:
4003d4d7ae9SHong Zhang    nneg:     total number of negative pivots
4013d4d7ae9SHong Zhang    nzero:    total number of zero pivots
4023d4d7ae9SHong Zhang    npos:     (global dimension of F) - nneg - nzero
4033d4d7ae9SHong Zhang */
MatGetInertia_SuperLU_DIST(Mat F,PetscInt * nneg,PetscInt * nzero,PetscInt * npos)404d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetInertia_SuperLU_DIST(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
405d71ae5a4SJacob Faibussowitsch {
4063d4d7ae9SHong Zhang   Mat_SuperLU_DIST *lu    = (Mat_SuperLU_DIST *)F->data;
4073d4d7ae9SHong Zhang   PetscScalar      *diagU = NULL;
4083d4d7ae9SHong Zhang   PetscInt          M, i, neg = 0, zero = 0, pos = 0;
40909d31890SHong Zhang   PetscReal         r;
4103d4d7ae9SHong Zhang 
4113d4d7ae9SHong Zhang   PetscFunctionBegin;
412a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
413b2d1094fSBarry Smith   PetscCheck(!lu->singleprecision, PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Not for -pc_precision single");
414a6ad605dSBarry Smith #endif
41595ca0fccSBarry Smith   PetscCheck(F->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix factor F is not assembled");
41695ca0fccSBarry Smith   PetscCheck(lu->options.RowPerm == NOROWPERM, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must set NOROWPERM");
4179566063dSJacob Faibussowitsch   PetscCall(MatGetSize(F, &M, NULL));
4189566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(M, &diagU));
4199566063dSJacob Faibussowitsch   PetscCall(MatSuperluDistGetDiagU(F, diagU));
4203d4d7ae9SHong Zhang   for (i = 0; i < M; i++) {
42109d31890SHong Zhang #if defined(PETSC_USE_COMPLEX)
422b1fc5afdSJose E. Roman     r = PetscAbsReal(PetscImaginaryPart(diagU[i]));
423b1fc5afdSJose E. Roman     PetscCheck(r < 1000 * PETSC_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "diagU[%" PetscInt_FMT "]=%g + i %g is non-real", i, (double)PetscRealPart(diagU[i]), (double)PetscImaginaryPart(diagU[i]));
42409d31890SHong Zhang     r = PetscRealPart(diagU[i]);
42509d31890SHong Zhang #else
42609d31890SHong Zhang     r = diagU[i];
42709d31890SHong Zhang #endif
42809d31890SHong Zhang     if (r > 0) {
4293d4d7ae9SHong Zhang       pos++;
43009d31890SHong Zhang     } else if (r < 0) {
4313d4d7ae9SHong Zhang       neg++;
4323d4d7ae9SHong Zhang     } else zero++;
4333d4d7ae9SHong Zhang   }
43409d31890SHong Zhang 
4359566063dSJacob Faibussowitsch   PetscCall(PetscFree(diagU));
4362e38f1a5SHong Zhang   if (nneg) *nneg = neg;
4372e38f1a5SHong Zhang   if (nzero) *nzero = zero;
4382e38f1a5SHong Zhang   if (npos) *npos = pos;
4393ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4403d4d7ae9SHong Zhang }
44149d41e62SHong Zhang 
MatLUFactorNumeric_SuperLU_DIST(Mat F,Mat A,const MatFactorInfo * info)442d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F, Mat A, const MatFactorInfo *info)
443d71ae5a4SJacob Faibussowitsch {
4443d4d7ae9SHong Zhang   Mat_SuperLU_DIST  *lu = (Mat_SuperLU_DIST *)F->data;
445c8f76b2fSStefano Zampini   Mat                Aloc;
446c8f76b2fSStefano Zampini   const PetscScalar *av;
447c8f76b2fSStefano Zampini   const PetscInt    *ai = NULL, *aj = NULL;
448*2a8381b2SBarry Smith   PetscInt           nz, unused;
44915082402SBarry Smith   int                sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */
450448de63eSKris Buschelman   SuperLUStat_t      stat;
451d4783600SBarry Smith   PetscReal         *berr = 0;
452a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
453b2d1094fSBarry Smith   float *sberr = 0;
454a6ad605dSBarry Smith #endif
455c8f76b2fSStefano Zampini   PetscBool ismpiaij, isseqaij, flg;
456448de63eSKris Buschelman 
457448de63eSKris Buschelman   PetscFunctionBegin;
4589566063dSJacob Faibussowitsch   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij));
4599566063dSJacob Faibussowitsch   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &ismpiaij));
460c8f76b2fSStefano Zampini   if (ismpiaij) {
4619566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &Aloc));
462c8f76b2fSStefano Zampini   } else if (isseqaij) {
4639566063dSJacob Faibussowitsch     PetscCall(PetscObjectReference((PetscObject)A));
464c8f76b2fSStefano Zampini     Aloc = A;
46598921bdaSJacob Faibussowitsch   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
466401c3357SHong Zhang 
467*2a8381b2SBarry Smith   PetscCall(MatGetRowIJ(Aloc, 0, PETSC_FALSE, PETSC_FALSE, &unused, &ai, &aj, &flg));
46895ca0fccSBarry Smith   PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "GetRowIJ failed");
4699566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(Aloc, &av));
470c8f76b2fSStefano Zampini   nz = ai[Aloc->rmap->n];
471448de63eSKris Buschelman 
4721e8514b5SHong Zhang   /* Allocations for A_sup */
47319660a1cSHong Zhang   if (lu->options.Fact == DOFACT) { /* first numeric factorization */
474a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
475b2d1094fSBarry Smith     if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", sallocateA_dist(Aloc->rmap->n, nz, &lu->sval, &lu->col, &lu->row));
476a6ad605dSBarry Smith     else
477a6ad605dSBarry Smith #endif
478a6ad605dSBarry Smith       PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR & lu->val, &lu->col, &lu->row));
479448de63eSKris Buschelman   } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
48019660a1cSHong Zhang     if (lu->FactPattern == SamePattern_SameRowPerm) {
48119660a1cSHong Zhang       lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
48228865de0SHong Zhang     } else if (lu->FactPattern == SamePattern) {
4839e48eb9fSliuyangzhuan #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
4843e558968SBarry Smith       if (lu->use3d) {
485a6ad605dSBarry Smith   #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
486b2d1094fSBarry Smith         if (lu->singleprecision) {
487b2d1094fSBarry Smith           PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->sLUstruct));
488b2d1094fSBarry Smith           PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", sSolveFinalize(&lu->options, &lu->sSOLVEstruct));
489a6ad605dSBarry Smith         } else
490a6ad605dSBarry Smith   #endif
491a6ad605dSBarry Smith         {
492e77caa6dSBarry Smith           PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct));
493e77caa6dSBarry Smith           PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
494b2d1094fSBarry Smith         }
4953e558968SBarry Smith       } else
4963e558968SBarry Smith #endif
497a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
498b2d1094fSBarry Smith         if (lu->singleprecision)
499b2d1094fSBarry Smith         PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->rmap->N, &lu->grid, &lu->sLUstruct));
500a6ad605dSBarry Smith       else
501a6ad605dSBarry Smith #endif
502a6ad605dSBarry Smith         PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct));
503448de63eSKris Buschelman       lu->options.Fact = SamePattern;
504f1869c40SHong Zhang     } else if (lu->FactPattern == DOFACT) {
505e77caa6dSBarry Smith       PetscStackCallExternalVoid("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist", Destroy_CompRowLoc_Matrix_dist(&lu->A_sup));
506a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
507a6ad605dSBarry Smith       if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->rmap->N, &lu->grid, &lu->sLUstruct));
508a6ad605dSBarry Smith       else
509a6ad605dSBarry Smith #endif
510e77caa6dSBarry Smith         PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct));
511f1869c40SHong Zhang       lu->options.Fact = DOFACT;
512a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
513b2d1094fSBarry Smith       if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", sallocateA_dist(Aloc->rmap->n, nz, &lu->sval, &lu->col, &lu->row));
514a6ad605dSBarry Smith       else
515a6ad605dSBarry Smith #endif
516a6ad605dSBarry Smith         PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR & lu->val, &lu->col, &lu->row));
517ed876474SBarry Smith     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "options.Fact must be one of SamePattern SamePattern_SameRowPerm DOFACT");
51819660a1cSHong Zhang   }
5191e8514b5SHong Zhang 
5201e8514b5SHong Zhang   /* Copy AIJ matrix to superlu_dist matrix */
5219566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(lu->row, ai, Aloc->rmap->n + 1));
5229566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(lu->col, aj, nz));
523a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
524b2d1094fSBarry Smith   if (lu->singleprecision)
525b2d1094fSBarry Smith     for (PetscInt i = 0; i < nz; i++) lu->sval[i] = PetscRealPart(av[i]); /* PetscRealPart() is a no-op to allow compiling with complex */
526a6ad605dSBarry Smith   else
527a6ad605dSBarry Smith #endif
528a6ad605dSBarry Smith     PetscCall(PetscArraycpy(lu->val, av, nz));
529*2a8381b2SBarry Smith   PetscCall(MatRestoreRowIJ(Aloc, 0, PETSC_FALSE, PETSC_FALSE, &unused, &ai, &aj, &flg));
53095ca0fccSBarry Smith   PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "RestoreRowIJ failed");
5319566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(Aloc, &av));
5329566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&Aloc));
53328865de0SHong Zhang 
5341e8514b5SHong Zhang   /* Create and setup A_sup */
53528865de0SHong Zhang   if (lu->options.Fact == DOFACT) {
536a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
537b2d1094fSBarry Smith     if (lu->singleprecision)
538b2d1094fSBarry Smith       PetscStackCallExternalVoid("SuperLU_DIST:Create_CompRowLoc_Matrix_dist", sCreate_CompRowLoc_Matrix_dist(&lu->A_sup, A->rmap->N, A->cmap->N, nz, A->rmap->n, A->rmap->rstart, lu->sval, lu->col, lu->row, SLU_NR_loc, SLU_S, SLU_GE));
539b2d1094fSBarry Smith     else
540a6ad605dSBarry Smith #endif
541e77caa6dSBarry Smith       PetscStackCallExternalVoid("SuperLU_DIST:Create_CompRowLoc_Matrix_dist", Create_CompRowLoc_Matrix_dist(&lu->A_sup, A->rmap->N, A->cmap->N, nz, A->rmap->n, A->rmap->rstart, CASTDOUBLECOMPLEX lu->val, lu->col, lu->row, SLU_NR_loc, SLU, SLU_GE));
542448de63eSKris Buschelman   }
543448de63eSKris Buschelman 
544448de63eSKris Buschelman   /* Factor the matrix. */
545e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */
5469e48eb9fSliuyangzhuan #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
5473e558968SBarry Smith   if (lu->use3d) {
548a6ad605dSBarry Smith   #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
5499ad2cedaSPierre Jolivet     if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", psgssvx3d(&lu->options, &lu->A_sup, &lu->sScalePermstruct, 0, (int)A->rmap->n, 0, &lu->grid3d, &lu->sLUstruct, &lu->sSOLVEstruct, sberr, &stat, &sinfo));
550a6ad605dSBarry Smith     else
551a6ad605dSBarry Smith   #endif
5529ad2cedaSPierre Jolivet       PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, (int)A->rmap->n, 0, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo));
5533e558968SBarry Smith   } else
554448de63eSKris Buschelman #endif
5559e48eb9fSliuyangzhuan   {
556a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
5579ad2cedaSPierre Jolivet     if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", psgssvx(&lu->options, &lu->A_sup, &lu->sScalePermstruct, 0, (int)A->rmap->n, 0, &lu->grid, &lu->sLUstruct, &lu->sSOLVEstruct, sberr, &stat, &sinfo));
558a6ad605dSBarry Smith     else
559a6ad605dSBarry Smith #endif
5609ad2cedaSPierre Jolivet       PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, (int)A->rmap->n, 0, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo));
5619e48eb9fSliuyangzhuan   }
5621b4120e5SHong Zhang   if (sinfo > 0) {
56395ca0fccSBarry Smith     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot in row %d", sinfo);
5641b4120e5SHong Zhang     if (sinfo <= lu->A_sup.ncol) {
565603e8f96SBarry Smith       F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
5669566063dSJacob Faibussowitsch       PetscCall(PetscInfo(F, "U(i,i) is exactly zero, i= %d\n", sinfo));
5671b4120e5SHong Zhang     } else if (sinfo > lu->A_sup.ncol) {
5681b4120e5SHong Zhang       /*
5691b4120e5SHong Zhang        number of bytes allocated when memory allocation
5701b4120e5SHong Zhang        failure occurred, plus A->ncol.
5711b4120e5SHong Zhang        */
572603e8f96SBarry Smith       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
5739566063dSJacob Faibussowitsch       PetscCall(PetscInfo(F, "Number of bytes allocated when memory allocation fails %d\n", sinfo));
5741b4120e5SHong Zhang     }
57595ca0fccSBarry Smith   } else PetscCheck(sinfo >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "info = %d, argument in p*gssvx() had an illegal value", sinfo);
5761b4120e5SHong Zhang 
577ac530a7eSPierre Jolivet   if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */
578e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat));
5793d4d7ae9SHong Zhang   F->assembled     = PETSC_TRUE;
5803d4d7ae9SHong Zhang   F->preallocated  = PETSC_TRUE;
58119660a1cSHong Zhang   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
5823ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
583448de63eSKris Buschelman }
584448de63eSKris Buschelman 
585f0b74427SPierre Jolivet /* Note the PETSc r and c permutations are ignored */
MatLUFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,IS c,const MatFactorInfo * info)586d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
587d71ae5a4SJacob Faibussowitsch {
588a305db0bSBarry Smith   Mat_SuperLU_DIST  *lu = (Mat_SuperLU_DIST *)F->data;
58926cc229bSBarry Smith   PetscInt           M = A->rmap->N, N = A->cmap->N, indx;
590b8b5be36SMartin Diehl   PetscMPIInt        size, iflg;
59126cc229bSBarry Smith   PetscBool          flg, set;
59226cc229bSBarry Smith   const char        *colperm[]     = {"NATURAL", "MMD_AT_PLUS_A", "MMD_ATA", "METIS_AT_PLUS_A", "PARMETIS"};
59326cc229bSBarry Smith   const char        *rowperm[]     = {"NOROWPERM", "LargeDiag_MC64", "LargeDiag_AWPM", "MY_PERMR"};
59426cc229bSBarry Smith   const char        *factPattern[] = {"SamePattern", "SamePattern_SameRowPerm", "DOFACT"};
59526cc229bSBarry Smith   MPI_Comm           comm;
59626cc229bSBarry Smith   PetscSuperLU_DIST *context = NULL;
597b24902e0SBarry Smith 
598b24902e0SBarry Smith   PetscFunctionBegin;
59926cc229bSBarry Smith   /* Set options to F */
60026cc229bSBarry Smith   PetscCall(PetscObjectGetComm((PetscObject)F, &comm));
60126cc229bSBarry Smith   PetscCallMPI(MPI_Comm_size(comm, &size));
60226cc229bSBarry Smith 
60326cc229bSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "SuperLU_Dist Options", "Mat");
60426cc229bSBarry Smith   PetscCall(PetscOptionsBool("-mat_superlu_dist_equil", "Equilibrate matrix", "None", lu->options.Equil ? PETSC_TRUE : PETSC_FALSE, &flg, &set));
60526cc229bSBarry Smith   if (set && !flg) lu->options.Equil = NO;
60626cc229bSBarry Smith 
60726cc229bSBarry Smith   PetscCall(PetscOptionsEList("-mat_superlu_dist_rowperm", "Row permutation", "None", rowperm, 4, rowperm[1], &indx, &flg));
60826cc229bSBarry Smith   if (flg) {
60926cc229bSBarry Smith     switch (indx) {
610d71ae5a4SJacob Faibussowitsch     case 0:
611d71ae5a4SJacob Faibussowitsch       lu->options.RowPerm = NOROWPERM;
612d71ae5a4SJacob Faibussowitsch       break;
613d71ae5a4SJacob Faibussowitsch     case 1:
614d71ae5a4SJacob Faibussowitsch       lu->options.RowPerm = LargeDiag_MC64;
615d71ae5a4SJacob Faibussowitsch       break;
616d71ae5a4SJacob Faibussowitsch     case 2:
617d71ae5a4SJacob Faibussowitsch       lu->options.RowPerm = LargeDiag_AWPM;
618d71ae5a4SJacob Faibussowitsch       break;
619d71ae5a4SJacob Faibussowitsch     case 3:
620d71ae5a4SJacob Faibussowitsch       lu->options.RowPerm = MY_PERMR;
621d71ae5a4SJacob Faibussowitsch       break;
622d71ae5a4SJacob Faibussowitsch     default:
623d71ae5a4SJacob Faibussowitsch       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown row permutation");
62426cc229bSBarry Smith     }
62526cc229bSBarry Smith   }
62626cc229bSBarry Smith 
62726cc229bSBarry Smith   PetscCall(PetscOptionsEList("-mat_superlu_dist_colperm", "Column permutation", "None", colperm, 5, colperm[3], &indx, &flg));
62826cc229bSBarry Smith   if (flg) {
62926cc229bSBarry Smith     switch (indx) {
630d71ae5a4SJacob Faibussowitsch     case 0:
631d71ae5a4SJacob Faibussowitsch       lu->options.ColPerm = NATURAL;
632d71ae5a4SJacob Faibussowitsch       break;
633d71ae5a4SJacob Faibussowitsch     case 1:
634d71ae5a4SJacob Faibussowitsch       lu->options.ColPerm = MMD_AT_PLUS_A;
635d71ae5a4SJacob Faibussowitsch       break;
636d71ae5a4SJacob Faibussowitsch     case 2:
637d71ae5a4SJacob Faibussowitsch       lu->options.ColPerm = MMD_ATA;
638d71ae5a4SJacob Faibussowitsch       break;
639d71ae5a4SJacob Faibussowitsch     case 3:
640d71ae5a4SJacob Faibussowitsch       lu->options.ColPerm = METIS_AT_PLUS_A;
641d71ae5a4SJacob Faibussowitsch       break;
64226cc229bSBarry Smith     case 4:
64326cc229bSBarry Smith       lu->options.ColPerm = PARMETIS; /* only works for np>1 */
64426cc229bSBarry Smith       break;
645d71ae5a4SJacob Faibussowitsch     default:
646d71ae5a4SJacob Faibussowitsch       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation");
64726cc229bSBarry Smith     }
64826cc229bSBarry Smith   }
64926cc229bSBarry Smith 
65026cc229bSBarry Smith   lu->options.ReplaceTinyPivot = NO;
65126cc229bSBarry Smith   PetscCall(PetscOptionsBool("-mat_superlu_dist_replacetinypivot", "Replace tiny pivots", "None", lu->options.ReplaceTinyPivot ? PETSC_TRUE : PETSC_FALSE, &flg, &set));
65226cc229bSBarry Smith   if (set && flg) lu->options.ReplaceTinyPivot = YES;
65326cc229bSBarry Smith 
65426cc229bSBarry Smith   lu->options.ParSymbFact = NO;
65526cc229bSBarry Smith   PetscCall(PetscOptionsBool("-mat_superlu_dist_parsymbfact", "Parallel symbolic factorization", "None", PETSC_FALSE, &flg, &set));
65626cc229bSBarry Smith   if (set && flg && size > 1) {
65726cc229bSBarry Smith #if defined(PETSC_HAVE_PARMETIS)
65826cc229bSBarry Smith     lu->options.ParSymbFact = YES;
65926cc229bSBarry Smith     lu->options.ColPerm     = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */
66026cc229bSBarry Smith #else
66126cc229bSBarry Smith     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "parsymbfact needs PARMETIS");
66226cc229bSBarry Smith #endif
66326cc229bSBarry Smith   }
66426cc229bSBarry Smith 
66526cc229bSBarry Smith   lu->FactPattern = SamePattern;
66626cc229bSBarry Smith   PetscCall(PetscOptionsEList("-mat_superlu_dist_fact", "Sparsity pattern for repeated matrix factorization", "None", factPattern, 3, factPattern[0], &indx, &flg));
66726cc229bSBarry Smith   if (flg) {
66826cc229bSBarry Smith     switch (indx) {
669d71ae5a4SJacob Faibussowitsch     case 0:
670d71ae5a4SJacob Faibussowitsch       lu->FactPattern = SamePattern;
671d71ae5a4SJacob Faibussowitsch       break;
672d71ae5a4SJacob Faibussowitsch     case 1:
673d71ae5a4SJacob Faibussowitsch       lu->FactPattern = SamePattern_SameRowPerm;
674d71ae5a4SJacob Faibussowitsch       break;
675d71ae5a4SJacob Faibussowitsch     case 2:
676d71ae5a4SJacob Faibussowitsch       lu->FactPattern = DOFACT;
677d71ae5a4SJacob Faibussowitsch       break;
67826cc229bSBarry Smith     }
67926cc229bSBarry Smith   }
68026cc229bSBarry Smith 
68126cc229bSBarry Smith   lu->options.IterRefine = NOREFINE;
68226cc229bSBarry Smith   PetscCall(PetscOptionsBool("-mat_superlu_dist_iterrefine", "Use iterative refinement", "None", lu->options.IterRefine == NOREFINE ? PETSC_FALSE : PETSC_TRUE, &flg, &set));
6833e56d16dSNuno Nobre   if (set && flg) lu->options.IterRefine = SLU_DOUBLE;
68426cc229bSBarry Smith 
68526cc229bSBarry Smith   if (PetscLogPrintInfo) lu->options.PrintStat = YES;
68626cc229bSBarry Smith   else lu->options.PrintStat = NO;
687e9b55caeSJunchao Zhang   PetscCall(PetscOptionsDeprecated("-mat_superlu_dist_statprint", "-mat_superlu_dist_printstat", "3.19", NULL));
688e9b55caeSJunchao Zhang   PetscCall(PetscOptionsBool("-mat_superlu_dist_printstat", "Print factorization information", "None", (PetscBool)lu->options.PrintStat, (PetscBool *)&lu->options.PrintStat, NULL));
68926cc229bSBarry Smith 
690a0940d2aSSatish Balay #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(8, 0, 0)
6913e56d16dSNuno Nobre   lu->options.superlu_acc_offload = 1;
6923e56d16dSNuno Nobre   PetscCall(PetscOptionsBool("-mat_superlu_dist_gpuoffload", "Offload factorization onto the GPUs", "None", (PetscBool)lu->options.superlu_acc_offload, (PetscBool *)&lu->options.superlu_acc_offload, NULL));
693a0940d2aSSatish Balay #endif
6943e56d16dSNuno Nobre 
695b8b5be36SMartin Diehl   PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &iflg));
696b8b5be36SMartin Diehl   if (!iflg || context->busy) { /* additional options */
697b8b5be36SMartin Diehl     if (!iflg) {
69826cc229bSBarry Smith       PetscCall(PetscNew(&context));
69926cc229bSBarry Smith       context->busy = PETSC_TRUE;
70026cc229bSBarry Smith       PetscCallMPI(MPI_Comm_dup(comm, &context->comm));
70126cc229bSBarry Smith       PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_Superlu_dist_keyval, context));
70226cc229bSBarry Smith     } else {
70326cc229bSBarry Smith       PetscCall(PetscCommGetComm(PetscObjectComm((PetscObject)A), &lu->comm_superlu));
70426cc229bSBarry Smith     }
70526cc229bSBarry Smith 
70626cc229bSBarry Smith     /* Default number of process columns and rows */
70726cc229bSBarry Smith     lu->nprow = (int_t)(0.5 + PetscSqrtReal((PetscReal)size));
70826cc229bSBarry Smith     if (!lu->nprow) lu->nprow = 1;
70926cc229bSBarry Smith     while (lu->nprow > 0) {
71026cc229bSBarry Smith       lu->npcol = (int_t)(size / lu->nprow);
71126cc229bSBarry Smith       if (size == lu->nprow * lu->npcol) break;
71226cc229bSBarry Smith       lu->nprow--;
71326cc229bSBarry Smith     }
7149e48eb9fSliuyangzhuan #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
71526cc229bSBarry Smith     lu->use3d = PETSC_FALSE;
71626cc229bSBarry Smith     lu->npdep = 1;
71726cc229bSBarry Smith #endif
71826cc229bSBarry Smith 
7199e48eb9fSliuyangzhuan #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
72026cc229bSBarry Smith     PetscCall(PetscOptionsBool("-mat_superlu_dist_3d", "Use SuperLU_DIST 3D distribution", "None", lu->use3d, &lu->use3d, NULL));
72126cc229bSBarry Smith     if (lu->use3d) {
72226cc229bSBarry Smith       PetscInt t;
72326cc229bSBarry Smith       PetscCall(PetscOptionsInt("-mat_superlu_dist_d", "Number of z entries in processor partition", "None", lu->npdep, (PetscInt *)&lu->npdep, NULL));
72426cc229bSBarry Smith       t = (PetscInt)PetscLog2Real((PetscReal)lu->npdep);
72526cc229bSBarry Smith       PetscCheck(PetscPowInt(2, t) == lu->npdep, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_OUTOFRANGE, "-mat_superlu_dist_d %lld must be a power of 2", (long long)lu->npdep);
72626cc229bSBarry Smith       if (lu->npdep > 1) {
72726cc229bSBarry Smith         lu->nprow = (int_t)(0.5 + PetscSqrtReal((PetscReal)(size / lu->npdep)));
72826cc229bSBarry Smith         if (!lu->nprow) lu->nprow = 1;
72926cc229bSBarry Smith         while (lu->nprow > 0) {
73026cc229bSBarry Smith           lu->npcol = (int_t)(size / (lu->npdep * lu->nprow));
73126cc229bSBarry Smith           if (size == lu->nprow * lu->npcol * lu->npdep) break;
73226cc229bSBarry Smith           lu->nprow--;
73326cc229bSBarry Smith         }
73426cc229bSBarry Smith       }
73526cc229bSBarry Smith     }
73626cc229bSBarry Smith #endif
73726cc229bSBarry Smith     PetscCall(PetscOptionsInt("-mat_superlu_dist_r", "Number rows in processor partition", "None", lu->nprow, (PetscInt *)&lu->nprow, NULL));
73826cc229bSBarry Smith     PetscCall(PetscOptionsInt("-mat_superlu_dist_c", "Number columns in processor partition", "None", lu->npcol, (PetscInt *)&lu->npcol, NULL));
7399e48eb9fSliuyangzhuan #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
74026cc229bSBarry Smith     PetscCheck(size == lu->nprow * lu->npcol * lu->npdep, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of processes %d must equal to nprow %lld * npcol %lld * npdep %lld", size, (long long)lu->nprow, (long long)lu->npcol, (long long)lu->npdep);
74126cc229bSBarry Smith #else
74226cc229bSBarry Smith     PetscCheck(size == lu->nprow * lu->npcol, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of processes %d must equal to nprow %lld * npcol %lld", size, (long long)lu->nprow, (long long)lu->npcol);
74326cc229bSBarry Smith #endif
74426cc229bSBarry Smith     /* end of adding additional options */
74526cc229bSBarry Smith 
7469e48eb9fSliuyangzhuan #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
74726cc229bSBarry Smith     if (lu->use3d) {
7489ad2cedaSPierre Jolivet       PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridinit3d", superlu_gridinit3d(context ? context->comm : lu->comm_superlu, (int)lu->nprow, (int)lu->npcol, (int)lu->npdep, &lu->grid3d));
7499371c9d4SSatish Balay       if (context) {
7509371c9d4SSatish Balay         context->grid3d = lu->grid3d;
7519371c9d4SSatish Balay         context->use3d  = lu->use3d;
7529371c9d4SSatish Balay       }
75326cc229bSBarry Smith     } else {
75426cc229bSBarry Smith #endif
7559ad2cedaSPierre Jolivet       PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridinit", superlu_gridinit(context ? context->comm : lu->comm_superlu, (int)lu->nprow, (int)lu->npcol, &lu->grid));
75626cc229bSBarry Smith       if (context) context->grid = lu->grid;
7579e48eb9fSliuyangzhuan #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
75826cc229bSBarry Smith     }
75926cc229bSBarry Smith #endif
76026cc229bSBarry Smith     PetscCall(PetscInfo(NULL, "Duplicating a communicator for SuperLU_DIST and calling superlu_gridinit()\n"));
761b8b5be36SMartin Diehl     if (iflg) {
76226cc229bSBarry Smith       PetscCall(PetscInfo(NULL, "Communicator attribute already in use so not saving communicator and SuperLU_DIST grid in communicator attribute \n"));
76326cc229bSBarry Smith     } else {
76426cc229bSBarry Smith       PetscCall(PetscInfo(NULL, "Storing communicator and SuperLU_DIST grid in communicator attribute\n"));
76526cc229bSBarry Smith     }
766b8b5be36SMartin Diehl   } else { /* (iflg && !context->busy) */
7679d3446b2SPierre Jolivet     PetscCall(PetscInfo(NULL, "Reusing communicator and superlu_gridinit() for SuperLU_DIST from communicator attribute.\n"));
76826cc229bSBarry Smith     context->busy = PETSC_TRUE;
76926cc229bSBarry Smith     lu->grid      = context->grid;
77026cc229bSBarry Smith   }
77126cc229bSBarry Smith   PetscOptionsEnd();
77226cc229bSBarry Smith 
773b24902e0SBarry Smith   /* Initialize ScalePermstruct and LUstruct. */
774a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
775b2d1094fSBarry Smith   if (lu->singleprecision) {
776b2d1094fSBarry Smith     PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructInit", sScalePermstructInit(M, N, &lu->sScalePermstruct));
777b2d1094fSBarry Smith     PetscStackCallExternalVoid("SuperLU_DIST:LUstructInit", sLUstructInit(N, &lu->sLUstruct));
778a6ad605dSBarry Smith   } else
779a6ad605dSBarry Smith #endif
780a6ad605dSBarry Smith   {
781e77caa6dSBarry Smith     PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructInit", ScalePermstructInit(M, N, &lu->ScalePermstruct));
782e77caa6dSBarry Smith     PetscStackCallExternalVoid("SuperLU_DIST:LUstructInit", LUstructInit(N, &lu->LUstruct));
783b2d1094fSBarry Smith   }
7848c5e8961SHong Zhang   F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST;
7858c5e8961SHong Zhang   F->ops->solve           = MatSolve_SuperLU_DIST;
7868c5e8961SHong Zhang   F->ops->matsolve        = MatMatSolve_SuperLU_DIST;
7873d4d7ae9SHong Zhang   F->ops->getinertia      = NULL;
78809d31890SHong Zhang 
789b94d7dedSBarry Smith   if (A->symmetric == PETSC_BOOL3_TRUE || A->hermitian == PETSC_BOOL3_TRUE) F->ops->getinertia = MatGetInertia_SuperLU_DIST;
7908c5e8961SHong Zhang   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
7913ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
792b24902e0SBarry Smith }
793b24902e0SBarry Smith 
MatCholeskyFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,const MatFactorInfo * info)794d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatCholeskyFactorSymbolic_SuperLU_DIST(Mat F, Mat A, IS r, const MatFactorInfo *info)
795d71ae5a4SJacob Faibussowitsch {
796bf99a652SHong Zhang   PetscFunctionBegin;
7979566063dSJacob Faibussowitsch   PetscCall(MatLUFactorSymbolic_SuperLU_DIST(F, A, r, r, info));
798bf99a652SHong Zhang   F->ops->choleskyfactornumeric = MatLUFactorNumeric_SuperLU_DIST;
7993ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
800bf99a652SHong Zhang }
801bf99a652SHong Zhang 
MatFactorGetSolverType_aij_superlu_dist(Mat A,MatSolverType * type)802d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatFactorGetSolverType_aij_superlu_dist(Mat A, MatSolverType *type)
803d71ae5a4SJacob Faibussowitsch {
80435bd34faSBarry Smith   PetscFunctionBegin;
8052692d6eeSBarry Smith   *type = MATSOLVERSUPERLU_DIST;
8063ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
80735bd34faSBarry Smith }
80835bd34faSBarry Smith 
MatView_Info_SuperLU_DIST(Mat A,PetscViewer viewer)809d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_Info_SuperLU_DIST(Mat A, PetscViewer viewer)
810d71ae5a4SJacob Faibussowitsch {
811a305db0bSBarry Smith   Mat_SuperLU_DIST      *lu = (Mat_SuperLU_DIST *)A->data;
812a305db0bSBarry Smith   superlu_dist_options_t options;
813a305db0bSBarry Smith 
814a305db0bSBarry Smith   PetscFunctionBegin;
815a305db0bSBarry Smith   /* check if matrix is superlu_dist type */
8163ba16761SJacob Faibussowitsch   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(PETSC_SUCCESS);
817a305db0bSBarry Smith 
818a305db0bSBarry Smith   options = lu->options;
8199566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "SuperLU_DIST run parameters:\n"));
820c0aa6a63SJacob Faibussowitsch   /* would love to use superlu 'IFMT' macro but it looks like it's inconsistently applied, the
821c0aa6a63SJacob Faibussowitsch    * format spec for int64_t is set to %d for whatever reason */
8229566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "  Process grid nprow %lld x npcol %lld \n", (long long)lu->nprow, (long long)lu->npcol));
8239e48eb9fSliuyangzhuan #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0)
82448a46eb9SPierre Jolivet   if (lu->use3d) PetscCall(PetscViewerASCIIPrintf(viewer, "  Using 3d decomposition with npdep %lld \n", (long long)lu->npdep));
8253e558968SBarry Smith #endif
8263e558968SBarry Smith 
8279566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "  Equilibrate matrix %s \n", PetscBools[options.Equil != NO]));
8289566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "  Replace tiny pivots %s \n", PetscBools[options.ReplaceTinyPivot != NO]));
8299566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "  Use iterative refinement %s \n", PetscBools[options.IterRefine == SLU_DOUBLE]));
8309566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "  Processors in row %lld col partition %lld \n", (long long)lu->nprow, (long long)lu->npcol));
8310425e358SSatish Balay 
8320425e358SSatish Balay   switch (options.RowPerm) {
833d71ae5a4SJacob Faibussowitsch   case NOROWPERM:
834d71ae5a4SJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "  Row permutation NOROWPERM\n"));
835d71ae5a4SJacob Faibussowitsch     break;
836d71ae5a4SJacob Faibussowitsch   case LargeDiag_MC64:
837d71ae5a4SJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "  Row permutation LargeDiag_MC64\n"));
838d71ae5a4SJacob Faibussowitsch     break;
839d71ae5a4SJacob Faibussowitsch   case LargeDiag_AWPM:
840d71ae5a4SJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "  Row permutation LargeDiag_AWPM\n"));
841d71ae5a4SJacob Faibussowitsch     break;
842d71ae5a4SJacob Faibussowitsch   case MY_PERMR:
843d71ae5a4SJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "  Row permutation MY_PERMR\n"));
844d71ae5a4SJacob Faibussowitsch     break;
845d71ae5a4SJacob Faibussowitsch   default:
846d71ae5a4SJacob Faibussowitsch     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation");
8470425e358SSatish Balay   }
848a305db0bSBarry Smith 
849a305db0bSBarry Smith   switch (options.ColPerm) {
850d71ae5a4SJacob Faibussowitsch   case NATURAL:
851d71ae5a4SJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "  Column permutation NATURAL\n"));
852d71ae5a4SJacob Faibussowitsch     break;
853d71ae5a4SJacob Faibussowitsch   case MMD_AT_PLUS_A:
854d71ae5a4SJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "  Column permutation MMD_AT_PLUS_A\n"));
855d71ae5a4SJacob Faibussowitsch     break;
856d71ae5a4SJacob Faibussowitsch   case MMD_ATA:
857d71ae5a4SJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "  Column permutation MMD_ATA\n"));
858d71ae5a4SJacob Faibussowitsch     break;
859bf3e94a3SBarry Smith   /*  Even though this is called METIS, the SuperLU_DIST code sets this by default if PARMETIS is defined, not METIS */
860d71ae5a4SJacob Faibussowitsch   case METIS_AT_PLUS_A:
861d71ae5a4SJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "  Column permutation METIS_AT_PLUS_A\n"));
862d71ae5a4SJacob Faibussowitsch     break;
863d71ae5a4SJacob Faibussowitsch   case PARMETIS:
864d71ae5a4SJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "  Column permutation PARMETIS\n"));
865d71ae5a4SJacob Faibussowitsch     break;
866d71ae5a4SJacob Faibussowitsch   default:
867d71ae5a4SJacob Faibussowitsch     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation");
868a305db0bSBarry Smith   }
869a305db0bSBarry Smith 
8709566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "  Parallel symbolic factorization %s \n", PetscBools[options.ParSymbFact != NO]));
871a305db0bSBarry Smith 
872a305db0bSBarry Smith   if (lu->FactPattern == SamePattern) {
8739566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "  Repeated factorization SamePattern\n"));
8741639e5c1SHong Zhang   } else if (lu->FactPattern == SamePattern_SameRowPerm) {
8759566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "  Repeated factorization SamePattern_SameRowPerm\n"));
8761639e5c1SHong Zhang   } else if (lu->FactPattern == DOFACT) {
8779566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "  Repeated factorization DOFACT\n"));
878b2d1094fSBarry Smith   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown factorization pattern");
879a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
880b2d1094fSBarry Smith   if (lu->singleprecision) PetscCall(PetscViewerASCIIPrintf(viewer, "  Using SuperLU_DIST in single precision\n"));
881a6ad605dSBarry Smith #endif
8823ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
883a305db0bSBarry Smith }
884a305db0bSBarry Smith 
MatView_SuperLU_DIST(Mat A,PetscViewer viewer)885d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SuperLU_DIST(Mat A, PetscViewer viewer)
886d71ae5a4SJacob Faibussowitsch {
8879f196a02SMartin Diehl   PetscBool         isascii;
888a305db0bSBarry Smith   PetscViewerFormat format;
889a305db0bSBarry Smith 
890a305db0bSBarry Smith   PetscFunctionBegin;
8919f196a02SMartin Diehl   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
8929f196a02SMartin Diehl   if (isascii) {
8939566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetFormat(viewer, &format));
89448a46eb9SPierre Jolivet     if (format == PETSC_VIEWER_ASCII_INFO) PetscCall(MatView_Info_SuperLU_DIST(A, viewer));
895a305db0bSBarry Smith   }
8963ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
897a305db0bSBarry Smith }
898a305db0bSBarry Smith 
MatGetFactor_aij_superlu_dist(Mat A,MatFactorType ftype,Mat * F)899d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A, MatFactorType ftype, Mat *F)
900d71ae5a4SJacob Faibussowitsch {
901b24902e0SBarry Smith   Mat                    B;
902f0c56d0fSKris Buschelman   Mat_SuperLU_DIST      *lu;
90326cc229bSBarry Smith   PetscInt               M = A->rmap->N, N = A->cmap->N;
904ea2994c3SHong Zhang   PetscMPIInt            size;
90572210a05SBarry Smith   superlu_dist_options_t options;
906b2d1094fSBarry Smith   PetscBool              flg;
9076e4037fdSJunchao Zhang   PetscPrecision         precision = PETSC_PRECISION_INVALID;
908448de63eSKris Buschelman 
909f0c56d0fSKris Buschelman   PetscFunctionBegin;
910448de63eSKris Buschelman   /* Create the factorization matrix */
9119566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
9129566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, M, N));
9139566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy("superlu_dist", &((PetscObject)B)->type_name));
9149566063dSJacob Faibussowitsch   PetscCall(MatSetUp(B));
915ecddaf3cSBarry Smith   B->ops->getinfo = MatGetInfo_External;
9162877fffaSHong Zhang   B->ops->view    = MatView_SuperLU_DIST;
917bacf2223SBarry Smith   B->ops->destroy = MatDestroy_SuperLU_DIST;
9182205254eSKarl Rupp 
9193e56d16dSNuno Nobre   /* set_default_options_dist() sets the default input options to the following values:
92019660a1cSHong Zhang      options.Fact                = DOFACT;
92119660a1cSHong Zhang      options.Equil               = YES;
9223348698aSHong Zhang      options.ParSymbFact         = NO;
9236b508d82SHong Zhang      options.ColPerm             = METIS_AT_PLUS_A;
9240425e358SSatish Balay      options.RowPerm             = LargeDiag_MC64;
9253e56d16dSNuno Nobre      options.ReplaceTinyPivot    = NO;
9263e56d16dSNuno Nobre      options.IterRefine          = SLU_DOUBLE;
9273348698aSHong Zhang      options.Trans               = NOTRANS;
92849d41e62SHong Zhang      options.SolveInitialized    = NO; -hold the communication pattern used MatSolve() and MatMatSolve()
92919660a1cSHong Zhang      options.RefineInitialized   = NO;
93019660a1cSHong Zhang      options.PrintStat           = YES;
9313e56d16dSNuno Nobre      options.superlu_acc_offload = 1;
932c8f76b2fSStefano Zampini      options.SymPattern          = NO;
93319660a1cSHong Zhang   */
934ba45f930SHong Zhang   set_default_options_dist(&options);
935e95ce8a1SHong Zhang 
93666e17bc3SBarry Smith   B->trivialsymbolic = PETSC_TRUE;
937c8f76b2fSStefano Zampini   if (ftype == MAT_FACTOR_LU) {
938c8f76b2fSStefano Zampini     B->factortype            = MAT_FACTOR_LU;
939c8f76b2fSStefano Zampini     B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
940c8f76b2fSStefano Zampini   } else {
941c8f76b2fSStefano Zampini     B->factortype                  = MAT_FACTOR_CHOLESKY;
942c8f76b2fSStefano Zampini     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SuperLU_DIST;
943c8f76b2fSStefano Zampini     options.SymPattern             = YES;
944c8f76b2fSStefano Zampini   }
945c8f76b2fSStefano Zampini 
946c8f76b2fSStefano Zampini   /* set solvertype */
9479566063dSJacob Faibussowitsch   PetscCall(PetscFree(B->solvertype));
9489566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERSUPERLU_DIST, &B->solvertype));
949c8f76b2fSStefano Zampini 
9504dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&lu));
951c8f76b2fSStefano Zampini   B->data = lu;
9529566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
953ed876474SBarry Smith 
954448de63eSKris Buschelman   lu->options              = options;
95519660a1cSHong Zhang   lu->options.Fact         = DOFACT;
95649d41e62SHong Zhang   lu->matsolve_iscalled    = PETSC_FALSE;
95749d41e62SHong Zhang   lu->matmatsolve_iscalled = PETSC_FALSE;
9582205254eSKarl Rupp 
959cd0670b2SPierre Jolivet   PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "SuperLU_DIST Options", "Mat");
9606e4037fdSJunchao Zhang   PetscCall(PetscOptionsEnum("-pc_precision", "Precision used by SuperLU_DIST", "MATSOLVERSUPERLU_DIST", PetscPrecisionTypes, (PetscEnum)precision, (PetscEnum *)&precision, &flg));
9616e4037fdSJunchao Zhang   PetscOptionsEnd();
962b2d1094fSBarry Smith   if (flg) {
9636e4037fdSJunchao Zhang     PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PetscObjectComm((PetscObject)A), PETSC_ERR_USER_INPUT, "-pc_precision only accepts single or double as option for SuperLU_DIST");
964a6ad605dSBarry Smith #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
9656e4037fdSJunchao Zhang     lu->singleprecision = (PetscBool)(precision == PETSC_PRECISION_SINGLE); // It also implies PetscReal is not single; not merely SuperLU_DIST is running in single
966b2d1094fSBarry Smith #endif
967b2d1094fSBarry Smith   }
968b2d1094fSBarry Smith 
9699566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_aij_superlu_dist));
9709566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSuperluDistGetDiagU_C", MatSuperluDistGetDiagU_SuperLU_DIST));
971fb785984SHong Zhang 
972448de63eSKris Buschelman   *F = B;
9733ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
974448de63eSKris Buschelman }
975f3c0ef26SHong Zhang 
MatSolverTypeRegister_SuperLU_DIST(void)976d1f0640dSPierre Jolivet PETSC_INTERN PetscErrorCode MatSolverTypeRegister_SuperLU_DIST(void)
977d71ae5a4SJacob Faibussowitsch {
978f3c0ef26SHong Zhang   PetscFunctionBegin;
9799566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_superlu_dist));
9809566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_superlu_dist));
9819566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_superlu_dist));
9829566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_superlu_dist));
9837def7855SStefano Zampini   if (Petsc_Superlu_dist_keyval == MPI_KEYVAL_INVALID) {
9848434afd1SBarry Smith     PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_Superlu_dist_keyval_DeleteFn, &Petsc_Superlu_dist_keyval, NULL));
9857def7855SStefano Zampini     PetscCall(PetscRegisterFinalize(Petsc_Superlu_dist_keyval_free));
9867def7855SStefano Zampini   }
9873ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
988f3c0ef26SHong Zhang }
989448de63eSKris Buschelman 
99024b6179bSKris Buschelman /*MC
9912692d6eeSBarry Smith   MATSOLVERSUPERLU_DIST - Parallel direct solver package for LU factorization
99224b6179bSKris Buschelman 
9932ef1f0ffSBarry Smith   Use `./configure --download-superlu_dist --download-parmetis --download-metis --download-ptscotch`  to have PETSc installed with SuperLU_DIST
994c2b89b5dSBarry Smith 
9952ef1f0ffSBarry Smith   Use `-pc_type lu` `-pc_factor_mat_solver_type superlu_dist` to use this direct solver
996c2b89b5dSBarry Smith 
99711a5261eSBarry Smith    Works with `MATAIJ` matrices
99824b6179bSKris Buschelman 
99924b6179bSKris Buschelman   Options Database Keys:
100041c8de11SBarry Smith + -mat_superlu_dist_r <n> - number of rows in processor partition
100124b6179bSKris Buschelman . -mat_superlu_dist_c <n> - number of columns in processor partition
10023e558968SBarry Smith . -mat_superlu_dist_3d - use 3d partition, requires SuperLU_DIST 7.2 or later
10032ef1f0ffSBarry Smith . -mat_superlu_dist_d <n> - depth in 3d partition (valid only if `-mat_superlu_dist_3d`) is provided
100424b6179bSKris Buschelman . -mat_superlu_dist_equil - equilibrate the matrix
10050425e358SSatish Balay . -mat_superlu_dist_rowperm <NOROWPERM,LargeDiag_MC64,LargeDiag_AWPM,MY_PERMR> - row permutation
1006f7374848SSatish Balay . -mat_superlu_dist_colperm <NATURAL,MMD_AT_PLUS_A,MMD_ATA,METIS_AT_PLUS_A,PARMETIS> - column permutation
100724b6179bSKris Buschelman . -mat_superlu_dist_replacetinypivot - replace tiny pivots
10082ef1f0ffSBarry Smith . -mat_superlu_dist_fact <SamePattern> - (choose one of) `SamePattern`, `SamePattern_SameRowPerm`, `DOFACT`
100924b6179bSKris Buschelman . -mat_superlu_dist_iterrefine - use iterative refinement
1010b2d1094fSBarry Smith . -mat_superlu_dist_printstat - print factorization information
1011a0940d2aSSatish Balay . -mat_superlu_dist_gpuoffload - offload factorization onto the GPUs, requires SuperLU_DIST 8.0.0 or later
1012dfe00d7bSSatish Balay - -pc_precision single - use SuperLU_DIST single precision with PETSc double precision
101324b6179bSKris Buschelman 
101424b6179bSKris Buschelman   Level: beginner
101524b6179bSKris Buschelman 
10162ef1f0ffSBarry Smith   Note:
10173e56d16dSNuno Nobre     If PETSc was configured with `--with-cuda` then this solver will use the GPUs by default.
10182ef1f0ffSBarry Smith 
10191cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `PCLU`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatGetFactor()`
102024b6179bSKris Buschelman M*/
1021