xref: /petsc/src/mat/impls/aij/seq/superlu/superlu.c (revision 834855d6effb0d027771461c8e947ee1ce5a1e17)
12ef1f0ffSBarry Smith /*
25a46d3faSBarry Smith      This file implements a subclass of the SeqAIJ matrix class that uses
3c2b89b5dSBarry Smith      the SuperLU sparse solver.
414b396bbSKris Buschelman */
514b396bbSKris Buschelman 
65a46d3faSBarry Smith /*
75a46d3faSBarry Smith      Defines the data structure for the base matrix type (SeqAIJ)
85a46d3faSBarry Smith */
9c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/
1014b396bbSKris Buschelman 
115a46d3faSBarry Smith /*
125a46d3faSBarry Smith      SuperLU include files
135a46d3faSBarry Smith */
1414b396bbSKris Buschelman EXTERN_C_BEGIN
1514b396bbSKris Buschelman #if defined(PETSC_USE_COMPLEX)
163cb270beSHong Zhang   #if defined(PETSC_USE_REAL_SINGLE)
173cb270beSHong Zhang     #include <slu_cdefs.h>
183cb270beSHong Zhang   #else
19c6db04a5SJed Brown     #include <slu_zdefs.h>
203cb270beSHong Zhang   #endif
213cb270beSHong Zhang #else
223cb270beSHong Zhang   #if defined(PETSC_USE_REAL_SINGLE)
233cb270beSHong Zhang     #include <slu_sdefs.h>
2414b396bbSKris Buschelman   #else
25c6db04a5SJed Brown     #include <slu_ddefs.h>
2614b396bbSKris Buschelman   #endif
273cb270beSHong Zhang #endif
28c6db04a5SJed Brown #include <slu_util.h>
2914b396bbSKris Buschelman EXTERN_C_END
3014b396bbSKris Buschelman 
315a46d3faSBarry Smith /*
32245fece9SBarry Smith      This is the data that defines the SuperLU factored matrix type
335a46d3faSBarry Smith */
3414b396bbSKris Buschelman typedef struct {
3575af56d4SHong Zhang   SuperMatrix       A, L, U, B, X;
3675af56d4SHong Zhang   superlu_options_t options;
37da7a1d00SHong Zhang   PetscInt         *perm_c; /* column permutation vector */
38da7a1d00SHong Zhang   PetscInt         *perm_r; /* row permutations from partial pivoting */
39da7a1d00SHong Zhang   PetscInt         *etree;
40da7a1d00SHong Zhang   PetscReal        *R, *C;
4175af56d4SHong Zhang   char              equed[1];
42da7a1d00SHong Zhang   PetscInt          lwork;
4375af56d4SHong Zhang   void             *work;
44da7a1d00SHong Zhang   PetscReal         rpg, rcond;
4575af56d4SHong Zhang   mem_usage_t       mem_usage;
4675af56d4SHong Zhang   MatStructure      flg;
475d8b2efaSHong Zhang   SuperLUStat_t     stat;
48cae5a23dSHong Zhang   Mat               A_dup;
49cae5a23dSHong Zhang   PetscScalar      *rhs_dup;
50c147c726SHong Zhang   GlobalLU_t        Glu;
512366e350SStefano Zampini   PetscBool         needconversion;
5214b396bbSKris Buschelman 
5314b396bbSKris Buschelman   /* Flag to clean up (non-global) SuperLU objects during Destroy */
54ace3abfcSBarry Smith   PetscBool CleanUpSuperLU;
55f0c56d0fSKris Buschelman } Mat_SuperLU;
5614b396bbSKris Buschelman 
575a46d3faSBarry Smith /*
585a46d3faSBarry Smith     Utility function
595a46d3faSBarry Smith */
MatView_Info_SuperLU(Mat A,PetscViewer viewer)60d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_Info_SuperLU(Mat A, PetscViewer viewer)
61d71ae5a4SJacob Faibussowitsch {
62245fece9SBarry Smith   Mat_SuperLU      *lu = (Mat_SuperLU *)A->data;
635a46d3faSBarry Smith   superlu_options_t options;
645a46d3faSBarry Smith 
655a46d3faSBarry Smith   PetscFunctionBegin;
665a46d3faSBarry Smith   options = lu->options;
672205254eSKarl Rupp 
689566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "SuperLU run parameters:\n"));
699566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "  Equil: %s\n", (options.Equil != NO) ? "YES" : "NO"));
709566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "  ColPerm: %" PetscInt_FMT "\n", options.ColPerm));
719566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "  IterRefine: %" PetscInt_FMT "\n", options.IterRefine));
729566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "  SymmetricMode: %s\n", (options.SymmetricMode != NO) ? "YES" : "NO"));
739566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "  DiagPivotThresh: %g\n", options.DiagPivotThresh));
749566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "  PivotGrowth: %s\n", (options.PivotGrowth != NO) ? "YES" : "NO"));
759566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "  ConditionNumber: %s\n", (options.ConditionNumber != NO) ? "YES" : "NO"));
769566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "  RowPerm: %" PetscInt_FMT "\n", options.RowPerm));
779566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "  ReplaceTinyPivot: %s\n", (options.ReplaceTinyPivot != NO) ? "YES" : "NO"));
789566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "  PrintStat: %s\n", (options.PrintStat != NO) ? "YES" : "NO"));
799566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "  lwork: %" PetscInt_FMT "\n", lu->lwork));
80d5f3da31SBarry Smith   if (A->factortype == MAT_FACTOR_ILU) {
819566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "  ILU_DropTol: %g\n", options.ILU_DropTol));
829566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "  ILU_FillTol: %g\n", options.ILU_FillTol));
839566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "  ILU_FillFactor: %g\n", options.ILU_FillFactor));
849566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "  ILU_DropRule: %" PetscInt_FMT "\n", options.ILU_DropRule));
859566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "  ILU_Norm: %" PetscInt_FMT "\n", options.ILU_Norm));
869566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "  ILU_MILU: %" PetscInt_FMT "\n", options.ILU_MILU));
87cffbb591SHong Zhang   }
883ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
895a46d3faSBarry Smith }
905a46d3faSBarry Smith 
MatSolve_SuperLU_Private(Mat A,Vec b,Vec x)91ba38deedSJacob Faibussowitsch static PetscErrorCode MatSolve_SuperLU_Private(Mat A, Vec b, Vec x)
92d71ae5a4SJacob Faibussowitsch {
93245fece9SBarry Smith   Mat_SuperLU       *lu = (Mat_SuperLU *)A->data;
94245fece9SBarry Smith   const PetscScalar *barray;
95245fece9SBarry Smith   PetscScalar       *xarray;
96245fece9SBarry Smith   PetscInt           info, i, n;
97245fece9SBarry Smith   PetscReal          ferr, berr;
98245fece9SBarry Smith   static PetscBool   cite = PETSC_FALSE;
99245fece9SBarry Smith 
100245fece9SBarry Smith   PetscFunctionBegin;
1013ba16761SJacob Faibussowitsch   if (lu->lwork == -1) PetscFunctionReturn(PETSC_SUCCESS);
1029371c9d4SSatish Balay   PetscCall(PetscCitationsRegister("@article{superlu99,\n  author  = {James W. Demmel and Stanley C. Eisenstat and\n             John R. Gilbert and Xiaoye S. Li and Joseph W. H. Liu},\n  title = {A supernodal approach to sparse partial "
1039371c9d4SSatish Balay                                    "pivoting},\n  journal = {SIAM J. Matrix Analysis and Applications},\n  year = {1999},\n  volume  = {20},\n  number = {3},\n  pages = {720-755}\n}\n",
1049371c9d4SSatish Balay                                    &cite));
105245fece9SBarry Smith 
1069566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(x, &n));
107245fece9SBarry Smith   lu->B.ncol = 1; /* Set the number of right-hand side */
108245fece9SBarry Smith   if (lu->options.Equil && !lu->rhs_dup) {
109245fece9SBarry Smith     /* superlu overwrites b when Equil is used, thus create rhs_dup to keep user's b unchanged */
1109566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(n, &lu->rhs_dup));
111245fece9SBarry Smith   }
112245fece9SBarry Smith   if (lu->options.Equil) {
113245fece9SBarry Smith     /* Copy b into rsh_dup */
1149566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(b, &barray));
1159566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(lu->rhs_dup, barray, n));
1169566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(b, &barray));
117245fece9SBarry Smith     barray = lu->rhs_dup;
118245fece9SBarry Smith   } else {
1199566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(b, &barray));
120245fece9SBarry Smith   }
1219566063dSJacob Faibussowitsch   PetscCall(VecGetArray(x, &xarray));
122245fece9SBarry Smith 
123245fece9SBarry Smith #if defined(PETSC_USE_COMPLEX)
124245fece9SBarry Smith   #if defined(PETSC_USE_REAL_SINGLE)
125245fece9SBarry Smith   ((DNformat *)lu->B.Store)->nzval = (singlecomplex *)barray;
126245fece9SBarry Smith   ((DNformat *)lu->X.Store)->nzval = (singlecomplex *)xarray;
127245fece9SBarry Smith   #else
128245fece9SBarry Smith   ((DNformat *)lu->B.Store)->nzval = (doublecomplex *)barray;
129245fece9SBarry Smith   ((DNformat *)lu->X.Store)->nzval = (doublecomplex *)xarray;
130245fece9SBarry Smith   #endif
131245fece9SBarry Smith #else
132245fece9SBarry Smith   ((DNformat *)lu->B.Store)->nzval = (void *)barray;
133245fece9SBarry Smith   ((DNformat *)lu->X.Store)->nzval = xarray;
134245fece9SBarry Smith #endif
135245fece9SBarry Smith 
136245fece9SBarry Smith   lu->options.Fact = FACTORED; /* Indicate the factored form of A is supplied. */
137245fece9SBarry Smith   if (A->factortype == MAT_FACTOR_LU) {
138245fece9SBarry Smith #if defined(PETSC_USE_COMPLEX)
139245fece9SBarry Smith   #if defined(PETSC_USE_REAL_SINGLE)
1409371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:cgssvx", cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->Glu, &lu->mem_usage, &lu->stat, &info));
141245fece9SBarry Smith   #else
1429371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:zgssvx", zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->Glu, &lu->mem_usage, &lu->stat, &info));
143245fece9SBarry Smith   #endif
144245fece9SBarry Smith #else
145245fece9SBarry Smith   #if defined(PETSC_USE_REAL_SINGLE)
1469371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:sgssvx", sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->Glu, &lu->mem_usage, &lu->stat, &info));
147245fece9SBarry Smith   #else
1489371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:dgssvx", dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->Glu, &lu->mem_usage, &lu->stat, &info));
149245fece9SBarry Smith   #endif
150245fece9SBarry Smith #endif
151245fece9SBarry Smith   } else if (A->factortype == MAT_FACTOR_ILU) {
152245fece9SBarry Smith #if defined(PETSC_USE_COMPLEX)
153245fece9SBarry Smith   #if defined(PETSC_USE_REAL_SINGLE)
1549371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:cgsisx", cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->Glu, &lu->mem_usage, &lu->stat, &info));
155245fece9SBarry Smith   #else
1569371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:zgsisx", zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->Glu, &lu->mem_usage, &lu->stat, &info));
157245fece9SBarry Smith   #endif
158245fece9SBarry Smith #else
159245fece9SBarry Smith   #if defined(PETSC_USE_REAL_SINGLE)
1609371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:sgsisx", sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->Glu, &lu->mem_usage, &lu->stat, &info));
161245fece9SBarry Smith   #else
1629371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:dgsisx", dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->Glu, &lu->mem_usage, &lu->stat, &info));
163245fece9SBarry Smith   #endif
164245fece9SBarry Smith #endif
165245fece9SBarry Smith   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
16648a46eb9SPierre Jolivet   if (!lu->options.Equil) PetscCall(VecRestoreArrayRead(b, &barray));
1679566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(x, &xarray));
168245fece9SBarry Smith 
169245fece9SBarry Smith   if (!info || info == lu->A.ncol + 1) {
170245fece9SBarry Smith     if (lu->options.IterRefine) {
1719566063dSJacob Faibussowitsch       PetscCall(PetscPrintf(PETSC_COMM_SELF, "Iterative Refinement:\n"));
1729566063dSJacob Faibussowitsch       PetscCall(PetscPrintf(PETSC_COMM_SELF, "  %8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR"));
1739566063dSJacob Faibussowitsch       for (i = 0; i < 1; ++i) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  %8d%8d%16e%16e\n", i + 1, lu->stat.RefineSteps, ferr, berr));
174245fece9SBarry Smith     }
175245fece9SBarry Smith   } else if (info > 0) {
176245fece9SBarry Smith     if (lu->lwork == -1) {
1779566063dSJacob Faibussowitsch       PetscCall(PetscPrintf(PETSC_COMM_SELF, "  ** Estimated memory: %" PetscInt_FMT " bytes\n", info - lu->A.ncol));
178245fece9SBarry Smith     } else {
1799566063dSJacob Faibussowitsch       PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Warning: gssvx() returns info %" PetscInt_FMT "\n", info));
180245fece9SBarry Smith     }
1815f80ce2aSJacob Faibussowitsch   } else PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "info = %" PetscInt_FMT ", the %" PetscInt_FMT "-th argument in gssvx() had an illegal value", info, -info);
182245fece9SBarry Smith 
183245fece9SBarry Smith   if (lu->options.PrintStat) {
1849566063dSJacob Faibussowitsch     PetscCall(PetscPrintf(PETSC_COMM_SELF, "MatSolve__SuperLU():\n"));
185e77caa6dSBarry Smith     PetscStackCallExternalVoid("SuperLU:StatPrint", StatPrint(&lu->stat));
186245fece9SBarry Smith   }
1873ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
188245fece9SBarry Smith }
189245fece9SBarry Smith 
MatSolve_SuperLU(Mat A,Vec b,Vec x)190ba38deedSJacob Faibussowitsch static PetscErrorCode MatSolve_SuperLU(Mat A, Vec b, Vec x)
191d71ae5a4SJacob Faibussowitsch {
192245fece9SBarry Smith   Mat_SuperLU *lu = (Mat_SuperLU *)A->data;
19378bc9606SHong Zhang   trans_t      oldOption;
194245fece9SBarry Smith 
195245fece9SBarry Smith   PetscFunctionBegin;
196f480ea8aSBarry Smith   PetscCall(VecFlag(x, A->factorerrortype));
197603e8f96SBarry Smith   if (A->factorerrortype) {
1989566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, skip\n"));
1993ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
200245fece9SBarry Smith   }
201245fece9SBarry Smith 
20278bc9606SHong Zhang   oldOption         = lu->options.Trans;
203245fece9SBarry Smith   lu->options.Trans = TRANS;
2049566063dSJacob Faibussowitsch   PetscCall(MatSolve_SuperLU_Private(A, b, x));
20578bc9606SHong Zhang   lu->options.Trans = oldOption;
2063ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
207245fece9SBarry Smith }
208245fece9SBarry Smith 
MatSolveTranspose_SuperLU(Mat A,Vec b,Vec x)209ba38deedSJacob Faibussowitsch static PetscErrorCode MatSolveTranspose_SuperLU(Mat A, Vec b, Vec x)
210d71ae5a4SJacob Faibussowitsch {
211245fece9SBarry Smith   Mat_SuperLU *lu = (Mat_SuperLU *)A->data;
21278bc9606SHong Zhang   trans_t      oldOption;
213245fece9SBarry Smith 
214245fece9SBarry Smith   PetscFunctionBegin;
215f480ea8aSBarry Smith   PetscCall(VecFlag(x, A->factorerrortype));
216603e8f96SBarry Smith   if (A->factorerrortype) {
2179566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, skip\n"));
2183ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
219245fece9SBarry Smith   }
220245fece9SBarry Smith 
22178bc9606SHong Zhang   oldOption         = lu->options.Trans;
222245fece9SBarry Smith   lu->options.Trans = NOTRANS;
2239566063dSJacob Faibussowitsch   PetscCall(MatSolve_SuperLU_Private(A, b, x));
22478bc9606SHong Zhang   lu->options.Trans = oldOption;
2253ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
226245fece9SBarry Smith }
227245fece9SBarry Smith 
MatLUFactorNumeric_SuperLU(Mat F,Mat A,const MatFactorInfo * info)228d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatLUFactorNumeric_SuperLU(Mat F, Mat A, const MatFactorInfo *info)
229d71ae5a4SJacob Faibussowitsch {
230245fece9SBarry Smith   Mat_SuperLU *lu = (Mat_SuperLU *)F->data;
231cae5a23dSHong Zhang   Mat_SeqAIJ  *aa;
2325a46d3faSBarry Smith   PetscInt     sinfo;
2335a46d3faSBarry Smith   PetscReal    ferr, berr;
2345a46d3faSBarry Smith   NCformat    *Ustore;
2355a46d3faSBarry Smith   SCformat    *Lstore;
2365a46d3faSBarry Smith 
2375a46d3faSBarry Smith   PetscFunctionBegin;
238da81f932SPierre Jolivet   if (lu->flg == SAME_NONZERO_PATTERN) { /* successive numerical factorization */
2395a46d3faSBarry Smith     lu->options.Fact = SamePattern;
2405a46d3faSBarry Smith     /* Ref: ~SuperLU_3.0/EXAMPLE/dlinsolx2.c */
2415a46d3faSBarry Smith     Destroy_SuperMatrix_Store(&lu->A);
2421baa6e33SBarry Smith     if (lu->A_dup) PetscCall(MatCopy_SeqAIJ(A, lu->A_dup, SAME_NONZERO_PATTERN));
2432366e350SStefano Zampini 
2445a46d3faSBarry Smith     if (lu->lwork >= 0) {
245e77caa6dSBarry Smith       PetscStackCallExternalVoid("SuperLU:Destroy_SuperNode_Matrix", Destroy_SuperNode_Matrix(&lu->L));
246e77caa6dSBarry Smith       PetscStackCallExternalVoid("SuperLU:Destroy_CompCol_Matrix", Destroy_CompCol_Matrix(&lu->U));
2475a46d3faSBarry Smith       lu->options.Fact = SamePattern;
2485a46d3faSBarry Smith     }
2495a46d3faSBarry Smith   }
2505a46d3faSBarry Smith 
2515a46d3faSBarry Smith   /* Create the SuperMatrix for lu->A=A^T:
2525a46d3faSBarry Smith        Since SuperLU likes column-oriented matrices,we pass it the transpose,
2535a46d3faSBarry Smith        and then solve A^T X = B in MatSolve(). */
2542366e350SStefano Zampini   if (lu->A_dup) {
255f4f49eeaSPierre Jolivet     aa = (Mat_SeqAIJ *)lu->A_dup->data;
256cae5a23dSHong Zhang   } else {
25757508eceSPierre Jolivet     aa = (Mat_SeqAIJ *)A->data;
258cae5a23dSHong Zhang   }
2595a46d3faSBarry Smith #if defined(PETSC_USE_COMPLEX)
2603cb270beSHong Zhang   #if defined(PETSC_USE_REAL_SINGLE)
261e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU:cCreate_CompCol_Matrix", cCreate_CompCol_Matrix(&lu->A, A->cmap->n, A->rmap->n, aa->nz, (singlecomplex *)aa->a, aa->j, aa->i, SLU_NC, SLU_C, SLU_GE));
2623cb270beSHong Zhang   #else
263e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU:zCreate_CompCol_Matrix", zCreate_CompCol_Matrix(&lu->A, A->cmap->n, A->rmap->n, aa->nz, (doublecomplex *)aa->a, aa->j, aa->i, SLU_NC, SLU_Z, SLU_GE));
2643cb270beSHong Zhang   #endif
2653cb270beSHong Zhang #else
2663cb270beSHong Zhang   #if defined(PETSC_USE_REAL_SINGLE)
267e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU:sCreate_CompCol_Matrix", sCreate_CompCol_Matrix(&lu->A, A->cmap->n, A->rmap->n, aa->nz, aa->a, aa->j, aa->i, SLU_NC, SLU_S, SLU_GE));
2685a46d3faSBarry Smith   #else
269e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU:dCreate_CompCol_Matrix", dCreate_CompCol_Matrix(&lu->A, A->cmap->n, A->rmap->n, aa->nz, aa->a, aa->j, aa->i, SLU_NC, SLU_D, SLU_GE));
2705a46d3faSBarry Smith   #endif
2713cb270beSHong Zhang #endif
2725a46d3faSBarry Smith 
2735a46d3faSBarry Smith   /* Numerical factorization */
2745a46d3faSBarry Smith   lu->B.ncol = 0; /* Indicate not to solve the system */
275d5f3da31SBarry Smith   if (F->factortype == MAT_FACTOR_LU) {
2765a46d3faSBarry Smith #if defined(PETSC_USE_COMPLEX)
2773cb270beSHong Zhang   #if defined(PETSC_USE_REAL_SINGLE)
2789371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:cgssvx", cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
2793cb270beSHong Zhang   #else
2809371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:zgssvx", zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
2813cb270beSHong Zhang   #endif
2823cb270beSHong Zhang #else
2833cb270beSHong Zhang   #if defined(PETSC_USE_REAL_SINGLE)
2849371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:sgssvx", sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
2855a46d3faSBarry Smith   #else
2869371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:dgssvx", dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
2875a46d3faSBarry Smith   #endif
2883cb270beSHong Zhang #endif
289d5f3da31SBarry Smith   } else if (F->factortype == MAT_FACTOR_ILU) {
290cffbb591SHong Zhang     /* Compute the incomplete factorization, condition number and pivot growth */
291cffbb591SHong Zhang #if defined(PETSC_USE_COMPLEX)
2923cb270beSHong Zhang   #if defined(PETSC_USE_REAL_SINGLE)
2939371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:cgsisx", cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
2943cb270beSHong Zhang   #else
2959371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:zgsisx", zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
2963cb270beSHong Zhang   #endif
2973cb270beSHong Zhang #else
2983cb270beSHong Zhang   #if defined(PETSC_USE_REAL_SINGLE)
2999371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:sgsisx", sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
300cffbb591SHong Zhang   #else
3019371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:dgsisx", dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
302cffbb591SHong Zhang   #endif
3033cb270beSHong Zhang #endif
304f23aa3ddSBarry Smith   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
3055a46d3faSBarry Smith   if (!sinfo || sinfo == lu->A.ncol + 1) {
30648a46eb9SPierre Jolivet     if (lu->options.PivotGrowth) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Recip. pivot growth = %e\n", lu->rpg));
30748a46eb9SPierre Jolivet     if (lu->options.ConditionNumber) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Recip. condition number = %e\n", lu->rcond));
3085a46d3faSBarry Smith   } else if (sinfo > 0) {
309675d1226SHong Zhang     if (A->erroriffailure) {
31098921bdaSJacob Faibussowitsch       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot in row %" PetscInt_FMT, sinfo);
311675d1226SHong Zhang     } else {
312675d1226SHong Zhang       if (sinfo <= lu->A.ncol) {
313ad540459SPierre Jolivet         if (lu->options.ILU_FillTol == 0.0) F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3149566063dSJacob Faibussowitsch         PetscCall(PetscInfo(F, "Number of zero pivots %" PetscInt_FMT ", ILU_FillTol %g\n", sinfo, lu->options.ILU_FillTol));
315675d1226SHong Zhang       } else if (sinfo == lu->A.ncol + 1) {
316675d1226SHong Zhang         /*
317675d1226SHong Zhang          U is nonsingular, but RCOND is less than machine
318675d1226SHong Zhang                       precision, meaning that the matrix is singular to
319675d1226SHong Zhang                       working precision. Nevertheless, the solution and
320675d1226SHong Zhang                       error bounds are computed because there are a number
321675d1226SHong Zhang                       of situations where the computed solution can be more
322675d1226SHong Zhang                       accurate than the value of RCOND would suggest.
323675d1226SHong Zhang          */
3249d3446b2SPierre Jolivet         PetscCall(PetscInfo(F, "Matrix factor U is nonsingular, but is singular to working precision. The solution is computed. info %" PetscInt_FMT "\n", sinfo));
325675d1226SHong Zhang       } else { /* sinfo > lu->A.ncol + 1 */
326603e8f96SBarry Smith         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
3279566063dSJacob Faibussowitsch         PetscCall(PetscInfo(F, "Number of bytes allocated when memory allocation fails %" PetscInt_FMT "\n", sinfo));
328675d1226SHong Zhang       }
329675d1226SHong Zhang     }
33098921bdaSJacob Faibussowitsch   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "info = %" PetscInt_FMT ", the %" PetscInt_FMT "-th argument in gssvx() had an illegal value", sinfo, -sinfo);
3315a46d3faSBarry Smith 
3325a46d3faSBarry Smith   if (lu->options.PrintStat) {
3339566063dSJacob Faibussowitsch     PetscCall(PetscPrintf(PETSC_COMM_SELF, "MatLUFactorNumeric_SuperLU():\n"));
334e77caa6dSBarry Smith     PetscStackCallExternalVoid("SuperLU:StatPrint", StatPrint(&lu->stat));
3355a46d3faSBarry Smith     Lstore = (SCformat *)lu->L.Store;
3365a46d3faSBarry Smith     Ustore = (NCformat *)lu->U.Store;
3379566063dSJacob Faibussowitsch     PetscCall(PetscPrintf(PETSC_COMM_SELF, "  No of nonzeros in factor L = %" PetscInt_FMT "\n", Lstore->nnz));
3389566063dSJacob Faibussowitsch     PetscCall(PetscPrintf(PETSC_COMM_SELF, "  No of nonzeros in factor U = %" PetscInt_FMT "\n", Ustore->nnz));
3399566063dSJacob Faibussowitsch     PetscCall(PetscPrintf(PETSC_COMM_SELF, "  No of nonzeros in L+U = %" PetscInt_FMT "\n", Lstore->nnz + Ustore->nnz - lu->A.ncol));
3409566063dSJacob Faibussowitsch     PetscCall(PetscPrintf(PETSC_COMM_SELF, "  L\\U MB %.3f\ttotal MB needed %.3f\n", lu->mem_usage.for_lu / 1e6, lu->mem_usage.total_needed / 1e6));
3415a46d3faSBarry Smith   }
3425a46d3faSBarry Smith 
3435a46d3faSBarry Smith   lu->flg                = SAME_NONZERO_PATTERN;
3441d5ca7f3SHong Zhang   F->ops->solve          = MatSolve_SuperLU;
3451d5ca7f3SHong Zhang   F->ops->solvetranspose = MatSolveTranspose_SuperLU;
3461aef8b4cSStefano Zampini   F->ops->matsolve       = NULL;
3473ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3485a46d3faSBarry Smith }
3495a46d3faSBarry Smith 
MatDestroy_SuperLU(Mat A)350d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDestroy_SuperLU(Mat A)
351d71ae5a4SJacob Faibussowitsch {
352245fece9SBarry Smith   Mat_SuperLU *lu = (Mat_SuperLU *)A->data;
35314b396bbSKris Buschelman 
35414b396bbSKris Buschelman   PetscFunctionBegin;
355245fece9SBarry Smith   if (lu->CleanUpSuperLU) { /* Free the SuperLU datastructures */
356e77caa6dSBarry Smith     PetscStackCallExternalVoid("SuperLU:Destroy_SuperMatrix_Store", Destroy_SuperMatrix_Store(&lu->A));
3570e742b69SHong Zhang     if (lu->lwork >= 0) {
358e77caa6dSBarry Smith       PetscStackCallExternalVoid("SuperLU:Destroy_SuperNode_Matrix", Destroy_SuperNode_Matrix(&lu->L));
359e77caa6dSBarry Smith       PetscStackCallExternalVoid("SuperLU:Destroy_CompCol_Matrix", Destroy_CompCol_Matrix(&lu->U));
3600e742b69SHong Zhang     }
3610e742b69SHong Zhang   }
3627def7855SStefano Zampini   PetscStackCallExternalVoid("SuperLU:Destroy_SuperMatrix_Store", Destroy_SuperMatrix_Store(&lu->B));
3637def7855SStefano Zampini   PetscStackCallExternalVoid("SuperLU:Destroy_SuperMatrix_Store", Destroy_SuperMatrix_Store(&lu->X));
3647def7855SStefano Zampini   PetscStackCallExternalVoid("SuperLU:StatFree", StatFree(&lu->stat));
3659566063dSJacob Faibussowitsch   PetscCall(PetscFree(lu->etree));
3669566063dSJacob Faibussowitsch   PetscCall(PetscFree(lu->perm_r));
3679566063dSJacob Faibussowitsch   PetscCall(PetscFree(lu->perm_c));
3689566063dSJacob Faibussowitsch   PetscCall(PetscFree(lu->R));
3699566063dSJacob Faibussowitsch   PetscCall(PetscFree(lu->C));
3709566063dSJacob Faibussowitsch   PetscCall(PetscFree(lu->rhs_dup));
3719566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&lu->A_dup));
3729566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->data));
3730e742b69SHong Zhang 
374d954db57SHong Zhang   /* clear composed functions */
3759566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
3769566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSuperluSetILUDropTol_C", NULL));
3773ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
37814b396bbSKris Buschelman }
37914b396bbSKris Buschelman 
MatView_SuperLU(Mat A,PetscViewer viewer)380d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SuperLU(Mat A, PetscViewer viewer)
381d71ae5a4SJacob Faibussowitsch {
3829f196a02SMartin Diehl   PetscBool         isascii;
38314b396bbSKris Buschelman   PetscViewerFormat format;
38414b396bbSKris Buschelman 
38514b396bbSKris Buschelman   PetscFunctionBegin;
3869f196a02SMartin Diehl   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
3879f196a02SMartin Diehl   if (isascii) {
3889566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetFormat(viewer, &format));
38948a46eb9SPierre Jolivet     if (format == PETSC_VIEWER_ASCII_INFO) PetscCall(MatView_Info_SuperLU(A, viewer));
39014b396bbSKris Buschelman   }
3913ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
39214b396bbSKris Buschelman }
39314b396bbSKris Buschelman 
MatLUFactorSymbolic_SuperLU(Mat F,Mat A,IS r,IS c,const MatFactorInfo * info)394d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatLUFactorSymbolic_SuperLU(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
395d71ae5a4SJacob Faibussowitsch {
396f4f49eeaSPierre Jolivet   Mat_SuperLU *lu = (Mat_SuperLU *)F->data;
39726cc229bSBarry Smith   PetscInt     indx;
39826cc229bSBarry Smith   PetscBool    flg, set;
39926cc229bSBarry Smith   PetscReal    real_input;
400f0b74427SPierre Jolivet   const char  *colperm[]    = {"NATURAL", "MMD_ATA", "MMD_AT_PLUS_A", "COLAMD"}; /* MY_PERMC - not supported by the PETSc interface yet */
40126cc229bSBarry Smith   const char  *iterrefine[] = {"NOREFINE", "SINGLE", "DOUBLE", "EXTRA"};
402f0b74427SPierre Jolivet   const char  *rowperm[]    = {"NOROWPERM", "LargeDiag"}; /* MY_PERMC - not supported by the PETSc interface yet */
403b24902e0SBarry Smith 
404b24902e0SBarry Smith   PetscFunctionBegin;
40526cc229bSBarry Smith   /* Set options to F */
40626cc229bSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "SuperLU Options", "Mat");
40726cc229bSBarry Smith   PetscCall(PetscOptionsBool("-mat_superlu_equil", "Equil", "None", (PetscBool)lu->options.Equil, (PetscBool *)&lu->options.Equil, NULL));
40826cc229bSBarry Smith   PetscCall(PetscOptionsEList("-mat_superlu_colperm", "ColPerm", "None", colperm, 4, colperm[3], &indx, &flg));
40926cc229bSBarry Smith   if (flg) lu->options.ColPerm = (colperm_t)indx;
41026cc229bSBarry Smith   PetscCall(PetscOptionsEList("-mat_superlu_iterrefine", "IterRefine", "None", iterrefine, 4, iterrefine[0], &indx, &flg));
41126cc229bSBarry Smith   if (flg) lu->options.IterRefine = (IterRefine_t)indx;
41226cc229bSBarry Smith   PetscCall(PetscOptionsBool("-mat_superlu_symmetricmode", "SymmetricMode", "None", (PetscBool)lu->options.SymmetricMode, &flg, &set));
41326cc229bSBarry Smith   if (set && flg) lu->options.SymmetricMode = YES;
41426cc229bSBarry Smith   PetscCall(PetscOptionsReal("-mat_superlu_diagpivotthresh", "DiagPivotThresh", "None", lu->options.DiagPivotThresh, &real_input, &flg));
41526cc229bSBarry Smith   if (flg) lu->options.DiagPivotThresh = (double)real_input;
41626cc229bSBarry Smith   PetscCall(PetscOptionsBool("-mat_superlu_pivotgrowth", "PivotGrowth", "None", (PetscBool)lu->options.PivotGrowth, &flg, &set));
41726cc229bSBarry Smith   if (set && flg) lu->options.PivotGrowth = YES;
41826cc229bSBarry Smith   PetscCall(PetscOptionsBool("-mat_superlu_conditionnumber", "ConditionNumber", "None", (PetscBool)lu->options.ConditionNumber, &flg, &set));
41926cc229bSBarry Smith   if (set && flg) lu->options.ConditionNumber = YES;
42026cc229bSBarry Smith   PetscCall(PetscOptionsEList("-mat_superlu_rowperm", "rowperm", "None", rowperm, 2, rowperm[lu->options.RowPerm], &indx, &flg));
42126cc229bSBarry Smith   if (flg) lu->options.RowPerm = (rowperm_t)indx;
42226cc229bSBarry Smith   PetscCall(PetscOptionsBool("-mat_superlu_replacetinypivot", "ReplaceTinyPivot", "None", (PetscBool)lu->options.ReplaceTinyPivot, &flg, &set));
42326cc229bSBarry Smith   if (set && flg) lu->options.ReplaceTinyPivot = YES;
42426cc229bSBarry Smith   PetscCall(PetscOptionsBool("-mat_superlu_printstat", "PrintStat", "None", (PetscBool)lu->options.PrintStat, &flg, &set));
42526cc229bSBarry Smith   if (set && flg) lu->options.PrintStat = YES;
42626cc229bSBarry Smith   PetscCall(PetscOptionsInt("-mat_superlu_lwork", "size of work array in bytes used by factorization", "None", lu->lwork, &lu->lwork, NULL));
42726cc229bSBarry Smith   if (lu->lwork > 0) {
42826cc229bSBarry Smith     /* lwork is in bytes, hence PetscMalloc() is used here, not PetscMalloc1()*/
42926cc229bSBarry Smith     PetscCall(PetscMalloc(lu->lwork, &lu->work));
43026cc229bSBarry Smith   } else if (lu->lwork != 0 && lu->lwork != -1) {
43126cc229bSBarry Smith     PetscCall(PetscPrintf(PETSC_COMM_SELF, "   Warning: lwork %" PetscInt_FMT " is not supported by SUPERLU. The default lwork=0 is used.\n", lu->lwork));
43226cc229bSBarry Smith     lu->lwork = 0;
43326cc229bSBarry Smith   }
43426cc229bSBarry Smith   /* ilu options */
43526cc229bSBarry Smith   PetscCall(PetscOptionsReal("-mat_superlu_ilu_droptol", "ILU_DropTol", "None", lu->options.ILU_DropTol, &real_input, &flg));
43626cc229bSBarry Smith   if (flg) lu->options.ILU_DropTol = (double)real_input;
43726cc229bSBarry Smith   PetscCall(PetscOptionsReal("-mat_superlu_ilu_filltol", "ILU_FillTol", "None", lu->options.ILU_FillTol, &real_input, &flg));
43826cc229bSBarry Smith   if (flg) lu->options.ILU_FillTol = (double)real_input;
43926cc229bSBarry Smith   PetscCall(PetscOptionsReal("-mat_superlu_ilu_fillfactor", "ILU_FillFactor", "None", lu->options.ILU_FillFactor, &real_input, &flg));
44026cc229bSBarry Smith   if (flg) lu->options.ILU_FillFactor = (double)real_input;
44126cc229bSBarry Smith   PetscCall(PetscOptionsInt("-mat_superlu_ilu_droprull", "ILU_DropRule", "None", lu->options.ILU_DropRule, &lu->options.ILU_DropRule, NULL));
44226cc229bSBarry Smith   PetscCall(PetscOptionsInt("-mat_superlu_ilu_norm", "ILU_Norm", "None", lu->options.ILU_Norm, &indx, &flg));
44326cc229bSBarry Smith   if (flg) lu->options.ILU_Norm = (norm_t)indx;
44426cc229bSBarry Smith   PetscCall(PetscOptionsInt("-mat_superlu_ilu_milu", "ILU_MILU", "None", lu->options.ILU_MILU, &indx, &flg));
44526cc229bSBarry Smith   if (flg) lu->options.ILU_MILU = (milu_t)indx;
44626cc229bSBarry Smith   PetscOptionsEnd();
44726cc229bSBarry Smith 
448b24902e0SBarry Smith   lu->flg                 = DIFFERENT_NONZERO_PATTERN;
449b24902e0SBarry Smith   lu->CleanUpSuperLU      = PETSC_TRUE;
4501d5ca7f3SHong Zhang   F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU;
4512366e350SStefano Zampini 
4522366e350SStefano Zampini   /* if we are here, the nonzero pattern has changed unless the user explicitly called MatLUFactorSymbolic */
4539566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&lu->A_dup));
45448a46eb9SPierre Jolivet   if (lu->needconversion) PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &lu->A_dup));
4552366e350SStefano Zampini   if (lu->options.Equil == YES && !lu->A_dup) { /* superlu overwrites input matrix and rhs when Equil is used, thus create A_dup to keep user's A unchanged */
4569566063dSJacob Faibussowitsch     PetscCall(MatDuplicate_SeqAIJ(A, MAT_COPY_VALUES, &lu->A_dup));
4572366e350SStefano Zampini   }
4583ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
459b24902e0SBarry Smith }
460b24902e0SBarry Smith 
MatSuperluSetILUDropTol_SuperLU(Mat F,PetscReal dtol)461d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSuperluSetILUDropTol_SuperLU(Mat F, PetscReal dtol)
462d71ae5a4SJacob Faibussowitsch {
463245fece9SBarry Smith   Mat_SuperLU *lu = (Mat_SuperLU *)F->data;
4645ccb76cbSHong Zhang 
4655ccb76cbSHong Zhang   PetscFunctionBegin;
4665ccb76cbSHong Zhang   lu->options.ILU_DropTol = dtol;
4673ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4685ccb76cbSHong Zhang }
4695ccb76cbSHong Zhang 
4705ccb76cbSHong Zhang /*@
4711d27aa22SBarry Smith   MatSuperluSetILUDropTol - Set SuperLU <https://portal.nersc.gov/project/sparse/superlu/superlu_ug.pdf> ILU drop tolerance
472147403d9SBarry Smith 
473c3339decSBarry Smith   Logically Collective
4745ccb76cbSHong Zhang 
4755ccb76cbSHong Zhang   Input Parameters:
4762ef1f0ffSBarry Smith + F    - the factored matrix obtained by calling `MatGetFactor()`
4775ccb76cbSHong Zhang - dtol - drop tolerance
4785ccb76cbSHong Zhang 
4793c7db156SBarry Smith   Options Database Key:
480147403d9SBarry Smith . -mat_superlu_ilu_droptol <dtol> - the drop tolerance
4815ccb76cbSHong Zhang 
4825ccb76cbSHong Zhang   Level: beginner
4835ccb76cbSHong Zhang 
4841cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MATSOLVERSUPERLU`
4855ccb76cbSHong Zhang @*/
MatSuperluSetILUDropTol(Mat F,PetscReal dtol)486d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSuperluSetILUDropTol(Mat F, PetscReal dtol)
487d71ae5a4SJacob Faibussowitsch {
4885ccb76cbSHong Zhang   PetscFunctionBegin;
4895ccb76cbSHong Zhang   PetscValidHeaderSpecific(F, MAT_CLASSID, 1);
49069fbec6eSBarry Smith   PetscValidLogicalCollectiveReal(F, dtol, 2);
491cac4c232SBarry Smith   PetscTryMethod(F, "MatSuperluSetILUDropTol_C", (Mat, PetscReal), (F, dtol));
4923ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4935ccb76cbSHong Zhang }
4945ccb76cbSHong Zhang 
MatFactorGetSolverType_seqaij_superlu(Mat A,MatSolverType * type)495ba38deedSJacob Faibussowitsch static PetscErrorCode MatFactorGetSolverType_seqaij_superlu(Mat A, MatSolverType *type)
496d71ae5a4SJacob Faibussowitsch {
49735bd34faSBarry Smith   PetscFunctionBegin;
4982692d6eeSBarry Smith   *type = MATSOLVERSUPERLU;
4993ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
50035bd34faSBarry Smith }
50135bd34faSBarry Smith 
502b24902e0SBarry Smith /*MC
503ba992d64SSatish Balay   MATSOLVERSUPERLU = "superlu" - A solver package providing solvers LU and ILU for sequential matrices
5041d27aa22SBarry Smith   via the external package SuperLU <https://portal.nersc.gov/project/sparse/superlu/superlu_ug.pdf>
505b24902e0SBarry Smith 
5062ef1f0ffSBarry Smith   Use `./configure --download-superlu` to have PETSc installed with SuperLU
507b24902e0SBarry Smith 
5082ef1f0ffSBarry Smith   Use `-pc_type lu` `-pc_factor_mat_solver_type superlu` to use this direct solver
509c2b89b5dSBarry Smith 
510b24902e0SBarry Smith   Options Database Keys:
511e08999f5SMatthew G Knepley + -mat_superlu_equil <FALSE>            - Equil (None)
5122ef1f0ffSBarry Smith . -mat_superlu_colperm <COLAMD>         - (choose one of) `NATURAL`, `MMD_ATA MMD_AT_PLUS_A`, `COLAMD`
5132ef1f0ffSBarry Smith . -mat_superlu_iterrefine <NOREFINE>    - (choose one of) `NOREFINE`, `SINGLE`, `DOUBLE`, `EXTRA`
514e08999f5SMatthew G Knepley . -mat_superlu_symmetricmode: <FALSE>   - SymmetricMode (None)
515e08999f5SMatthew G Knepley . -mat_superlu_diagpivotthresh <1>      - DiagPivotThresh (None)
516e08999f5SMatthew G Knepley . -mat_superlu_pivotgrowth <FALSE>      - PivotGrowth (None)
517e08999f5SMatthew G Knepley . -mat_superlu_conditionnumber <FALSE>  - ConditionNumber (None)
5182ef1f0ffSBarry Smith . -mat_superlu_rowperm <NOROWPERM>      - (choose one of) `NOROWPERM`, `LargeDiag`
519e08999f5SMatthew G Knepley . -mat_superlu_replacetinypivot <FALSE> - ReplaceTinyPivot (None)
520e08999f5SMatthew G Knepley . -mat_superlu_printstat <FALSE>        - PrintStat (None)
521e08999f5SMatthew G Knepley . -mat_superlu_lwork <0>                - size of work array in bytes used by factorization (None)
522e08999f5SMatthew G Knepley . -mat_superlu_ilu_droptol <0>          - ILU_DropTol (None)
523e08999f5SMatthew G Knepley . -mat_superlu_ilu_filltol <0>          - ILU_FillTol (None)
524e08999f5SMatthew G Knepley . -mat_superlu_ilu_fillfactor <0>       - ILU_FillFactor (None)
525e08999f5SMatthew G Knepley . -mat_superlu_ilu_droprull <0>         - ILU_DropRule (None)
526e08999f5SMatthew G Knepley . -mat_superlu_ilu_norm <0>             - ILU_Norm (None)
527e08999f5SMatthew G Knepley - -mat_superlu_ilu_milu <0>             - ILU_MILU (None)
528b24902e0SBarry Smith 
5292ef1f0ffSBarry Smith    Level: beginner
5302ef1f0ffSBarry Smith 
53195452b02SPatrick Sanan    Notes:
53211a5261eSBarry Smith    Do not confuse this with `MATSOLVERSUPERLU_DIST` which is for parallel sparse solves
5335c9eb25fSBarry Smith 
53411a5261eSBarry Smith    Cannot use ordering provided by PETSc, provides its own.
5352c7c0729SBarry Smith 
5361cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `PCLU`, `PCILU`, `MATSOLVERSUPERLU_DIST`, `MATSOLVERMUMPS`, `PCFactorSetMatSolverType()`, `MatSolverType`
537b24902e0SBarry Smith M*/
538b24902e0SBarry Smith 
MatGetFactor_seqaij_superlu(Mat A,MatFactorType ftype,Mat * F)539d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_seqaij_superlu(Mat A, MatFactorType ftype, Mat *F)
540d71ae5a4SJacob Faibussowitsch {
54114b396bbSKris Buschelman   Mat          B;
542f0c56d0fSKris Buschelman   Mat_SuperLU *lu;
54326cc229bSBarry Smith   PetscInt     m = A->rmap->n, n = A->cmap->n;
54414b396bbSKris Buschelman 
54514b396bbSKris Buschelman   PetscFunctionBegin;
5469566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
5479566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, PETSC_DETERMINE, PETSC_DETERMINE));
5489566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy("superlu", &((PetscObject)B)->type_name));
5499566063dSJacob Faibussowitsch   PetscCall(MatSetUp(B));
55066e17bc3SBarry Smith   B->trivialsymbolic = PETSC_TRUE;
551*966bd95aSPierre Jolivet   PetscCheck(ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
552b24902e0SBarry Smith   B->ops->lufactorsymbolic  = MatLUFactorSymbolic_SuperLU;
553cffbb591SHong Zhang   B->ops->ilufactorsymbolic = MatLUFactorSymbolic_SuperLU;
554cffbb591SHong Zhang 
5559566063dSJacob Faibussowitsch   PetscCall(PetscFree(B->solvertype));
5569566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERSUPERLU, &B->solvertype));
55700c67f3bSHong Zhang 
558b9c12af5SBarry Smith   B->ops->getinfo = MatGetInfo_External;
559b24902e0SBarry Smith   B->ops->destroy = MatDestroy_SuperLU;
5603519fcdcSHong Zhang   B->ops->view    = MatView_SuperLU;
561d5f3da31SBarry Smith   B->factortype   = ftype;
56294b7f48cSBarry Smith   B->assembled    = PETSC_TRUE; /* required by -ksp_view */
5635c9eb25fSBarry Smith   B->preallocated = PETSC_TRUE;
56414b396bbSKris Buschelman 
5654dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&lu));
566cae5a23dSHong Zhang 
567cffbb591SHong Zhang   if (ftype == MAT_FACTOR_LU) {
5689ce50919SHong Zhang     set_default_options(&lu->options);
5693d6581fbSHong Zhang     /* Comments from SuperLU_4.0/SRC/dgssvx.c:
5703d6581fbSHong Zhang       "Whether or not the system will be equilibrated depends on the
5713d6581fbSHong Zhang        scaling of the matrix A, but if equilibration is used, A is
5723d6581fbSHong Zhang        overwritten by diag(R)*A*diag(C) and B by diag(R)*B
5733d6581fbSHong Zhang        (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans = TRANS or CONJ)."
5743d6581fbSHong Zhang      We set 'options.Equil = NO' as default because additional space is needed for it.
5753d6581fbSHong Zhang     */
5763d6581fbSHong Zhang     lu->options.Equil = NO;
577cffbb591SHong Zhang   } else if (ftype == MAT_FACTOR_ILU) {
5780c74a584SJed Brown     /* Set the default input options of ilu: */
579e77caa6dSBarry Smith     PetscStackCallExternalVoid("SuperLU:ilu_set_default_options", ilu_set_default_options(&lu->options));
580cffbb591SHong Zhang   }
5819ce50919SHong Zhang   lu->options.PrintStat = NO;
5821a2e2f44SHong Zhang 
5835d8b2efaSHong Zhang   /* Initialize the statistics variables. */
584e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU:StatInit", StatInit(&lu->stat));
5858914a3f7SHong Zhang   lu->lwork = 0; /* allocate space internally by system malloc */
5869ce50919SHong Zhang 
5875d8b2efaSHong Zhang   /* Allocate spaces (notice sizes are for the transpose) */
5889566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m, &lu->etree));
5899566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n, &lu->perm_r));
5909566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m, &lu->perm_c));
5919566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n, &lu->R));
5929566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m, &lu->C));
5935d8b2efaSHong Zhang 
5945d8b2efaSHong Zhang   /* create rhs and solution x without allocate space for .Store */
5955d8b2efaSHong Zhang #if defined(PETSC_USE_COMPLEX)
5963cb270beSHong Zhang   #if defined(PETSC_USE_REAL_SINGLE)
597e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU:cCreate_Dense_Matrix(", cCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_C, SLU_GE));
598e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU:cCreate_Dense_Matrix(", cCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_C, SLU_GE));
5993cb270beSHong Zhang   #else
600e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU:zCreate_Dense_Matrix", zCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_Z, SLU_GE));
601e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU:zCreate_Dense_Matrix", zCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_Z, SLU_GE));
6023cb270beSHong Zhang   #endif
6033cb270beSHong Zhang #else
6043cb270beSHong Zhang   #if defined(PETSC_USE_REAL_SINGLE)
605e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU:sCreate_Dense_Matrix", sCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_S, SLU_GE));
606e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU:sCreate_Dense_Matrix", sCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_S, SLU_GE));
6075d8b2efaSHong Zhang   #else
608e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU:dCreate_Dense_Matrix", dCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_D, SLU_GE));
609e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU:dCreate_Dense_Matrix", dCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_D, SLU_GE));
6105d8b2efaSHong Zhang   #endif
6113cb270beSHong Zhang #endif
6125d8b2efaSHong Zhang 
6139566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_seqaij_superlu));
6149566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSuperluSetILUDropTol_C", MatSuperluSetILUDropTol_SuperLU));
615245fece9SBarry Smith   B->data = lu;
61620be8e61SHong Zhang 
617899d7b4fSKris Buschelman   *F = B;
6183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
61914b396bbSKris Buschelman }
620d954db57SHong Zhang 
MatGetFactor_seqsell_superlu(Mat A,MatFactorType ftype,Mat * F)621d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_seqsell_superlu(Mat A, MatFactorType ftype, Mat *F)
622d71ae5a4SJacob Faibussowitsch {
6232366e350SStefano Zampini   Mat_SuperLU *lu;
6242366e350SStefano Zampini 
6252366e350SStefano Zampini   PetscFunctionBegin;
6269566063dSJacob Faibussowitsch   PetscCall(MatGetFactor_seqaij_superlu(A, ftype, F));
6272366e350SStefano Zampini   lu                 = (Mat_SuperLU *)((*F)->data);
6282366e350SStefano Zampini   lu->needconversion = PETSC_TRUE;
6293ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
6302366e350SStefano Zampini }
6312366e350SStefano Zampini 
MatSolverTypeRegister_SuperLU(void)632d1f0640dSPierre Jolivet PETSC_INTERN PetscErrorCode MatSolverTypeRegister_SuperLU(void)
633d71ae5a4SJacob Faibussowitsch {
63442c9c57cSBarry Smith   PetscFunctionBegin;
6359566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_seqaij_superlu));
6369566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU, MATSEQAIJ, MAT_FACTOR_ILU, MatGetFactor_seqaij_superlu));
6379566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_seqsell_superlu));
6389566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU, MATSEQSELL, MAT_FACTOR_ILU, MatGetFactor_seqsell_superlu));
6393ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
64042c9c57cSBarry Smith }
641