xref: /petsc/src/mat/impls/aij/seq/superlu/superlu.c (revision df6f6c19b095f1abc1ac27fcbb9c2092a4a2a65e)
1 
2 /*  --------------------------------------------------------------------
3 
4      This file implements a subclass of the SeqAIJ matrix class that uses
5      the SuperLU sparse solver. You can use this as a starting point for
6      implementing your own subclass of a PETSc matrix class.
7 
8      This demonstrates a way to make an implementation inheritence of a PETSc
9      matrix type. This means constructing a new matrix type (SuperLU) by changing some
10      of the methods of the previous type (SeqAIJ), adding additional data, and possibly
11      additional method. (See any book on object oriented programming).
12 */
13 
14 /*
15      Defines the data structure for the base matrix type (SeqAIJ)
16 */
17 #include <../src/mat/impls/aij/seq/aij.h>    /*I "petscmat.h" I*/
18 
19 /*
20      SuperLU include files
21 */
22 EXTERN_C_BEGIN
23 #if defined(PETSC_USE_COMPLEX)
24 #if defined(PETSC_USE_REAL_SINGLE)
25 #include <slu_cdefs.h>
26 #else
27 #include <slu_zdefs.h>
28 #endif
29 #else
30 #if defined(PETSC_USE_REAL_SINGLE)
31 #include <slu_sdefs.h>
32 #else
33 #include <slu_ddefs.h>
34 #endif
35 #endif
36 #include <slu_util.h>
37 EXTERN_C_END
38 
39 /*
40      This is the data we are "ADDING" to the SeqAIJ matrix type to get the SuperLU matrix type
41 */
42 typedef struct {
43   SuperMatrix       A,L,U,B,X;
44   superlu_options_t options;
45   PetscInt          *perm_c; /* column permutation vector */
46   PetscInt          *perm_r; /* row permutations from partial pivoting */
47   PetscInt          *etree;
48   PetscReal         *R, *C;
49   char              equed[1];
50   PetscInt          lwork;
51   void              *work;
52   PetscReal         rpg, rcond;
53   mem_usage_t       mem_usage;
54   MatStructure      flg;
55   SuperLUStat_t     stat;
56   Mat               A_dup;
57   PetscScalar       *rhs_dup;
58 
59   /* Flag to clean up (non-global) SuperLU objects during Destroy */
60   PetscBool CleanUpSuperLU;
61 } Mat_SuperLU;
62 
63 extern PetscErrorCode MatFactorInfo_SuperLU(Mat,PetscViewer);
64 extern PetscErrorCode MatLUFactorNumeric_SuperLU(Mat,Mat,const MatFactorInfo*);
65 extern PetscErrorCode MatDestroy_SuperLU(Mat);
66 extern PetscErrorCode MatView_SuperLU(Mat,PetscViewer);
67 extern PetscErrorCode MatAssemblyEnd_SuperLU(Mat,MatAssemblyType);
68 extern PetscErrorCode MatSolve_SuperLU(Mat,Vec,Vec);
69 extern PetscErrorCode MatMatSolve_SuperLU(Mat,Mat,Mat);
70 extern PetscErrorCode MatSolveTranspose_SuperLU(Mat,Vec,Vec);
71 extern PetscErrorCode MatLUFactorSymbolic_SuperLU(Mat,Mat,IS,IS,const MatFactorInfo*);
72 extern PetscErrorCode MatDuplicate_SuperLU(Mat, MatDuplicateOption, Mat*);
73 
74 /*
75     Utility function
76 */
77 #undef __FUNCT__
78 #define __FUNCT__ "MatFactorInfo_SuperLU"
79 PetscErrorCode MatFactorInfo_SuperLU(Mat A,PetscViewer viewer)
80 {
81   Mat_SuperLU       *lu= (Mat_SuperLU*)A->spptr;
82   PetscErrorCode    ierr;
83   superlu_options_t options;
84 
85   PetscFunctionBegin;
86   /* check if matrix is superlu_dist type */
87   if (A->ops->solve != MatSolve_SuperLU) PetscFunctionReturn(0);
88 
89   options = lu->options;
90 
91   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU run parameters:\n");CHKERRQ(ierr);
92   ierr = PetscViewerASCIIPrintf(viewer,"  Equil: %s\n",(options.Equil != NO) ? "YES" : "NO");CHKERRQ(ierr);
93   ierr = PetscViewerASCIIPrintf(viewer,"  ColPerm: %D\n",options.ColPerm);CHKERRQ(ierr);
94   ierr = PetscViewerASCIIPrintf(viewer,"  IterRefine: %D\n",options.IterRefine);CHKERRQ(ierr);
95   ierr = PetscViewerASCIIPrintf(viewer,"  SymmetricMode: %s\n",(options.SymmetricMode != NO) ? "YES" : "NO");CHKERRQ(ierr);
96   ierr = PetscViewerASCIIPrintf(viewer,"  DiagPivotThresh: %g\n",options.DiagPivotThresh);CHKERRQ(ierr);
97   ierr = PetscViewerASCIIPrintf(viewer,"  PivotGrowth: %s\n",(options.PivotGrowth != NO) ? "YES" : "NO");CHKERRQ(ierr);
98   ierr = PetscViewerASCIIPrintf(viewer,"  ConditionNumber: %s\n",(options.ConditionNumber != NO) ? "YES" : "NO");CHKERRQ(ierr);
99   ierr = PetscViewerASCIIPrintf(viewer,"  RowPerm: %D\n",options.RowPerm);CHKERRQ(ierr);
100   ierr = PetscViewerASCIIPrintf(viewer,"  ReplaceTinyPivot: %s\n",(options.ReplaceTinyPivot != NO) ? "YES" : "NO");CHKERRQ(ierr);
101   ierr = PetscViewerASCIIPrintf(viewer,"  PrintStat: %s\n",(options.PrintStat != NO) ? "YES" : "NO");CHKERRQ(ierr);
102   ierr = PetscViewerASCIIPrintf(viewer,"  lwork: %D\n",lu->lwork);CHKERRQ(ierr);
103   if (A->factortype == MAT_FACTOR_ILU) {
104     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_DropTol: %g\n",options.ILU_DropTol);CHKERRQ(ierr);
105     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_FillTol: %g\n",options.ILU_FillTol);CHKERRQ(ierr);
106     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_FillFactor: %g\n",options.ILU_FillFactor);CHKERRQ(ierr);
107     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_DropRule: %D\n",options.ILU_DropRule);CHKERRQ(ierr);
108     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_Norm: %D\n",options.ILU_Norm);CHKERRQ(ierr);
109     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_MILU: %D\n",options.ILU_MILU);CHKERRQ(ierr);
110   }
111   PetscFunctionReturn(0);
112 }
113 
114 /*
115     These are the methods provided to REPLACE the corresponding methods of the
116    SeqAIJ matrix class. Other methods of SeqAIJ are not replaced
117 */
118 #undef __FUNCT__
119 #define __FUNCT__ "MatLUFactorNumeric_SuperLU"
120 PetscErrorCode MatLUFactorNumeric_SuperLU(Mat F,Mat A,const MatFactorInfo *info)
121 {
122   Mat_SuperLU    *lu = (Mat_SuperLU*)F->spptr;
123   Mat_SeqAIJ     *aa;
124   PetscErrorCode ierr;
125   PetscInt       sinfo;
126   PetscReal      ferr, berr;
127   NCformat       *Ustore;
128   SCformat       *Lstore;
129 
130   PetscFunctionBegin;
131   if (lu->flg == SAME_NONZERO_PATTERN) { /* successing numerical factorization */
132     lu->options.Fact = SamePattern;
133     /* Ref: ~SuperLU_3.0/EXAMPLE/dlinsolx2.c */
134     Destroy_SuperMatrix_Store(&lu->A);
135     if (lu->options.Equil) {
136       ierr = MatCopy_SeqAIJ(A,lu->A_dup,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
137     }
138     if (lu->lwork >= 0) {
139       PetscStackCall("SuperLU:Destroy_SuperNode_Matrix",Destroy_SuperNode_Matrix(&lu->L));
140       PetscStackCall("SuperLU:Destroy_CompCol_Matrix",Destroy_CompCol_Matrix(&lu->U));
141       lu->options.Fact = SamePattern;
142     }
143   }
144 
145   /* Create the SuperMatrix for lu->A=A^T:
146        Since SuperLU likes column-oriented matrices,we pass it the transpose,
147        and then solve A^T X = B in MatSolve(). */
148   if (lu->options.Equil) {
149     aa = (Mat_SeqAIJ*)(lu->A_dup)->data;
150   } else {
151     aa = (Mat_SeqAIJ*)(A)->data;
152   }
153 #if defined(PETSC_USE_COMPLEX)
154 #if defined(PETSC_USE_REAL_SINGLE)
155   PetscStackCall("SuperLU:cCreate_CompCol_Matrix",cCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,(singlecomplex*)aa->a,aa->j,aa->i,SLU_NC,SLU_C,SLU_GE));
156 #else
157   PetscStackCall("SuperLU:zCreate_CompCol_Matrix",zCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,SLU_NC,SLU_Z,SLU_GE));
158 #endif
159 #else
160 #if defined(PETSC_USE_REAL_SINGLE)
161   PetscStackCall("SuperLU:sCreate_CompCol_Matrix",sCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,aa->a,aa->j,aa->i,SLU_NC,SLU_S,SLU_GE));
162 #else
163   PetscStackCall("SuperLU:dCreate_CompCol_Matrix",dCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,aa->a,aa->j,aa->i,SLU_NC,SLU_D,SLU_GE));
164 #endif
165 #endif
166 
167   /* Numerical factorization */
168   lu->B.ncol = 0;  /* Indicate not to solve the system */
169   if (F->factortype == MAT_FACTOR_LU) {
170 #if defined(PETSC_USE_COMPLEX)
171 #if defined(PETSC_USE_REAL_SINGLE)
172     PetscStackCall("SuperLU:cgssvx",cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
173                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
174                                      &lu->mem_usage, &lu->stat, &sinfo));
175 #else
176     PetscStackCall("SuperLU:zgssvx",zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
177                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
178                                      &lu->mem_usage, &lu->stat, &sinfo));
179 #endif
180 #else
181 #if defined(PETSC_USE_REAL_SINGLE)
182     PetscStackCall("SuperLU:sgssvx",sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
183                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
184                                      &lu->mem_usage, &lu->stat, &sinfo));
185 #else
186     PetscStackCall("SuperLU:dgssvx",dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
187                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
188                                      &lu->mem_usage, &lu->stat, &sinfo));
189 #endif
190 #endif
191   } else if (F->factortype == MAT_FACTOR_ILU) {
192     /* Compute the incomplete factorization, condition number and pivot growth */
193 #if defined(PETSC_USE_COMPLEX)
194 #if defined(PETSC_USE_REAL_SINGLE)
195     PetscStackCall("SuperLU:cgsisx",cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r,lu->etree, lu->equed, lu->R, lu->C,
196                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
197                                      &lu->mem_usage, &lu->stat, &sinfo));
198 #else
199     PetscStackCall("SuperLU:zgsisx",zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r,lu->etree, lu->equed, lu->R, lu->C,
200                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
201                                      &lu->mem_usage, &lu->stat, &sinfo));
202 #endif
203 #else
204 #if defined(PETSC_USE_REAL_SINGLE)
205     PetscStackCall("SuperLU:sgsisx",sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
206                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
207                                      &lu->mem_usage, &lu->stat, &sinfo));
208 #else
209     PetscStackCall("SuperLU:dgsisx",dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
210                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
211                                      &lu->mem_usage, &lu->stat, &sinfo));
212 #endif
213 #endif
214   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
215   if (!sinfo || sinfo == lu->A.ncol+1) {
216     if (lu->options.PivotGrowth) {
217       ierr = PetscPrintf(PETSC_COMM_SELF,"  Recip. pivot growth = %e\n", lu->rpg);
218     }
219     if (lu->options.ConditionNumber) {
220       ierr = PetscPrintf(PETSC_COMM_SELF,"  Recip. condition number = %e\n", lu->rcond);
221     }
222   } else if (sinfo > 0) {
223     if (lu->lwork == -1) {
224       ierr = PetscPrintf(PETSC_COMM_SELF,"  ** Estimated memory: %D bytes\n", sinfo - lu->A.ncol);
225     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot in row %D",sinfo);
226   } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %D, the %D-th argument in gssvx() had an illegal value", sinfo,-sinfo);
227 
228   if (lu->options.PrintStat) {
229     ierr = PetscPrintf(PETSC_COMM_SELF,"MatLUFactorNumeric_SuperLU():\n");
230     PetscStackCall("SuperLU:StatPrint",StatPrint(&lu->stat));
231     Lstore = (SCformat*) lu->L.Store;
232     Ustore = (NCformat*) lu->U.Store;
233     ierr   = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in factor L = %D\n", Lstore->nnz);
234     ierr   = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in factor U = %D\n", Ustore->nnz);
235     ierr   = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in L+U = %D\n", Lstore->nnz + Ustore->nnz - lu->A.ncol);
236     ierr   = PetscPrintf(PETSC_COMM_SELF,"  L\\U MB %.3f\ttotal MB needed %.3f\n",
237                          lu->mem_usage.for_lu/1e6, lu->mem_usage.total_needed/1e6);
238   }
239 
240   lu->flg                = SAME_NONZERO_PATTERN;
241   F->ops->solve          = MatSolve_SuperLU;
242   F->ops->solvetranspose = MatSolveTranspose_SuperLU;
243   F->ops->matsolve       = MatMatSolve_SuperLU;
244   PetscFunctionReturn(0);
245 }
246 
247 #undef __FUNCT__
248 #define __FUNCT__ "MatGetDiagonal_SuperLU"
249 PetscErrorCode MatGetDiagonal_SuperLU(Mat A,Vec v)
250 {
251   PetscFunctionBegin;
252   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Mat type: SuperLU factor");
253   PetscFunctionReturn(0);
254 }
255 
256 #undef __FUNCT__
257 #define __FUNCT__ "MatDestroy_SuperLU"
258 PetscErrorCode MatDestroy_SuperLU(Mat A)
259 {
260   PetscErrorCode ierr;
261   Mat_SuperLU    *lu=(Mat_SuperLU*)A->spptr;
262 
263   PetscFunctionBegin;
264   if (lu && lu->CleanUpSuperLU) { /* Free the SuperLU datastructures */
265     PetscStackCall("SuperLU:Destroy_SuperMatrix_Store",Destroy_SuperMatrix_Store(&lu->A));
266     PetscStackCall("SuperLU:Destroy_SuperMatrix_Store",Destroy_SuperMatrix_Store(&lu->B));
267     PetscStackCall("SuperLU:Destroy_SuperMatrix_Store",Destroy_SuperMatrix_Store(&lu->X));
268     PetscStackCall("SuperLU:StatFree",StatFree(&lu->stat));
269     if (lu->lwork >= 0) {
270       PetscStackCall("SuperLU:Destroy_SuperNode_Matrix",Destroy_SuperNode_Matrix(&lu->L));
271       PetscStackCall("SuperLU:Destroy_CompCol_Matrix",Destroy_CompCol_Matrix(&lu->U));
272     }
273   }
274   if (lu) {
275     ierr = PetscFree(lu->etree);CHKERRQ(ierr);
276     ierr = PetscFree(lu->perm_r);CHKERRQ(ierr);
277     ierr = PetscFree(lu->perm_c);CHKERRQ(ierr);
278     ierr = PetscFree(lu->R);CHKERRQ(ierr);
279     ierr = PetscFree(lu->C);CHKERRQ(ierr);
280     ierr = PetscFree(lu->rhs_dup);CHKERRQ(ierr);
281     ierr = MatDestroy(&lu->A_dup);CHKERRQ(ierr);
282   }
283   ierr = PetscFree(A->spptr);CHKERRQ(ierr);
284 
285   /* clear composed functions */
286   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverPackage_C",NULL);CHKERRQ(ierr);
287   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSuperluSetILUDropTol_C",NULL);CHKERRQ(ierr);
288 
289   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
290   PetscFunctionReturn(0);
291 }
292 
293 #undef __FUNCT__
294 #define __FUNCT__ "MatView_SuperLU"
295 PetscErrorCode MatView_SuperLU(Mat A,PetscViewer viewer)
296 {
297   PetscErrorCode    ierr;
298   PetscBool         iascii;
299   PetscViewerFormat format;
300 
301   PetscFunctionBegin;
302   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
303   if (iascii) {
304     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
305     if (format == PETSC_VIEWER_ASCII_INFO) {
306       ierr = MatFactorInfo_SuperLU(A,viewer);CHKERRQ(ierr);
307     }
308   }
309   PetscFunctionReturn(0);
310 }
311 
312 
313 #undef __FUNCT__
314 #define __FUNCT__ "MatSolve_SuperLU_Private"
315 PetscErrorCode MatSolve_SuperLU_Private(Mat A,Vec b,Vec x)
316 {
317   Mat_SuperLU      *lu = (Mat_SuperLU*)A->spptr;
318   PetscScalar      *barray,*xarray;
319   PetscErrorCode   ierr;
320   PetscInt         info,i,n;
321   PetscReal        ferr,berr;
322   static PetscBool cite = PETSC_FALSE;
323 
324   PetscFunctionBegin;
325   if (lu->lwork == -1) PetscFunctionReturn(0);
326   ierr = PetscCitationsRegister("@article{superlu99,\n  author  = {James W. Demmel and Stanley C. Eisenstat and\n             John R. Gilbert and Xiaoye S. Li and Joseph W. H. Liu},\n  title = {A supernodal approach to sparse partial pivoting},\n  journal = {SIAM J. Matrix Analysis and Applications},\n  year = {1999},\n  volume  = {20},\n  number = {3},\n  pages = {720-755}\n}\n",&cite);CHKERRQ(ierr);
327 
328   ierr = VecGetLocalSize(x,&n);CHKERRQ(ierr);
329   lu->B.ncol = 1;   /* Set the number of right-hand side */
330   if (lu->options.Equil && !lu->rhs_dup) {
331     /* superlu overwrites b when Equil is used, thus create rhs_dup to keep user's b unchanged */
332     ierr = PetscMalloc1(n,&lu->rhs_dup);CHKERRQ(ierr);
333   }
334   if (lu->options.Equil) {
335     /* Copy b into rsh_dup */
336     ierr   = VecGetArray(b,&barray);CHKERRQ(ierr);
337     ierr   = PetscMemcpy(lu->rhs_dup,barray,n*sizeof(PetscScalar));CHKERRQ(ierr);
338     ierr   = VecRestoreArray(b,&barray);CHKERRQ(ierr);
339     barray = lu->rhs_dup;
340   } else {
341     ierr = VecGetArray(b,&barray);CHKERRQ(ierr);
342   }
343   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
344 
345 #if defined(PETSC_USE_COMPLEX)
346 #if defined(PETSC_USE_REAL_SINGLE)
347   ((DNformat*)lu->B.Store)->nzval = (singlecomplex*)barray;
348   ((DNformat*)lu->X.Store)->nzval = (singlecomplex*)xarray;
349 #else
350   ((DNformat*)lu->B.Store)->nzval = (doublecomplex*)barray;
351   ((DNformat*)lu->X.Store)->nzval = (doublecomplex*)xarray;
352 #endif
353 #else
354   ((DNformat*)lu->B.Store)->nzval = barray;
355   ((DNformat*)lu->X.Store)->nzval = xarray;
356 #endif
357 
358   lu->options.Fact = FACTORED; /* Indicate the factored form of A is supplied. */
359   if (A->factortype == MAT_FACTOR_LU) {
360 #if defined(PETSC_USE_COMPLEX)
361 #if defined(PETSC_USE_REAL_SINGLE)
362     PetscStackCall("SuperLU:cgssvx",cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
363                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
364                                      &lu->mem_usage, &lu->stat, &info));
365 #else
366     PetscStackCall("SuperLU:zgssvx",zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
367                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
368                                      &lu->mem_usage, &lu->stat, &info));
369 #endif
370 #else
371 #if defined(PETSC_USE_REAL_SINGLE)
372     PetscStackCall("SuperLU:sgssvx",sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
373                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
374                                      &lu->mem_usage, &lu->stat, &info));
375 #else
376     PetscStackCall("SuperLU:dgssvx",dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
377                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
378                                      &lu->mem_usage, &lu->stat, &info));
379 #endif
380 #endif
381   } else if (A->factortype == MAT_FACTOR_ILU) {
382 #if defined(PETSC_USE_COMPLEX)
383 #if defined(PETSC_USE_REAL_SINGLE)
384     PetscStackCall("SuperLU:cgsisx",cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
385                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
386                                      &lu->mem_usage, &lu->stat, &info));
387 #else
388     PetscStackCall("SuperLU:zgsisx",zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
389                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
390                                      &lu->mem_usage, &lu->stat, &info));
391 #endif
392 #else
393 #if defined(PETSC_USE_REAL_SINGLE)
394     PetscStackCall("SuperLU:sgsisx",sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
395                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
396                                      &lu->mem_usage, &lu->stat, &info));
397 #else
398     PetscStackCall("SuperLU:dgsisx",dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
399                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
400                                      &lu->mem_usage, &lu->stat, &info));
401 #endif
402 #endif
403   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
404   if (!lu->options.Equil) {
405     ierr = VecRestoreArray(b,&barray);CHKERRQ(ierr);
406   }
407   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
408 
409   if (!info || info == lu->A.ncol+1) {
410     if (lu->options.IterRefine) {
411       ierr = PetscPrintf(PETSC_COMM_SELF,"Iterative Refinement:\n");
412       ierr = PetscPrintf(PETSC_COMM_SELF,"  %8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR");
413       for (i = 0; i < 1; ++i) {
414         ierr = PetscPrintf(PETSC_COMM_SELF,"  %8d%8d%16e%16e\n", i+1, lu->stat.RefineSteps, ferr, berr);
415       }
416     }
417   } else if (info > 0) {
418     if (lu->lwork == -1) {
419       ierr = PetscPrintf(PETSC_COMM_SELF,"  ** Estimated memory: %D bytes\n", info - lu->A.ncol);
420     } else {
421       ierr = PetscPrintf(PETSC_COMM_SELF,"  Warning: gssvx() returns info %D\n",info);
422     }
423   } else if (info < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %D, the %D-th argument in gssvx() had an illegal value", info,-info);
424 
425   if (lu->options.PrintStat) {
426     ierr = PetscPrintf(PETSC_COMM_SELF,"MatSolve__SuperLU():\n");
427     PetscStackCall("SuperLU:StatPrint",StatPrint(&lu->stat));
428   }
429   PetscFunctionReturn(0);
430 }
431 
432 #undef __FUNCT__
433 #define __FUNCT__ "MatSolve_SuperLU"
434 PetscErrorCode MatSolve_SuperLU(Mat A,Vec b,Vec x)
435 {
436   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
437   PetscErrorCode ierr;
438 
439   PetscFunctionBegin;
440   lu->options.Trans = TRANS;
441 
442   ierr = MatSolve_SuperLU_Private(A,b,x);CHKERRQ(ierr);
443   PetscFunctionReturn(0);
444 }
445 
446 #undef __FUNCT__
447 #define __FUNCT__ "MatSolveTranspose_SuperLU"
448 PetscErrorCode MatSolveTranspose_SuperLU(Mat A,Vec b,Vec x)
449 {
450   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
451   PetscErrorCode ierr;
452 
453   PetscFunctionBegin;
454   lu->options.Trans = NOTRANS;
455 
456   ierr = MatSolve_SuperLU_Private(A,b,x);CHKERRQ(ierr);
457   PetscFunctionReturn(0);
458 }
459 
460 #undef __FUNCT__
461 #define __FUNCT__ "MatMatSolve_SuperLU"
462 PetscErrorCode MatMatSolve_SuperLU(Mat A,Mat B,Mat X)
463 {
464   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
465   PetscBool      flg;
466   PetscErrorCode ierr;
467 
468   PetscFunctionBegin;
469   ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
470   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix");
471   ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
472   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");
473   lu->options.Trans = TRANS;
474   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatSolve_SuperLU() is not implemented yet");
475   PetscFunctionReturn(0);
476 }
477 
478 /*
479    Note the r permutation is ignored
480 */
481 #undef __FUNCT__
482 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU"
483 PetscErrorCode MatLUFactorSymbolic_SuperLU(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
484 {
485   Mat_SuperLU *lu = (Mat_SuperLU*)(F->spptr);
486 
487   PetscFunctionBegin;
488   lu->flg                 = DIFFERENT_NONZERO_PATTERN;
489   lu->CleanUpSuperLU      = PETSC_TRUE;
490   F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU;
491   PetscFunctionReturn(0);
492 }
493 
494 #undef __FUNCT__
495 #define __FUNCT__ "MatSuperluSetILUDropTol_SuperLU"
496 static PetscErrorCode MatSuperluSetILUDropTol_SuperLU(Mat F,PetscReal dtol)
497 {
498   Mat_SuperLU *lu= (Mat_SuperLU*)F->spptr;
499 
500   PetscFunctionBegin;
501   lu->options.ILU_DropTol = dtol;
502   PetscFunctionReturn(0);
503 }
504 
505 #undef __FUNCT__
506 #define __FUNCT__ "MatSuperluSetILUDropTol"
507 /*@
508   MatSuperluSetILUDropTol - Set SuperLU ILU drop tolerance
509    Logically Collective on Mat
510 
511    Input Parameters:
512 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-SuperLU interface
513 -  dtol - drop tolerance
514 
515   Options Database:
516 .   -mat_superlu_ilu_droptol <dtol>
517 
518    Level: beginner
519 
520    References: SuperLU Users' Guide
521 
522 .seealso: MatGetFactor()
523 @*/
524 PetscErrorCode MatSuperluSetILUDropTol(Mat F,PetscReal dtol)
525 {
526   PetscErrorCode ierr;
527 
528   PetscFunctionBegin;
529   PetscValidHeaderSpecific(F,MAT_CLASSID,1);
530   PetscValidLogicalCollectiveInt(F,dtol,2);
531   ierr = PetscTryMethod(F,"MatSuperluSetILUDropTol_C",(Mat,PetscReal),(F,dtol));CHKERRQ(ierr);
532   PetscFunctionReturn(0);
533 }
534 
535 #undef __FUNCT__
536 #define __FUNCT__ "MatFactorGetSolverPackage_seqaij_superlu"
537 PetscErrorCode MatFactorGetSolverPackage_seqaij_superlu(Mat A,const MatSolverPackage *type)
538 {
539   PetscFunctionBegin;
540   *type = MATSOLVERSUPERLU;
541   PetscFunctionReturn(0);
542 }
543 
544 
545 /*MC
546   MATSOLVERSUPERLU = "superlu" - A solver package providing solvers LU and ILU for sequential matrices
547   via the external package SuperLU.
548 
549   Use ./configure --download-superlu to have PETSc installed with SuperLU
550 
551   Options Database Keys:
552 + -mat_superlu_equil <FALSE>            - Equil (None)
553 . -mat_superlu_colperm <COLAMD>         - (choose one of) NATURAL MMD_ATA MMD_AT_PLUS_A COLAMD
554 . -mat_superlu_iterrefine <NOREFINE>    - (choose one of) NOREFINE SINGLE DOUBLE EXTRA
555 . -mat_superlu_symmetricmode: <FALSE>   - SymmetricMode (None)
556 . -mat_superlu_diagpivotthresh <1>      - DiagPivotThresh (None)
557 . -mat_superlu_pivotgrowth <FALSE>      - PivotGrowth (None)
558 . -mat_superlu_conditionnumber <FALSE>  - ConditionNumber (None)
559 . -mat_superlu_rowperm <NOROWPERM>      - (choose one of) NOROWPERM LargeDiag
560 . -mat_superlu_replacetinypivot <FALSE> - ReplaceTinyPivot (None)
561 . -mat_superlu_printstat <FALSE>        - PrintStat (None)
562 . -mat_superlu_lwork <0>                - size of work array in bytes used by factorization (None)
563 . -mat_superlu_ilu_droptol <0>          - ILU_DropTol (None)
564 . -mat_superlu_ilu_filltol <0>          - ILU_FillTol (None)
565 . -mat_superlu_ilu_fillfactor <0>       - ILU_FillFactor (None)
566 . -mat_superlu_ilu_droprull <0>         - ILU_DropRule (None)
567 . -mat_superlu_ilu_norm <0>             - ILU_Norm (None)
568 - -mat_superlu_ilu_milu <0>             - ILU_MILU (None)
569 
570    Notes: Do not confuse this with MATSOLVERSUPERLU_DIST which is for parallel sparse solves
571 
572    Level: beginner
573 
574 .seealso: PCLU, PCILU, MATSOLVERSUPERLU_DIST, MATSOLVERMUMPS, PCFactorSetMatSolverPackage(), MatSolverPackage
575 M*/
576 
577 #undef __FUNCT__
578 #define __FUNCT__ "MatGetFactor_seqaij_superlu"
579 PETSC_EXTERN PetscErrorCode MatGetFactor_seqaij_superlu(Mat A,MatFactorType ftype,Mat *F)
580 {
581   Mat            B;
582   Mat_SuperLU    *lu;
583   PetscErrorCode ierr;
584   PetscInt       indx,m=A->rmap->n,n=A->cmap->n;
585   PetscBool      flg,set;
586   PetscReal      real_input;
587   const char     *colperm[]   ={"NATURAL","MMD_ATA","MMD_AT_PLUS_A","COLAMD"}; /* MY_PERMC - not supported by the petsc interface yet */
588   const char     *iterrefine[]={"NOREFINE", "SINGLE", "DOUBLE", "EXTRA"};
589   const char     *rowperm[]   ={"NOROWPERM", "LargeDiag"}; /* MY_PERMC - not supported by the petsc interface yet */
590 
591   PetscFunctionBegin;
592   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
593   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
594   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
595   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
596 
597   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) {
598     B->ops->lufactorsymbolic  = MatLUFactorSymbolic_SuperLU;
599     B->ops->ilufactorsymbolic = MatLUFactorSymbolic_SuperLU;
600   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
601 
602   B->ops->destroy     = MatDestroy_SuperLU;
603   B->ops->view        = MatView_SuperLU;
604   B->ops->getdiagonal = MatGetDiagonal_SuperLU;
605   B->factortype       = ftype;
606   B->assembled        = PETSC_TRUE;           /* required by -ksp_view */
607   B->preallocated     = PETSC_TRUE;
608 
609   ierr = PetscNewLog(B,&lu);CHKERRQ(ierr);
610 
611   if (ftype == MAT_FACTOR_LU) {
612     set_default_options(&lu->options);
613     /* Comments from SuperLU_4.0/SRC/dgssvx.c:
614       "Whether or not the system will be equilibrated depends on the
615        scaling of the matrix A, but if equilibration is used, A is
616        overwritten by diag(R)*A*diag(C) and B by diag(R)*B
617        (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans = TRANS or CONJ)."
618      We set 'options.Equil = NO' as default because additional space is needed for it.
619     */
620     lu->options.Equil = NO;
621   } else if (ftype == MAT_FACTOR_ILU) {
622     /* Set the default input options of ilu: */
623     PetscStackCall("SuperLU:ilu_set_default_options",ilu_set_default_options(&lu->options));
624   }
625   lu->options.PrintStat = NO;
626 
627   /* Initialize the statistics variables. */
628   PetscStackCall("SuperLU:StatInit",StatInit(&lu->stat));
629   lu->lwork = 0;   /* allocate space internally by system malloc */
630 
631   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"SuperLU Options","Mat");CHKERRQ(ierr);
632   ierr = PetscOptionsBool("-mat_superlu_equil","Equil","None",(PetscBool)lu->options.Equil,(PetscBool*)&lu->options.Equil,NULL);CHKERRQ(ierr);
633   ierr = PetscOptionsEList("-mat_superlu_colperm","ColPerm","None",colperm,4,colperm[3],&indx,&flg);CHKERRQ(ierr);
634   if (flg) lu->options.ColPerm = (colperm_t)indx;
635   ierr = PetscOptionsEList("-mat_superlu_iterrefine","IterRefine","None",iterrefine,4,iterrefine[0],&indx,&flg);CHKERRQ(ierr);
636   if (flg) lu->options.IterRefine = (IterRefine_t)indx;
637   ierr = PetscOptionsBool("-mat_superlu_symmetricmode","SymmetricMode","None",(PetscBool)lu->options.SymmetricMode,&flg,&set);CHKERRQ(ierr);
638   if (set && flg) lu->options.SymmetricMode = YES;
639   ierr = PetscOptionsReal("-mat_superlu_diagpivotthresh","DiagPivotThresh","None",lu->options.DiagPivotThresh,&real_input,&flg);CHKERRQ(ierr);
640   if (flg) lu->options.DiagPivotThresh = (double) real_input;
641   ierr = PetscOptionsBool("-mat_superlu_pivotgrowth","PivotGrowth","None",(PetscBool)lu->options.PivotGrowth,&flg,&set);CHKERRQ(ierr);
642   if (set && flg) lu->options.PivotGrowth = YES;
643   ierr = PetscOptionsBool("-mat_superlu_conditionnumber","ConditionNumber","None",(PetscBool)lu->options.ConditionNumber,&flg,&set);CHKERRQ(ierr);
644   if (set && flg) lu->options.ConditionNumber = YES;
645   ierr = PetscOptionsEList("-mat_superlu_rowperm","rowperm","None",rowperm,2,rowperm[lu->options.RowPerm],&indx,&flg);CHKERRQ(ierr);
646   if (flg) lu->options.RowPerm = (rowperm_t)indx;
647   ierr = PetscOptionsBool("-mat_superlu_replacetinypivot","ReplaceTinyPivot","None",(PetscBool)lu->options.ReplaceTinyPivot,&flg,&set);CHKERRQ(ierr);
648   if (set && flg) lu->options.ReplaceTinyPivot = YES;
649   ierr = PetscOptionsBool("-mat_superlu_printstat","PrintStat","None",(PetscBool)lu->options.PrintStat,&flg,&set);CHKERRQ(ierr);
650   if (set && flg) lu->options.PrintStat = YES;
651   ierr = PetscOptionsInt("-mat_superlu_lwork","size of work array in bytes used by factorization","None",lu->lwork,&lu->lwork,NULL);CHKERRQ(ierr);
652   if (lu->lwork > 0) {
653     /* lwork is in bytes, hence PetscMalloc() is used here, not PetscMalloc1()*/
654     ierr = PetscMalloc(lu->lwork,&lu->work);CHKERRQ(ierr);
655   } else if (lu->lwork != 0 && lu->lwork != -1) {
656     ierr      = PetscPrintf(PETSC_COMM_SELF,"   Warning: lwork %D is not supported by SUPERLU. The default lwork=0 is used.\n",lu->lwork);
657     lu->lwork = 0;
658   }
659   /* ilu options */
660   ierr = PetscOptionsReal("-mat_superlu_ilu_droptol","ILU_DropTol","None",lu->options.ILU_DropTol,&real_input,&flg);CHKERRQ(ierr);
661   if (flg) lu->options.ILU_DropTol = (double) real_input;
662   ierr = PetscOptionsReal("-mat_superlu_ilu_filltol","ILU_FillTol","None",lu->options.ILU_FillTol,&real_input,&flg);CHKERRQ(ierr);
663   if (flg) lu->options.ILU_FillTol = (double) real_input;
664   ierr = PetscOptionsReal("-mat_superlu_ilu_fillfactor","ILU_FillFactor","None",lu->options.ILU_FillFactor,&real_input,&flg);CHKERRQ(ierr);
665   if (flg) lu->options.ILU_FillFactor = (double) real_input;
666   ierr = PetscOptionsInt("-mat_superlu_ilu_droprull","ILU_DropRule","None",lu->options.ILU_DropRule,&lu->options.ILU_DropRule,NULL);CHKERRQ(ierr);
667   ierr = PetscOptionsInt("-mat_superlu_ilu_norm","ILU_Norm","None",lu->options.ILU_Norm,&indx,&flg);CHKERRQ(ierr);
668   if (flg) lu->options.ILU_Norm = (norm_t)indx;
669   ierr = PetscOptionsInt("-mat_superlu_ilu_milu","ILU_MILU","None",lu->options.ILU_MILU,&indx,&flg);CHKERRQ(ierr);
670   if (flg) lu->options.ILU_MILU = (milu_t)indx;
671   PetscOptionsEnd();
672   if (lu->options.Equil == YES) {
673     /* superlu overwrites input matrix and rhs when Equil is used, thus create A_dup to keep user's A unchanged */
674     ierr = MatDuplicate_SeqAIJ(A,MAT_COPY_VALUES,&lu->A_dup);CHKERRQ(ierr);
675   }
676 
677   /* Allocate spaces (notice sizes are for the transpose) */
678   ierr = PetscMalloc1(m,&lu->etree);CHKERRQ(ierr);
679   ierr = PetscMalloc1(n,&lu->perm_r);CHKERRQ(ierr);
680   ierr = PetscMalloc1(m,&lu->perm_c);CHKERRQ(ierr);
681   ierr = PetscMalloc1(n,&lu->R);CHKERRQ(ierr);
682   ierr = PetscMalloc1(m,&lu->C);CHKERRQ(ierr);
683 
684   /* create rhs and solution x without allocate space for .Store */
685 #if defined(PETSC_USE_COMPLEX)
686 #if defined(PETSC_USE_REAL_SINGLE)
687   PetscStackCall("SuperLU:cCreate_Dense_Matrix(",cCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_C, SLU_GE));
688   PetscStackCall("SuperLU:cCreate_Dense_Matrix(",cCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_C, SLU_GE));
689 #else
690   PetscStackCall("SuperLU:zCreate_Dense_Matrix",zCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_Z, SLU_GE));
691   PetscStackCall("SuperLU:zCreate_Dense_Matrix",zCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_Z, SLU_GE));
692 #endif
693 #else
694 #if defined(PETSC_USE_REAL_SINGLE)
695   PetscStackCall("SuperLU:sCreate_Dense_Matrix",sCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_S, SLU_GE));
696   PetscStackCall("SuperLU:sCreate_Dense_Matrix",sCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_S, SLU_GE));
697 #else
698   PetscStackCall("SuperLU:dCreate_Dense_Matrix",dCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_D, SLU_GE));
699   PetscStackCall("SuperLU:dCreate_Dense_Matrix",dCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_D, SLU_GE));
700 #endif
701 #endif
702 
703   ierr     = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverPackage_C",MatFactorGetSolverPackage_seqaij_superlu);CHKERRQ(ierr);
704   ierr     = PetscObjectComposeFunction((PetscObject)B,"MatSuperluSetILUDropTol_C",MatSuperluSetILUDropTol_SuperLU);CHKERRQ(ierr);
705   B->spptr = lu;
706 
707   *F       = B;
708   PetscFunctionReturn(0);
709 }
710 
711 #undef __FUNCT__
712 #define __FUNCT__ "MatSolverPackageRegister_SuperLU"
713 PETSC_EXTERN PetscErrorCode MatSolverPackageRegister_SuperLU(void)
714 {
715   PetscErrorCode ierr;
716 
717   PetscFunctionBegin;
718   ierr = MatSolverPackageRegister(MATSOLVERSUPERLU,MATSEQAIJ,       MAT_FACTOR_LU,MatGetFactor_seqaij_superlu);CHKERRQ(ierr);
719   ierr = MatSolverPackageRegister(MATSOLVERSUPERLU,MATSEQAIJ,       MAT_FACTOR_ILU,MatGetFactor_seqaij_superlu);CHKERRQ(ierr);
720   PetscFunctionReturn(0);
721 }
722