xref: /petsc/src/mat/impls/aij/seq/superlu/superlu.c (revision bebe2cf65d55febe21a5af8db2bd2e168caaa2e7)
1 
2 /*  --------------------------------------------------------------------
3 
4      This file implements a subclass of the SeqAIJ matrix class that uses
5      the SuperLU sparse solver. You can use this as a starting point for
6      implementing your own subclass of a PETSc matrix class.
7 
8      This demonstrates a way to make an implementation inheritence of a PETSc
9      matrix type. This means constructing a new matrix type (SuperLU) by changing some
10      of the methods of the previous type (SeqAIJ), adding additional data, and possibly
11      additional method. (See any book on object oriented programming).
12 */
13 
14 /*
15      Defines the data structure for the base matrix type (SeqAIJ)
16 */
17 #include <../src/mat/impls/aij/seq/aij.h>    /*I "petscmat.h" I*/
18 
19 /*
20      SuperLU include files
21 */
22 EXTERN_C_BEGIN
23 #if defined(PETSC_USE_COMPLEX)
24 #if defined(PETSC_USE_REAL_SINGLE)
25 #include <slu_cdefs.h>
26 #else
27 #include <slu_zdefs.h>
28 #endif
29 #else
30 #if defined(PETSC_USE_REAL_SINGLE)
31 #include <slu_sdefs.h>
32 #else
33 #include <slu_ddefs.h>
34 #endif
35 #endif
36 #include <slu_util.h>
37 EXTERN_C_END
38 
39 /*
40      This is the data we are "ADDING" to the SeqAIJ matrix type to get the SuperLU matrix type
41 */
42 typedef struct {
43   SuperMatrix       A,L,U,B,X;
44   superlu_options_t options;
45   PetscInt          *perm_c; /* column permutation vector */
46   PetscInt          *perm_r; /* row permutations from partial pivoting */
47   PetscInt          *etree;
48   PetscReal         *R, *C;
49   char              equed[1];
50   PetscInt          lwork;
51   void              *work;
52   PetscReal         rpg, rcond;
53   mem_usage_t       mem_usage;
54   MatStructure      flg;
55   SuperLUStat_t     stat;
56   Mat               A_dup;
57   PetscScalar       *rhs_dup;
58 
59   /* Flag to clean up (non-global) SuperLU objects during Destroy */
60   PetscBool CleanUpSuperLU;
61 } Mat_SuperLU;
62 
63 extern PetscErrorCode MatFactorInfo_SuperLU(Mat,PetscViewer);
64 extern PetscErrorCode MatLUFactorNumeric_SuperLU(Mat,Mat,const MatFactorInfo*);
65 extern PetscErrorCode MatDestroy_SuperLU(Mat);
66 extern PetscErrorCode MatView_SuperLU(Mat,PetscViewer);
67 extern PetscErrorCode MatAssemblyEnd_SuperLU(Mat,MatAssemblyType);
68 extern PetscErrorCode MatSolve_SuperLU(Mat,Vec,Vec);
69 extern PetscErrorCode MatMatSolve_SuperLU(Mat,Mat,Mat);
70 extern PetscErrorCode MatSolveTranspose_SuperLU(Mat,Vec,Vec);
71 extern PetscErrorCode MatLUFactorSymbolic_SuperLU(Mat,Mat,IS,IS,const MatFactorInfo*);
72 extern PetscErrorCode MatDuplicate_SuperLU(Mat, MatDuplicateOption, Mat*);
73 
74 /*
75     Utility function
76 */
77 #undef __FUNCT__
78 #define __FUNCT__ "MatFactorInfo_SuperLU"
79 PetscErrorCode MatFactorInfo_SuperLU(Mat A,PetscViewer viewer)
80 {
81   Mat_SuperLU       *lu= (Mat_SuperLU*)A->spptr;
82   PetscErrorCode    ierr;
83   superlu_options_t options;
84 
85   PetscFunctionBegin;
86   /* check if matrix is superlu_dist type */
87   if (A->ops->solve != MatSolve_SuperLU) PetscFunctionReturn(0);
88 
89   options = lu->options;
90 
91   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU run parameters:\n");CHKERRQ(ierr);
92   ierr = PetscViewerASCIIPrintf(viewer,"  Equil: %s\n",(options.Equil != NO) ? "YES" : "NO");CHKERRQ(ierr);
93   ierr = PetscViewerASCIIPrintf(viewer,"  ColPerm: %D\n",options.ColPerm);CHKERRQ(ierr);
94   ierr = PetscViewerASCIIPrintf(viewer,"  IterRefine: %D\n",options.IterRefine);CHKERRQ(ierr);
95   ierr = PetscViewerASCIIPrintf(viewer,"  SymmetricMode: %s\n",(options.SymmetricMode != NO) ? "YES" : "NO");CHKERRQ(ierr);
96   ierr = PetscViewerASCIIPrintf(viewer,"  DiagPivotThresh: %g\n",options.DiagPivotThresh);CHKERRQ(ierr);
97   ierr = PetscViewerASCIIPrintf(viewer,"  PivotGrowth: %s\n",(options.PivotGrowth != NO) ? "YES" : "NO");CHKERRQ(ierr);
98   ierr = PetscViewerASCIIPrintf(viewer,"  ConditionNumber: %s\n",(options.ConditionNumber != NO) ? "YES" : "NO");CHKERRQ(ierr);
99   ierr = PetscViewerASCIIPrintf(viewer,"  RowPerm: %D\n",options.RowPerm);CHKERRQ(ierr);
100   ierr = PetscViewerASCIIPrintf(viewer,"  ReplaceTinyPivot: %s\n",(options.ReplaceTinyPivot != NO) ? "YES" : "NO");CHKERRQ(ierr);
101   ierr = PetscViewerASCIIPrintf(viewer,"  PrintStat: %s\n",(options.PrintStat != NO) ? "YES" : "NO");CHKERRQ(ierr);
102   ierr = PetscViewerASCIIPrintf(viewer,"  lwork: %D\n",lu->lwork);CHKERRQ(ierr);
103   if (A->factortype == MAT_FACTOR_ILU) {
104     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_DropTol: %g\n",options.ILU_DropTol);CHKERRQ(ierr);
105     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_FillTol: %g\n",options.ILU_FillTol);CHKERRQ(ierr);
106     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_FillFactor: %g\n",options.ILU_FillFactor);CHKERRQ(ierr);
107     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_DropRule: %D\n",options.ILU_DropRule);CHKERRQ(ierr);
108     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_Norm: %D\n",options.ILU_Norm);CHKERRQ(ierr);
109     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_MILU: %D\n",options.ILU_MILU);CHKERRQ(ierr);
110   }
111   PetscFunctionReturn(0);
112 }
113 
114 /*
115     These are the methods provided to REPLACE the corresponding methods of the
116    SeqAIJ matrix class. Other methods of SeqAIJ are not replaced
117 */
118 #undef __FUNCT__
119 #define __FUNCT__ "MatLUFactorNumeric_SuperLU"
120 PetscErrorCode MatLUFactorNumeric_SuperLU(Mat F,Mat A,const MatFactorInfo *info)
121 {
122   Mat_SuperLU    *lu = (Mat_SuperLU*)F->spptr;
123   Mat_SeqAIJ     *aa;
124   PetscErrorCode ierr;
125   PetscInt       sinfo;
126   PetscReal      ferr, berr;
127   NCformat       *Ustore;
128   SCformat       *Lstore;
129 
130   PetscFunctionBegin;
131   if (lu->flg == SAME_NONZERO_PATTERN) { /* successing numerical factorization */
132     lu->options.Fact = SamePattern;
133     /* Ref: ~SuperLU_3.0/EXAMPLE/dlinsolx2.c */
134     Destroy_SuperMatrix_Store(&lu->A);
135     if (lu->options.Equil) {
136       ierr = MatCopy_SeqAIJ(A,lu->A_dup,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
137     }
138     if (lu->lwork >= 0) {
139       PetscStackCall("SuperLU:Destroy_SuperNode_Matrix",Destroy_SuperNode_Matrix(&lu->L));
140       PetscStackCall("SuperLU:Destroy_CompCol_Matrix",Destroy_CompCol_Matrix(&lu->U));
141       lu->options.Fact = SamePattern;
142     }
143   }
144 
145   /* Create the SuperMatrix for lu->A=A^T:
146        Since SuperLU likes column-oriented matrices,we pass it the transpose,
147        and then solve A^T X = B in MatSolve(). */
148   if (lu->options.Equil) {
149     aa = (Mat_SeqAIJ*)(lu->A_dup)->data;
150   } else {
151     aa = (Mat_SeqAIJ*)(A)->data;
152   }
153 #if defined(PETSC_USE_COMPLEX)
154 #if defined(PETSC_USE_REAL_SINGLE)
155   PetscStackCall("SuperLU:cCreate_CompCol_Matrix",cCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,(singlecomplex*)aa->a,aa->j,aa->i,SLU_NC,SLU_C,SLU_GE));
156 #else
157   PetscStackCall("SuperLU:zCreate_CompCol_Matrix",zCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,SLU_NC,SLU_Z,SLU_GE));
158 #endif
159 #else
160 #if defined(PETSC_USE_REAL_SINGLE)
161   PetscStackCall("SuperLU:sCreate_CompCol_Matrix",sCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,aa->a,aa->j,aa->i,SLU_NC,SLU_S,SLU_GE));
162 #else
163   PetscStackCall("SuperLU:dCreate_CompCol_Matrix",dCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,aa->a,aa->j,aa->i,SLU_NC,SLU_D,SLU_GE));
164 #endif
165 #endif
166 
167   /* Numerical factorization */
168   lu->B.ncol = 0;  /* Indicate not to solve the system */
169   if (F->factortype == MAT_FACTOR_LU) {
170 #if defined(PETSC_USE_COMPLEX)
171 #if defined(PETSC_USE_REAL_SINGLE)
172     PetscStackCall("SuperLU:cgssvx",cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
173                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
174                                      &lu->mem_usage, &lu->stat, &sinfo));
175 #else
176     PetscStackCall("SuperLU:zgssvx",zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
177                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
178                                      &lu->mem_usage, &lu->stat, &sinfo));
179 #endif
180 #else
181 #if defined(PETSC_USE_REAL_SINGLE)
182     PetscStackCall("SuperLU:sgssvx",sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
183                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
184                                      &lu->mem_usage, &lu->stat, &sinfo));
185 #else
186     PetscStackCall("SuperLU:dgssvx",dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
187                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
188                                      &lu->mem_usage, &lu->stat, &sinfo));
189 #endif
190 #endif
191   } else if (F->factortype == MAT_FACTOR_ILU) {
192     /* Compute the incomplete factorization, condition number and pivot growth */
193 #if defined(PETSC_USE_COMPLEX)
194 #if defined(PETSC_USE_REAL_SINGLE)
195     PetscStackCall("SuperLU:cgsisx",cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r,lu->etree, lu->equed, lu->R, lu->C,
196                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
197                                      &lu->mem_usage, &lu->stat, &sinfo));
198 #else
199     PetscStackCall("SuperLU:zgsisx",zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r,lu->etree, lu->equed, lu->R, lu->C,
200                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
201                                      &lu->mem_usage, &lu->stat, &sinfo));
202 #endif
203 #else
204 #if defined(PETSC_USE_REAL_SINGLE)
205     PetscStackCall("SuperLU:sgsisx",sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
206                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
207                                      &lu->mem_usage, &lu->stat, &sinfo));
208 #else
209     PetscStackCall("SuperLU:dgsisx",dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
210                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
211                                      &lu->mem_usage, &lu->stat, &sinfo));
212 #endif
213 #endif
214   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
215   if (!sinfo || sinfo == lu->A.ncol+1) {
216     if (lu->options.PivotGrowth) {
217       ierr = PetscPrintf(PETSC_COMM_SELF,"  Recip. pivot growth = %e\n", lu->rpg);
218     }
219     if (lu->options.ConditionNumber) {
220       ierr = PetscPrintf(PETSC_COMM_SELF,"  Recip. condition number = %e\n", lu->rcond);
221     }
222   } else if (sinfo > 0) {
223     if (lu->lwork == -1) {
224       ierr = PetscPrintf(PETSC_COMM_SELF,"  ** Estimated memory: %D bytes\n", sinfo - lu->A.ncol);
225     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot in row %D",sinfo);
226   } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %D, the %D-th argument in gssvx() had an illegal value", sinfo,-sinfo);
227 
228   if (lu->options.PrintStat) {
229     ierr = PetscPrintf(PETSC_COMM_SELF,"MatLUFactorNumeric_SuperLU():\n");
230     PetscStackCall("SuperLU:StatPrint",StatPrint(&lu->stat));
231     Lstore = (SCformat*) lu->L.Store;
232     Ustore = (NCformat*) lu->U.Store;
233     ierr   = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in factor L = %D\n", Lstore->nnz);
234     ierr   = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in factor U = %D\n", Ustore->nnz);
235     ierr   = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in L+U = %D\n", Lstore->nnz + Ustore->nnz - lu->A.ncol);
236     ierr   = PetscPrintf(PETSC_COMM_SELF,"  L\\U MB %.3f\ttotal MB needed %.3f\n",
237                          lu->mem_usage.for_lu/1e6, lu->mem_usage.total_needed/1e6);
238   }
239 
240   lu->flg                = SAME_NONZERO_PATTERN;
241   F->ops->solve          = MatSolve_SuperLU;
242   F->ops->solvetranspose = MatSolveTranspose_SuperLU;
243   F->ops->matsolve       = MatMatSolve_SuperLU;
244   PetscFunctionReturn(0);
245 }
246 
247 #undef __FUNCT__
248 #define __FUNCT__ "MatGetDiagonal_SuperLU"
249 PetscErrorCode MatGetDiagonal_SuperLU(Mat A,Vec v)
250 {
251   PetscFunctionBegin;
252   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Mat type: SuperLU factor");
253   PetscFunctionReturn(0);
254 }
255 
256 #undef __FUNCT__
257 #define __FUNCT__ "MatDestroy_SuperLU"
258 PetscErrorCode MatDestroy_SuperLU(Mat A)
259 {
260   PetscErrorCode ierr;
261   Mat_SuperLU    *lu=(Mat_SuperLU*)A->spptr;
262 
263   PetscFunctionBegin;
264   if (lu && lu->CleanUpSuperLU) { /* Free the SuperLU datastructures */
265     PetscStackCall("SuperLU:Destroy_SuperMatrix_Store",Destroy_SuperMatrix_Store(&lu->A));
266     PetscStackCall("SuperLU:Destroy_SuperMatrix_Store",Destroy_SuperMatrix_Store(&lu->B));
267     PetscStackCall("SuperLU:Destroy_SuperMatrix_Store",Destroy_SuperMatrix_Store(&lu->X));
268     PetscStackCall("SuperLU:StatFree",StatFree(&lu->stat));
269     if (lu->lwork >= 0) {
270       PetscStackCall("SuperLU:Destroy_SuperNode_Matrix",Destroy_SuperNode_Matrix(&lu->L));
271       PetscStackCall("SuperLU:Destroy_CompCol_Matrix",Destroy_CompCol_Matrix(&lu->U));
272     }
273   }
274   if (lu) {
275     ierr = PetscFree(lu->etree);CHKERRQ(ierr);
276     ierr = PetscFree(lu->perm_r);CHKERRQ(ierr);
277     ierr = PetscFree(lu->perm_c);CHKERRQ(ierr);
278     ierr = PetscFree(lu->R);CHKERRQ(ierr);
279     ierr = PetscFree(lu->C);CHKERRQ(ierr);
280     ierr = PetscFree(lu->rhs_dup);CHKERRQ(ierr);
281     ierr = MatDestroy(&lu->A_dup);CHKERRQ(ierr);
282   }
283   ierr = PetscFree(A->spptr);CHKERRQ(ierr);
284 
285   /* clear composed functions */
286   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverPackage_C",NULL);CHKERRQ(ierr);
287   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSuperluSetILUDropTol_C",NULL);CHKERRQ(ierr);
288 
289   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
290   PetscFunctionReturn(0);
291 }
292 
293 #undef __FUNCT__
294 #define __FUNCT__ "MatView_SuperLU"
295 PetscErrorCode MatView_SuperLU(Mat A,PetscViewer viewer)
296 {
297   PetscErrorCode    ierr;
298   PetscBool         iascii;
299   PetscViewerFormat format;
300 
301   PetscFunctionBegin;
302   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
303   if (iascii) {
304     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
305     if (format == PETSC_VIEWER_ASCII_INFO) {
306       ierr = MatFactorInfo_SuperLU(A,viewer);CHKERRQ(ierr);
307     }
308   }
309   PetscFunctionReturn(0);
310 }
311 
312 
313 #undef __FUNCT__
314 #define __FUNCT__ "MatSolve_SuperLU_Private"
315 PetscErrorCode MatSolve_SuperLU_Private(Mat A,Vec b,Vec x)
316 {
317   Mat_SuperLU       *lu = (Mat_SuperLU*)A->spptr;
318   const PetscScalar *barray;
319   PetscScalar       *xarray;
320   PetscErrorCode    ierr;
321   PetscInt          info,i,n;
322   PetscReal         ferr,berr;
323   static PetscBool  cite = PETSC_FALSE;
324 
325   PetscFunctionBegin;
326   if (lu->lwork == -1) PetscFunctionReturn(0);
327   ierr = PetscCitationsRegister("@article{superlu99,\n  author  = {James W. Demmel and Stanley C. Eisenstat and\n             John R. Gilbert and Xiaoye S. Li and Joseph W. H. Liu},\n  title = {A supernodal approach to sparse partial pivoting},\n  journal = {SIAM J. Matrix Analysis and Applications},\n  year = {1999},\n  volume  = {20},\n  number = {3},\n  pages = {720-755}\n}\n",&cite);CHKERRQ(ierr);
328 
329   ierr = VecGetLocalSize(x,&n);CHKERRQ(ierr);
330   lu->B.ncol = 1;   /* Set the number of right-hand side */
331   if (lu->options.Equil && !lu->rhs_dup) {
332     /* superlu overwrites b when Equil is used, thus create rhs_dup to keep user's b unchanged */
333     ierr = PetscMalloc1(n,&lu->rhs_dup);CHKERRQ(ierr);
334   }
335   if (lu->options.Equil) {
336     /* Copy b into rsh_dup */
337     ierr   = VecGetArrayRead(b,&barray);CHKERRQ(ierr);
338     ierr   = PetscMemcpy(lu->rhs_dup,barray,n*sizeof(PetscScalar));CHKERRQ(ierr);
339     ierr   = VecRestoreArrayRead(b,&barray);CHKERRQ(ierr);
340     barray = lu->rhs_dup;
341   } else {
342     ierr = VecGetArrayRead(b,&barray);CHKERRQ(ierr);
343   }
344   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
345 
346 #if defined(PETSC_USE_COMPLEX)
347 #if defined(PETSC_USE_REAL_SINGLE)
348   ((DNformat*)lu->B.Store)->nzval = (singlecomplex*)barray;
349   ((DNformat*)lu->X.Store)->nzval = (singlecomplex*)xarray;
350 #else
351   ((DNformat*)lu->B.Store)->nzval = (doublecomplex*)barray;
352   ((DNformat*)lu->X.Store)->nzval = (doublecomplex*)xarray;
353 #endif
354 #else
355   ((DNformat*)lu->B.Store)->nzval = (void*)barray;
356   ((DNformat*)lu->X.Store)->nzval = xarray;
357 #endif
358 
359   lu->options.Fact = FACTORED; /* Indicate the factored form of A is supplied. */
360   if (A->factortype == MAT_FACTOR_LU) {
361 #if defined(PETSC_USE_COMPLEX)
362 #if defined(PETSC_USE_REAL_SINGLE)
363     PetscStackCall("SuperLU:cgssvx",cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
364                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
365                                      &lu->mem_usage, &lu->stat, &info));
366 #else
367     PetscStackCall("SuperLU:zgssvx",zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
368                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
369                                      &lu->mem_usage, &lu->stat, &info));
370 #endif
371 #else
372 #if defined(PETSC_USE_REAL_SINGLE)
373     PetscStackCall("SuperLU:sgssvx",sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
374                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
375                                      &lu->mem_usage, &lu->stat, &info));
376 #else
377     PetscStackCall("SuperLU:dgssvx",dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
378                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
379                                      &lu->mem_usage, &lu->stat, &info));
380 #endif
381 #endif
382   } else if (A->factortype == MAT_FACTOR_ILU) {
383 #if defined(PETSC_USE_COMPLEX)
384 #if defined(PETSC_USE_REAL_SINGLE)
385     PetscStackCall("SuperLU:cgsisx",cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
386                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
387                                      &lu->mem_usage, &lu->stat, &info));
388 #else
389     PetscStackCall("SuperLU:zgsisx",zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
390                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
391                                      &lu->mem_usage, &lu->stat, &info));
392 #endif
393 #else
394 #if defined(PETSC_USE_REAL_SINGLE)
395     PetscStackCall("SuperLU:sgsisx",sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
396                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
397                                      &lu->mem_usage, &lu->stat, &info));
398 #else
399     PetscStackCall("SuperLU:dgsisx",dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
400                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
401                                      &lu->mem_usage, &lu->stat, &info));
402 #endif
403 #endif
404   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
405   if (!lu->options.Equil) {
406     ierr = VecRestoreArrayRead(b,&barray);CHKERRQ(ierr);
407   }
408   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
409 
410   if (!info || info == lu->A.ncol+1) {
411     if (lu->options.IterRefine) {
412       ierr = PetscPrintf(PETSC_COMM_SELF,"Iterative Refinement:\n");
413       ierr = PetscPrintf(PETSC_COMM_SELF,"  %8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR");
414       for (i = 0; i < 1; ++i) {
415         ierr = PetscPrintf(PETSC_COMM_SELF,"  %8d%8d%16e%16e\n", i+1, lu->stat.RefineSteps, ferr, berr);
416       }
417     }
418   } else if (info > 0) {
419     if (lu->lwork == -1) {
420       ierr = PetscPrintf(PETSC_COMM_SELF,"  ** Estimated memory: %D bytes\n", info - lu->A.ncol);
421     } else {
422       ierr = PetscPrintf(PETSC_COMM_SELF,"  Warning: gssvx() returns info %D\n",info);
423     }
424   } else if (info < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %D, the %D-th argument in gssvx() had an illegal value", info,-info);
425 
426   if (lu->options.PrintStat) {
427     ierr = PetscPrintf(PETSC_COMM_SELF,"MatSolve__SuperLU():\n");
428     PetscStackCall("SuperLU:StatPrint",StatPrint(&lu->stat));
429   }
430   PetscFunctionReturn(0);
431 }
432 
433 #undef __FUNCT__
434 #define __FUNCT__ "MatSolve_SuperLU"
435 PetscErrorCode MatSolve_SuperLU(Mat A,Vec b,Vec x)
436 {
437   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
438   PetscErrorCode ierr;
439 
440   PetscFunctionBegin;
441   lu->options.Trans = TRANS;
442 
443   ierr = MatSolve_SuperLU_Private(A,b,x);CHKERRQ(ierr);
444   PetscFunctionReturn(0);
445 }
446 
447 #undef __FUNCT__
448 #define __FUNCT__ "MatSolveTranspose_SuperLU"
449 PetscErrorCode MatSolveTranspose_SuperLU(Mat A,Vec b,Vec x)
450 {
451   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
452   PetscErrorCode ierr;
453 
454   PetscFunctionBegin;
455   lu->options.Trans = NOTRANS;
456 
457   ierr = MatSolve_SuperLU_Private(A,b,x);CHKERRQ(ierr);
458   PetscFunctionReturn(0);
459 }
460 
461 #undef __FUNCT__
462 #define __FUNCT__ "MatMatSolve_SuperLU"
463 PetscErrorCode MatMatSolve_SuperLU(Mat A,Mat B,Mat X)
464 {
465   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
466   PetscBool      flg;
467   PetscErrorCode ierr;
468 
469   PetscFunctionBegin;
470   ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
471   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix");
472   ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
473   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");
474   lu->options.Trans = TRANS;
475   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatSolve_SuperLU() is not implemented yet");
476   PetscFunctionReturn(0);
477 }
478 
479 /*
480    Note the r permutation is ignored
481 */
482 #undef __FUNCT__
483 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU"
484 PetscErrorCode MatLUFactorSymbolic_SuperLU(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
485 {
486   Mat_SuperLU *lu = (Mat_SuperLU*)(F->spptr);
487 
488   PetscFunctionBegin;
489   lu->flg                 = DIFFERENT_NONZERO_PATTERN;
490   lu->CleanUpSuperLU      = PETSC_TRUE;
491   F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU;
492   PetscFunctionReturn(0);
493 }
494 
495 #undef __FUNCT__
496 #define __FUNCT__ "MatSuperluSetILUDropTol_SuperLU"
497 static PetscErrorCode MatSuperluSetILUDropTol_SuperLU(Mat F,PetscReal dtol)
498 {
499   Mat_SuperLU *lu= (Mat_SuperLU*)F->spptr;
500 
501   PetscFunctionBegin;
502   lu->options.ILU_DropTol = dtol;
503   PetscFunctionReturn(0);
504 }
505 
506 #undef __FUNCT__
507 #define __FUNCT__ "MatSuperluSetILUDropTol"
508 /*@
509   MatSuperluSetILUDropTol - Set SuperLU ILU drop tolerance
510    Logically Collective on Mat
511 
512    Input Parameters:
513 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-SuperLU interface
514 -  dtol - drop tolerance
515 
516   Options Database:
517 .   -mat_superlu_ilu_droptol <dtol>
518 
519    Level: beginner
520 
521    References: SuperLU Users' Guide
522 
523 .seealso: MatGetFactor()
524 @*/
525 PetscErrorCode MatSuperluSetILUDropTol(Mat F,PetscReal dtol)
526 {
527   PetscErrorCode ierr;
528 
529   PetscFunctionBegin;
530   PetscValidHeaderSpecific(F,MAT_CLASSID,1);
531   PetscValidLogicalCollectiveInt(F,dtol,2);
532   ierr = PetscTryMethod(F,"MatSuperluSetILUDropTol_C",(Mat,PetscReal),(F,dtol));CHKERRQ(ierr);
533   PetscFunctionReturn(0);
534 }
535 
536 #undef __FUNCT__
537 #define __FUNCT__ "MatFactorGetSolverPackage_seqaij_superlu"
538 PetscErrorCode MatFactorGetSolverPackage_seqaij_superlu(Mat A,const MatSolverPackage *type)
539 {
540   PetscFunctionBegin;
541   *type = MATSOLVERSUPERLU;
542   PetscFunctionReturn(0);
543 }
544 
545 
546 /*MC
547   MATSOLVERSUPERLU = "superlu" - A solver package providing solvers LU and ILU for sequential matrices
548   via the external package SuperLU.
549 
550   Use ./configure --download-superlu to have PETSc installed with SuperLU
551 
552   Options Database Keys:
553 + -mat_superlu_equil <FALSE>            - Equil (None)
554 . -mat_superlu_colperm <COLAMD>         - (choose one of) NATURAL MMD_ATA MMD_AT_PLUS_A COLAMD
555 . -mat_superlu_iterrefine <NOREFINE>    - (choose one of) NOREFINE SINGLE DOUBLE EXTRA
556 . -mat_superlu_symmetricmode: <FALSE>   - SymmetricMode (None)
557 . -mat_superlu_diagpivotthresh <1>      - DiagPivotThresh (None)
558 . -mat_superlu_pivotgrowth <FALSE>      - PivotGrowth (None)
559 . -mat_superlu_conditionnumber <FALSE>  - ConditionNumber (None)
560 . -mat_superlu_rowperm <NOROWPERM>      - (choose one of) NOROWPERM LargeDiag
561 . -mat_superlu_replacetinypivot <FALSE> - ReplaceTinyPivot (None)
562 . -mat_superlu_printstat <FALSE>        - PrintStat (None)
563 . -mat_superlu_lwork <0>                - size of work array in bytes used by factorization (None)
564 . -mat_superlu_ilu_droptol <0>          - ILU_DropTol (None)
565 . -mat_superlu_ilu_filltol <0>          - ILU_FillTol (None)
566 . -mat_superlu_ilu_fillfactor <0>       - ILU_FillFactor (None)
567 . -mat_superlu_ilu_droprull <0>         - ILU_DropRule (None)
568 . -mat_superlu_ilu_norm <0>             - ILU_Norm (None)
569 - -mat_superlu_ilu_milu <0>             - ILU_MILU (None)
570 
571    Notes: Do not confuse this with MATSOLVERSUPERLU_DIST which is for parallel sparse solves
572 
573    Level: beginner
574 
575 .seealso: PCLU, PCILU, MATSOLVERSUPERLU_DIST, MATSOLVERMUMPS, PCFactorSetMatSolverPackage(), MatSolverPackage
576 M*/
577 
578 #undef __FUNCT__
579 #define __FUNCT__ "MatGetFactor_seqaij_superlu"
580 PETSC_EXTERN PetscErrorCode MatGetFactor_seqaij_superlu(Mat A,MatFactorType ftype,Mat *F)
581 {
582   Mat            B;
583   Mat_SuperLU    *lu;
584   PetscErrorCode ierr;
585   PetscInt       indx,m=A->rmap->n,n=A->cmap->n;
586   PetscBool      flg,set;
587   PetscReal      real_input;
588   const char     *colperm[]   ={"NATURAL","MMD_ATA","MMD_AT_PLUS_A","COLAMD"}; /* MY_PERMC - not supported by the petsc interface yet */
589   const char     *iterrefine[]={"NOREFINE", "SINGLE", "DOUBLE", "EXTRA"};
590   const char     *rowperm[]   ={"NOROWPERM", "LargeDiag"}; /* MY_PERMC - not supported by the petsc interface yet */
591 
592   PetscFunctionBegin;
593   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
594   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
595   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
596   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
597 
598   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) {
599     B->ops->lufactorsymbolic  = MatLUFactorSymbolic_SuperLU;
600     B->ops->ilufactorsymbolic = MatLUFactorSymbolic_SuperLU;
601   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
602 
603   B->ops->destroy     = MatDestroy_SuperLU;
604   B->ops->view        = MatView_SuperLU;
605   B->ops->getdiagonal = MatGetDiagonal_SuperLU;
606   B->factortype       = ftype;
607   B->assembled        = PETSC_TRUE;           /* required by -ksp_view */
608   B->preallocated     = PETSC_TRUE;
609 
610   ierr = PetscNewLog(B,&lu);CHKERRQ(ierr);
611 
612   if (ftype == MAT_FACTOR_LU) {
613     set_default_options(&lu->options);
614     /* Comments from SuperLU_4.0/SRC/dgssvx.c:
615       "Whether or not the system will be equilibrated depends on the
616        scaling of the matrix A, but if equilibration is used, A is
617        overwritten by diag(R)*A*diag(C) and B by diag(R)*B
618        (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans = TRANS or CONJ)."
619      We set 'options.Equil = NO' as default because additional space is needed for it.
620     */
621     lu->options.Equil = NO;
622   } else if (ftype == MAT_FACTOR_ILU) {
623     /* Set the default input options of ilu: */
624     PetscStackCall("SuperLU:ilu_set_default_options",ilu_set_default_options(&lu->options));
625   }
626   lu->options.PrintStat = NO;
627 
628   /* Initialize the statistics variables. */
629   PetscStackCall("SuperLU:StatInit",StatInit(&lu->stat));
630   lu->lwork = 0;   /* allocate space internally by system malloc */
631 
632   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"SuperLU Options","Mat");CHKERRQ(ierr);
633   ierr = PetscOptionsBool("-mat_superlu_equil","Equil","None",(PetscBool)lu->options.Equil,(PetscBool*)&lu->options.Equil,NULL);CHKERRQ(ierr);
634   ierr = PetscOptionsEList("-mat_superlu_colperm","ColPerm","None",colperm,4,colperm[3],&indx,&flg);CHKERRQ(ierr);
635   if (flg) lu->options.ColPerm = (colperm_t)indx;
636   ierr = PetscOptionsEList("-mat_superlu_iterrefine","IterRefine","None",iterrefine,4,iterrefine[0],&indx,&flg);CHKERRQ(ierr);
637   if (flg) lu->options.IterRefine = (IterRefine_t)indx;
638   ierr = PetscOptionsBool("-mat_superlu_symmetricmode","SymmetricMode","None",(PetscBool)lu->options.SymmetricMode,&flg,&set);CHKERRQ(ierr);
639   if (set && flg) lu->options.SymmetricMode = YES;
640   ierr = PetscOptionsReal("-mat_superlu_diagpivotthresh","DiagPivotThresh","None",lu->options.DiagPivotThresh,&real_input,&flg);CHKERRQ(ierr);
641   if (flg) lu->options.DiagPivotThresh = (double) real_input;
642   ierr = PetscOptionsBool("-mat_superlu_pivotgrowth","PivotGrowth","None",(PetscBool)lu->options.PivotGrowth,&flg,&set);CHKERRQ(ierr);
643   if (set && flg) lu->options.PivotGrowth = YES;
644   ierr = PetscOptionsBool("-mat_superlu_conditionnumber","ConditionNumber","None",(PetscBool)lu->options.ConditionNumber,&flg,&set);CHKERRQ(ierr);
645   if (set && flg) lu->options.ConditionNumber = YES;
646   ierr = PetscOptionsEList("-mat_superlu_rowperm","rowperm","None",rowperm,2,rowperm[lu->options.RowPerm],&indx,&flg);CHKERRQ(ierr);
647   if (flg) lu->options.RowPerm = (rowperm_t)indx;
648   ierr = PetscOptionsBool("-mat_superlu_replacetinypivot","ReplaceTinyPivot","None",(PetscBool)lu->options.ReplaceTinyPivot,&flg,&set);CHKERRQ(ierr);
649   if (set && flg) lu->options.ReplaceTinyPivot = YES;
650   ierr = PetscOptionsBool("-mat_superlu_printstat","PrintStat","None",(PetscBool)lu->options.PrintStat,&flg,&set);CHKERRQ(ierr);
651   if (set && flg) lu->options.PrintStat = YES;
652   ierr = PetscOptionsInt("-mat_superlu_lwork","size of work array in bytes used by factorization","None",lu->lwork,&lu->lwork,NULL);CHKERRQ(ierr);
653   if (lu->lwork > 0) {
654     /* lwork is in bytes, hence PetscMalloc() is used here, not PetscMalloc1()*/
655     ierr = PetscMalloc(lu->lwork,&lu->work);CHKERRQ(ierr);
656   } else if (lu->lwork != 0 && lu->lwork != -1) {
657     ierr      = PetscPrintf(PETSC_COMM_SELF,"   Warning: lwork %D is not supported by SUPERLU. The default lwork=0 is used.\n",lu->lwork);
658     lu->lwork = 0;
659   }
660   /* ilu options */
661   ierr = PetscOptionsReal("-mat_superlu_ilu_droptol","ILU_DropTol","None",lu->options.ILU_DropTol,&real_input,&flg);CHKERRQ(ierr);
662   if (flg) lu->options.ILU_DropTol = (double) real_input;
663   ierr = PetscOptionsReal("-mat_superlu_ilu_filltol","ILU_FillTol","None",lu->options.ILU_FillTol,&real_input,&flg);CHKERRQ(ierr);
664   if (flg) lu->options.ILU_FillTol = (double) real_input;
665   ierr = PetscOptionsReal("-mat_superlu_ilu_fillfactor","ILU_FillFactor","None",lu->options.ILU_FillFactor,&real_input,&flg);CHKERRQ(ierr);
666   if (flg) lu->options.ILU_FillFactor = (double) real_input;
667   ierr = PetscOptionsInt("-mat_superlu_ilu_droprull","ILU_DropRule","None",lu->options.ILU_DropRule,&lu->options.ILU_DropRule,NULL);CHKERRQ(ierr);
668   ierr = PetscOptionsInt("-mat_superlu_ilu_norm","ILU_Norm","None",lu->options.ILU_Norm,&indx,&flg);CHKERRQ(ierr);
669   if (flg) lu->options.ILU_Norm = (norm_t)indx;
670   ierr = PetscOptionsInt("-mat_superlu_ilu_milu","ILU_MILU","None",lu->options.ILU_MILU,&indx,&flg);CHKERRQ(ierr);
671   if (flg) lu->options.ILU_MILU = (milu_t)indx;
672   PetscOptionsEnd();
673   if (lu->options.Equil == YES) {
674     /* superlu overwrites input matrix and rhs when Equil is used, thus create A_dup to keep user's A unchanged */
675     ierr = MatDuplicate_SeqAIJ(A,MAT_COPY_VALUES,&lu->A_dup);CHKERRQ(ierr);
676   }
677 
678   /* Allocate spaces (notice sizes are for the transpose) */
679   ierr = PetscMalloc1(m,&lu->etree);CHKERRQ(ierr);
680   ierr = PetscMalloc1(n,&lu->perm_r);CHKERRQ(ierr);
681   ierr = PetscMalloc1(m,&lu->perm_c);CHKERRQ(ierr);
682   ierr = PetscMalloc1(n,&lu->R);CHKERRQ(ierr);
683   ierr = PetscMalloc1(m,&lu->C);CHKERRQ(ierr);
684 
685   /* create rhs and solution x without allocate space for .Store */
686 #if defined(PETSC_USE_COMPLEX)
687 #if defined(PETSC_USE_REAL_SINGLE)
688   PetscStackCall("SuperLU:cCreate_Dense_Matrix(",cCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_C, SLU_GE));
689   PetscStackCall("SuperLU:cCreate_Dense_Matrix(",cCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_C, SLU_GE));
690 #else
691   PetscStackCall("SuperLU:zCreate_Dense_Matrix",zCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_Z, SLU_GE));
692   PetscStackCall("SuperLU:zCreate_Dense_Matrix",zCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_Z, SLU_GE));
693 #endif
694 #else
695 #if defined(PETSC_USE_REAL_SINGLE)
696   PetscStackCall("SuperLU:sCreate_Dense_Matrix",sCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_S, SLU_GE));
697   PetscStackCall("SuperLU:sCreate_Dense_Matrix",sCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_S, SLU_GE));
698 #else
699   PetscStackCall("SuperLU:dCreate_Dense_Matrix",dCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_D, SLU_GE));
700   PetscStackCall("SuperLU:dCreate_Dense_Matrix",dCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_D, SLU_GE));
701 #endif
702 #endif
703 
704   ierr     = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverPackage_C",MatFactorGetSolverPackage_seqaij_superlu);CHKERRQ(ierr);
705   ierr     = PetscObjectComposeFunction((PetscObject)B,"MatSuperluSetILUDropTol_C",MatSuperluSetILUDropTol_SuperLU);CHKERRQ(ierr);
706   B->spptr = lu;
707 
708   *F       = B;
709   PetscFunctionReturn(0);
710 }
711 
712 #undef __FUNCT__
713 #define __FUNCT__ "MatSolverPackageRegister_SuperLU"
714 PETSC_EXTERN PetscErrorCode MatSolverPackageRegister_SuperLU(void)
715 {
716   PetscErrorCode ierr;
717 
718   PetscFunctionBegin;
719   ierr = MatSolverPackageRegister(MATSOLVERSUPERLU,MATSEQAIJ,       MAT_FACTOR_LU,MatGetFactor_seqaij_superlu);CHKERRQ(ierr);
720   ierr = MatSolverPackageRegister(MATSOLVERSUPERLU,MATSEQAIJ,       MAT_FACTOR_ILU,MatGetFactor_seqaij_superlu);CHKERRQ(ierr);
721   PetscFunctionReturn(0);
722 }
723