xref: /petsc/src/mat/impls/aij/seq/superlu/superlu.c (revision cf1aed2ce99d23e50336629af3ca8cf096900abb)
1 
2 /*  --------------------------------------------------------------------
3 
4      This file implements a subclass of the SeqAIJ matrix class that uses
5      the SuperLU sparse solver. You can use this as a starting point for
6      implementing your own subclass of a PETSc matrix class.
7 
8      This demonstrates a way to make an implementation inheritence of a PETSc
9      matrix type. This means constructing a new matrix type (SuperLU) by changing some
10      of the methods of the previous type (SeqAIJ), adding additional data, and possibly
11      additional method. (See any book on object oriented programming).
12 */
13 
14 /*
15      Defines the data structure for the base matrix type (SeqAIJ)
16 */
17 #include <../src/mat/impls/aij/seq/aij.h>    /*I "petscmat.h" I*/
18 
19 /*
20      SuperLU include files
21 */
22 EXTERN_C_BEGIN
23 #if defined(PETSC_USE_COMPLEX)
24 #if defined(PETSC_USE_REAL_SINGLE)
25 #include <slu_cdefs.h>
26 #else
27 #include <slu_zdefs.h>
28 #endif
29 #else
30 #if defined(PETSC_USE_REAL_SINGLE)
31 #include <slu_sdefs.h>
32 #else
33 #include <slu_ddefs.h>
34 #endif
35 #endif
36 #include <slu_util.h>
37 EXTERN_C_END
38 
39 /*
40      This is the data we are "ADDING" to the SeqAIJ matrix type to get the SuperLU matrix type
41 */
42 typedef struct {
43   SuperMatrix       A,L,U,B,X;
44   superlu_options_t options;
45   PetscInt          *perm_c; /* column permutation vector */
46   PetscInt          *perm_r; /* row permutations from partial pivoting */
47   PetscInt          *etree;
48   PetscReal         *R, *C;
49   char              equed[1];
50   PetscInt          lwork;
51   void              *work;
52   PetscReal         rpg, rcond;
53   mem_usage_t       mem_usage;
54   MatStructure      flg;
55   SuperLUStat_t     stat;
56   Mat               A_dup;
57   PetscScalar       *rhs_dup;
58 
59   /* Flag to clean up (non-global) SuperLU objects during Destroy */
60   PetscBool CleanUpSuperLU;
61 } Mat_SuperLU;
62 
63 extern PetscErrorCode MatFactorInfo_SuperLU(Mat,PetscViewer);
64 extern PetscErrorCode MatLUFactorNumeric_SuperLU(Mat,Mat,const MatFactorInfo*);
65 extern PetscErrorCode MatDestroy_SuperLU(Mat);
66 extern PetscErrorCode MatView_SuperLU(Mat,PetscViewer);
67 extern PetscErrorCode MatAssemblyEnd_SuperLU(Mat,MatAssemblyType);
68 extern PetscErrorCode MatSolve_SuperLU(Mat,Vec,Vec);
69 extern PetscErrorCode MatMatSolve_SuperLU(Mat,Mat,Mat);
70 extern PetscErrorCode MatSolveTranspose_SuperLU(Mat,Vec,Vec);
71 extern PetscErrorCode MatLUFactorSymbolic_SuperLU(Mat,Mat,IS,IS,const MatFactorInfo*);
72 extern PetscErrorCode MatDuplicate_SuperLU(Mat, MatDuplicateOption, Mat*);
73 
74 /*
75     Utility function
76 */
77 #undef __FUNCT__
78 #define __FUNCT__ "MatFactorInfo_SuperLU"
79 PetscErrorCode MatFactorInfo_SuperLU(Mat A,PetscViewer viewer)
80 {
81   Mat_SuperLU       *lu= (Mat_SuperLU*)A->spptr;
82   PetscErrorCode    ierr;
83   superlu_options_t options;
84 
85   PetscFunctionBegin;
86   /* check if matrix is superlu_dist type */
87   if (A->ops->solve != MatSolve_SuperLU) PetscFunctionReturn(0);
88 
89   options = lu->options;
90 
91   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU run parameters:\n");CHKERRQ(ierr);
92   ierr = PetscViewerASCIIPrintf(viewer,"  Equil: %s\n",(options.Equil != NO) ? "YES" : "NO");CHKERRQ(ierr);
93   ierr = PetscViewerASCIIPrintf(viewer,"  ColPerm: %D\n",options.ColPerm);CHKERRQ(ierr);
94   ierr = PetscViewerASCIIPrintf(viewer,"  IterRefine: %D\n",options.IterRefine);CHKERRQ(ierr);
95   ierr = PetscViewerASCIIPrintf(viewer,"  SymmetricMode: %s\n",(options.SymmetricMode != NO) ? "YES" : "NO");CHKERRQ(ierr);
96   ierr = PetscViewerASCIIPrintf(viewer,"  DiagPivotThresh: %g\n",options.DiagPivotThresh);CHKERRQ(ierr);
97   ierr = PetscViewerASCIIPrintf(viewer,"  PivotGrowth: %s\n",(options.PivotGrowth != NO) ? "YES" : "NO");CHKERRQ(ierr);
98   ierr = PetscViewerASCIIPrintf(viewer,"  ConditionNumber: %s\n",(options.ConditionNumber != NO) ? "YES" : "NO");CHKERRQ(ierr);
99   ierr = PetscViewerASCIIPrintf(viewer,"  RowPerm: %D\n",options.RowPerm);CHKERRQ(ierr);
100   ierr = PetscViewerASCIIPrintf(viewer,"  ReplaceTinyPivot: %s\n",(options.ReplaceTinyPivot != NO) ? "YES" : "NO");CHKERRQ(ierr);
101   ierr = PetscViewerASCIIPrintf(viewer,"  PrintStat: %s\n",(options.PrintStat != NO) ? "YES" : "NO");CHKERRQ(ierr);
102   ierr = PetscViewerASCIIPrintf(viewer,"  lwork: %D\n",lu->lwork);CHKERRQ(ierr);
103   if (A->factortype == MAT_FACTOR_ILU) {
104     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_DropTol: %g\n",options.ILU_DropTol);CHKERRQ(ierr);
105     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_FillTol: %g\n",options.ILU_FillTol);CHKERRQ(ierr);
106     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_FillFactor: %g\n",options.ILU_FillFactor);CHKERRQ(ierr);
107     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_DropRule: %D\n",options.ILU_DropRule);CHKERRQ(ierr);
108     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_Norm: %D\n",options.ILU_Norm);CHKERRQ(ierr);
109     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_MILU: %D\n",options.ILU_MILU);CHKERRQ(ierr);
110   }
111   PetscFunctionReturn(0);
112 }
113 
114 /*
115     These are the methods provided to REPLACE the corresponding methods of the
116    SeqAIJ matrix class. Other methods of SeqAIJ are not replaced
117 */
118 #undef __FUNCT__
119 #define __FUNCT__ "MatLUFactorNumeric_SuperLU"
120 PetscErrorCode MatLUFactorNumeric_SuperLU(Mat F,Mat A,const MatFactorInfo *info)
121 {
122   Mat_SuperLU    *lu = (Mat_SuperLU*)F->spptr;
123   Mat_SeqAIJ     *aa;
124   PetscErrorCode ierr;
125   PetscInt       sinfo;
126   PetscReal      ferr, berr;
127   NCformat       *Ustore;
128   SCformat       *Lstore;
129 
130   PetscFunctionBegin;
131   if (lu->flg == SAME_NONZERO_PATTERN) { /* successing numerical factorization */
132     lu->options.Fact = SamePattern;
133     /* Ref: ~SuperLU_3.0/EXAMPLE/dlinsolx2.c */
134     Destroy_SuperMatrix_Store(&lu->A);
135     if (lu->options.Equil) {
136       ierr = MatCopy_SeqAIJ(A,lu->A_dup,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
137     }
138     if (lu->lwork >= 0) {
139       PetscStackCall("SuperLU:Destroy_SuperNode_Matrix",Destroy_SuperNode_Matrix(&lu->L));
140       PetscStackCall("SuperLU:Destroy_CompCol_Matrix",Destroy_CompCol_Matrix(&lu->U));
141       lu->options.Fact = SamePattern;
142     }
143   }
144 
145   /* Create the SuperMatrix for lu->A=A^T:
146        Since SuperLU likes column-oriented matrices,we pass it the transpose,
147        and then solve A^T X = B in MatSolve(). */
148   if (lu->options.Equil) {
149     aa = (Mat_SeqAIJ*)(lu->A_dup)->data;
150   } else {
151     aa = (Mat_SeqAIJ*)(A)->data;
152   }
153 #if defined(PETSC_USE_COMPLEX)
154 #if defined(PETSC_USE_REAL_SINGLE)
155   PetscStackCall("SuperLU:cCreate_CompCol_Matrix",cCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,(singlecomplex*)aa->a,aa->j,aa->i,SLU_NC,SLU_C,SLU_GE));
156 #else
157   PetscStackCall("SuperLU:zCreate_CompCol_Matrix",zCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,SLU_NC,SLU_Z,SLU_GE));
158 #endif
159 #else
160 #if defined(PETSC_USE_REAL_SINGLE)
161   PetscStackCall("SuperLU:sCreate_CompCol_Matrix",sCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,aa->a,aa->j,aa->i,SLU_NC,SLU_S,SLU_GE));
162 #else
163   PetscStackCall("SuperLU:dCreate_CompCol_Matrix",dCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,aa->a,aa->j,aa->i,SLU_NC,SLU_D,SLU_GE));
164 #endif
165 #endif
166 
167   /* Numerical factorization */
168   lu->B.ncol = 0;  /* Indicate not to solve the system */
169   if (F->factortype == MAT_FACTOR_LU) {
170 #if defined(PETSC_USE_COMPLEX)
171 #if defined(PETSC_USE_REAL_SINGLE)
172     PetscStackCall("SuperLU:cgssvx",cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
173                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
174                                      &lu->mem_usage, &lu->stat, &sinfo));
175 #else
176     PetscStackCall("SuperLU:zgssvx",zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
177                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
178                                      &lu->mem_usage, &lu->stat, &sinfo));
179 #endif
180 #else
181 #if defined(PETSC_USE_REAL_SINGLE)
182     PetscStackCall("SuperLU:sgssvx",sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
183                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
184                                      &lu->mem_usage, &lu->stat, &sinfo));
185 #else
186     PetscStackCall("SuperLU:dgssvx",dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
187                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
188                                      &lu->mem_usage, &lu->stat, &sinfo));
189 #endif
190 #endif
191   } else if (F->factortype == MAT_FACTOR_ILU) {
192     /* Compute the incomplete factorization, condition number and pivot growth */
193 #if defined(PETSC_USE_COMPLEX)
194 #if defined(PETSC_USE_REAL_SINGLE)
195     PetscStackCall("SuperLU:cgsisx",cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r,lu->etree, lu->equed, lu->R, lu->C,
196                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
197                                      &lu->mem_usage, &lu->stat, &sinfo));
198 #else
199     PetscStackCall("SuperLU:zgsisx",zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r,lu->etree, lu->equed, lu->R, lu->C,
200                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
201                                      &lu->mem_usage, &lu->stat, &sinfo));
202 #endif
203 #else
204 #if defined(PETSC_USE_REAL_SINGLE)
205     PetscStackCall("SuperLU:sgsisx",sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
206                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
207                                      &lu->mem_usage, &lu->stat, &sinfo));
208 #else
209     PetscStackCall("SuperLU:dgsisx",dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
210                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
211                                      &lu->mem_usage, &lu->stat, &sinfo));
212 #endif
213 #endif
214   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
215   if (!sinfo || sinfo == lu->A.ncol+1) {
216     if (lu->options.PivotGrowth) {
217       ierr = PetscPrintf(PETSC_COMM_SELF,"  Recip. pivot growth = %e\n", lu->rpg);
218     }
219     if (lu->options.ConditionNumber) {
220       ierr = PetscPrintf(PETSC_COMM_SELF,"  Recip. condition number = %e\n", lu->rcond);
221     }
222   } else if (sinfo > 0) {
223     if (lu->lwork == -1) {
224       ierr = PetscPrintf(PETSC_COMM_SELF,"  ** Estimated memory: %D bytes\n", sinfo - lu->A.ncol);
225     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot in row %D",sinfo);
226   } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %D, the %D-th argument in gssvx() had an illegal value", sinfo,-sinfo);
227 
228   if (lu->options.PrintStat) {
229     ierr = PetscPrintf(PETSC_COMM_SELF,"MatLUFactorNumeric_SuperLU():\n");
230     PetscStackCall("SuperLU:StatPrint",StatPrint(&lu->stat));
231     Lstore = (SCformat*) lu->L.Store;
232     Ustore = (NCformat*) lu->U.Store;
233     ierr   = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in factor L = %D\n", Lstore->nnz);
234     ierr   = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in factor U = %D\n", Ustore->nnz);
235     ierr   = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in L+U = %D\n", Lstore->nnz + Ustore->nnz - lu->A.ncol);
236     ierr   = PetscPrintf(PETSC_COMM_SELF,"  L\\U MB %.3f\ttotal MB needed %.3f\n",
237                          lu->mem_usage.for_lu/1e6, lu->mem_usage.total_needed/1e6);
238   }
239 
240   lu->flg                = SAME_NONZERO_PATTERN;
241   F->ops->solve          = MatSolve_SuperLU;
242   F->ops->solvetranspose = MatSolveTranspose_SuperLU;
243   F->ops->matsolve       = MatMatSolve_SuperLU;
244   PetscFunctionReturn(0);
245 }
246 
247 #undef __FUNCT__
248 #define __FUNCT__ "MatDestroy_SuperLU"
249 PetscErrorCode MatDestroy_SuperLU(Mat A)
250 {
251   PetscErrorCode ierr;
252   Mat_SuperLU    *lu=(Mat_SuperLU*)A->spptr;
253 
254   PetscFunctionBegin;
255   if (lu && lu->CleanUpSuperLU) { /* Free the SuperLU datastructures */
256     PetscStackCall("SuperLU:Destroy_SuperMatrix_Store",Destroy_SuperMatrix_Store(&lu->A));
257     PetscStackCall("SuperLU:Destroy_SuperMatrix_Store",Destroy_SuperMatrix_Store(&lu->B));
258     PetscStackCall("SuperLU:Destroy_SuperMatrix_Store",Destroy_SuperMatrix_Store(&lu->X));
259     PetscStackCall("SuperLU:StatFree",StatFree(&lu->stat));
260     if (lu->lwork >= 0) {
261       PetscStackCall("SuperLU:Destroy_SuperNode_Matrix",Destroy_SuperNode_Matrix(&lu->L));
262       PetscStackCall("SuperLU:Destroy_CompCol_Matrix",Destroy_CompCol_Matrix(&lu->U));
263     }
264   }
265   if (lu) {
266     ierr = PetscFree(lu->etree);CHKERRQ(ierr);
267     ierr = PetscFree(lu->perm_r);CHKERRQ(ierr);
268     ierr = PetscFree(lu->perm_c);CHKERRQ(ierr);
269     ierr = PetscFree(lu->R);CHKERRQ(ierr);
270     ierr = PetscFree(lu->C);CHKERRQ(ierr);
271     ierr = PetscFree(lu->rhs_dup);CHKERRQ(ierr);
272     ierr = MatDestroy(&lu->A_dup);CHKERRQ(ierr);
273   }
274   ierr = PetscFree(A->spptr);CHKERRQ(ierr);
275 
276   /* clear composed functions */
277   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverPackage_C",NULL);CHKERRQ(ierr);
278   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSuperluSetILUDropTol_C",NULL);CHKERRQ(ierr);
279 
280   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
281   PetscFunctionReturn(0);
282 }
283 
284 #undef __FUNCT__
285 #define __FUNCT__ "MatView_SuperLU"
286 PetscErrorCode MatView_SuperLU(Mat A,PetscViewer viewer)
287 {
288   PetscErrorCode    ierr;
289   PetscBool         iascii;
290   PetscViewerFormat format;
291 
292   PetscFunctionBegin;
293   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
294   if (iascii) {
295     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
296     if (format == PETSC_VIEWER_ASCII_INFO) {
297       ierr = MatFactorInfo_SuperLU(A,viewer);CHKERRQ(ierr);
298     }
299   }
300   PetscFunctionReturn(0);
301 }
302 
303 
304 #undef __FUNCT__
305 #define __FUNCT__ "MatSolve_SuperLU_Private"
306 PetscErrorCode MatSolve_SuperLU_Private(Mat A,Vec b,Vec x)
307 {
308   Mat_SuperLU      *lu = (Mat_SuperLU*)A->spptr;
309   PetscScalar      *barray,*xarray;
310   PetscErrorCode   ierr;
311   PetscInt         info,i,n;
312   PetscReal        ferr,berr;
313   static PetscBool cite = PETSC_FALSE;
314 
315   PetscFunctionBegin;
316   if (lu->lwork == -1) PetscFunctionReturn(0);
317   ierr = PetscCitationsRegister("@article{superlu99,\n  author  = {James W. Demmel and Stanley C. Eisenstat and\n             John R. Gilbert and Xiaoye S. Li and Joseph W. H. Liu},\n  title = {A supernodal approach to sparse partial pivoting},\n  journal = {SIAM J. Matrix Analysis and Applications},\n  year = {1999},\n  volume  = {20},\n  number = {3},\n  pages = {720-755}\n}\n",&cite);CHKERRQ(ierr);
318 
319   ierr = VecGetLocalSize(x,&n);CHKERRQ(ierr);
320   lu->B.ncol = 1;   /* Set the number of right-hand side */
321   if (lu->options.Equil && !lu->rhs_dup) {
322     /* superlu overwrites b when Equil is used, thus create rhs_dup to keep user's b unchanged */
323     ierr = PetscMalloc1(n,&lu->rhs_dup);CHKERRQ(ierr);
324   }
325   if (lu->options.Equil) {
326     /* Copy b into rsh_dup */
327     ierr   = VecGetArray(b,&barray);CHKERRQ(ierr);
328     ierr   = PetscMemcpy(lu->rhs_dup,barray,n*sizeof(PetscScalar));CHKERRQ(ierr);
329     ierr   = VecRestoreArray(b,&barray);CHKERRQ(ierr);
330     barray = lu->rhs_dup;
331   } else {
332     ierr = VecGetArray(b,&barray);CHKERRQ(ierr);
333   }
334   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
335 
336 #if defined(PETSC_USE_COMPLEX)
337 #if defined(PETSC_USE_REAL_SINGLE)
338   ((DNformat*)lu->B.Store)->nzval = (singlecomplex*)barray;
339   ((DNformat*)lu->X.Store)->nzval = (singlecomplex*)xarray;
340 #else
341   ((DNformat*)lu->B.Store)->nzval = (doublecomplex*)barray;
342   ((DNformat*)lu->X.Store)->nzval = (doublecomplex*)xarray;
343 #endif
344 #else
345   ((DNformat*)lu->B.Store)->nzval = barray;
346   ((DNformat*)lu->X.Store)->nzval = xarray;
347 #endif
348 
349   lu->options.Fact = FACTORED; /* Indicate the factored form of A is supplied. */
350   if (A->factortype == MAT_FACTOR_LU) {
351 #if defined(PETSC_USE_COMPLEX)
352 #if defined(PETSC_USE_REAL_SINGLE)
353     PetscStackCall("SuperLU:cgssvx",cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
354                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
355                                      &lu->mem_usage, &lu->stat, &info));
356 #else
357     PetscStackCall("SuperLU:zgssvx",zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
358                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
359                                      &lu->mem_usage, &lu->stat, &info));
360 #endif
361 #else
362 #if defined(PETSC_USE_REAL_SINGLE)
363     PetscStackCall("SuperLU:sgssvx",sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
364                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
365                                      &lu->mem_usage, &lu->stat, &info));
366 #else
367     PetscStackCall("SuperLU:dgssvx",dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
368                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
369                                      &lu->mem_usage, &lu->stat, &info));
370 #endif
371 #endif
372   } else if (A->factortype == MAT_FACTOR_ILU) {
373 #if defined(PETSC_USE_COMPLEX)
374 #if defined(PETSC_USE_REAL_SINGLE)
375     PetscStackCall("SuperLU:cgsisx",cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
376                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
377                                      &lu->mem_usage, &lu->stat, &info));
378 #else
379     PetscStackCall("SuperLU:zgsisx",zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
380                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
381                                      &lu->mem_usage, &lu->stat, &info));
382 #endif
383 #else
384 #if defined(PETSC_USE_REAL_SINGLE)
385     PetscStackCall("SuperLU:sgsisx",sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
386                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
387                                      &lu->mem_usage, &lu->stat, &info));
388 #else
389     PetscStackCall("SuperLU:dgsisx",dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
390                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
391                                      &lu->mem_usage, &lu->stat, &info));
392 #endif
393 #endif
394   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
395   if (!lu->options.Equil) {
396     ierr = VecRestoreArray(b,&barray);CHKERRQ(ierr);
397   }
398   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
399 
400   if (!info || info == lu->A.ncol+1) {
401     if (lu->options.IterRefine) {
402       ierr = PetscPrintf(PETSC_COMM_SELF,"Iterative Refinement:\n");
403       ierr = PetscPrintf(PETSC_COMM_SELF,"  %8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR");
404       for (i = 0; i < 1; ++i) {
405         ierr = PetscPrintf(PETSC_COMM_SELF,"  %8d%8d%16e%16e\n", i+1, lu->stat.RefineSteps, ferr, berr);
406       }
407     }
408   } else if (info > 0) {
409     if (lu->lwork == -1) {
410       ierr = PetscPrintf(PETSC_COMM_SELF,"  ** Estimated memory: %D bytes\n", info - lu->A.ncol);
411     } else {
412       ierr = PetscPrintf(PETSC_COMM_SELF,"  Warning: gssvx() returns info %D\n",info);
413     }
414   } else if (info < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %D, the %D-th argument in gssvx() had an illegal value", info,-info);
415 
416   if (lu->options.PrintStat) {
417     ierr = PetscPrintf(PETSC_COMM_SELF,"MatSolve__SuperLU():\n");
418     PetscStackCall("SuperLU:StatPrint",StatPrint(&lu->stat));
419   }
420   PetscFunctionReturn(0);
421 }
422 
423 #undef __FUNCT__
424 #define __FUNCT__ "MatSolve_SuperLU"
425 PetscErrorCode MatSolve_SuperLU(Mat A,Vec b,Vec x)
426 {
427   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
428   PetscErrorCode ierr;
429 
430   PetscFunctionBegin;
431   lu->options.Trans = TRANS;
432 
433   ierr = MatSolve_SuperLU_Private(A,b,x);CHKERRQ(ierr);
434   PetscFunctionReturn(0);
435 }
436 
437 #undef __FUNCT__
438 #define __FUNCT__ "MatSolveTranspose_SuperLU"
439 PetscErrorCode MatSolveTranspose_SuperLU(Mat A,Vec b,Vec x)
440 {
441   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
442   PetscErrorCode ierr;
443 
444   PetscFunctionBegin;
445   lu->options.Trans = NOTRANS;
446 
447   ierr = MatSolve_SuperLU_Private(A,b,x);CHKERRQ(ierr);
448   PetscFunctionReturn(0);
449 }
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "MatMatSolve_SuperLU"
453 PetscErrorCode MatMatSolve_SuperLU(Mat A,Mat B,Mat X)
454 {
455   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
456   PetscBool      flg;
457   PetscErrorCode ierr;
458 
459   PetscFunctionBegin;
460   ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
461   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix");
462   ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
463   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");
464   lu->options.Trans = TRANS;
465   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatSolve_SuperLU() is not implemented yet");
466   PetscFunctionReturn(0);
467 }
468 
469 /*
470    Note the r permutation is ignored
471 */
472 #undef __FUNCT__
473 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU"
474 PetscErrorCode MatLUFactorSymbolic_SuperLU(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
475 {
476   Mat_SuperLU *lu = (Mat_SuperLU*)(F->spptr);
477 
478   PetscFunctionBegin;
479   lu->flg                 = DIFFERENT_NONZERO_PATTERN;
480   lu->CleanUpSuperLU      = PETSC_TRUE;
481   F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU;
482   PetscFunctionReturn(0);
483 }
484 
485 #undef __FUNCT__
486 #define __FUNCT__ "MatSuperluSetILUDropTol_SuperLU"
487 static PetscErrorCode MatSuperluSetILUDropTol_SuperLU(Mat F,PetscReal dtol)
488 {
489   Mat_SuperLU *lu= (Mat_SuperLU*)F->spptr;
490 
491   PetscFunctionBegin;
492   lu->options.ILU_DropTol = dtol;
493   PetscFunctionReturn(0);
494 }
495 
496 #undef __FUNCT__
497 #define __FUNCT__ "MatSuperluSetILUDropTol"
498 /*@
499   MatSuperluSetILUDropTol - Set SuperLU ILU drop tolerance
500    Logically Collective on Mat
501 
502    Input Parameters:
503 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-SuperLU interface
504 -  dtol - drop tolerance
505 
506   Options Database:
507 .   -mat_superlu_ilu_droptol <dtol>
508 
509    Level: beginner
510 
511    References: SuperLU Users' Guide
512 
513 .seealso: MatGetFactor()
514 @*/
515 PetscErrorCode MatSuperluSetILUDropTol(Mat F,PetscReal dtol)
516 {
517   PetscErrorCode ierr;
518 
519   PetscFunctionBegin;
520   PetscValidHeaderSpecific(F,MAT_CLASSID,1);
521   PetscValidLogicalCollectiveInt(F,dtol,2);
522   ierr = PetscTryMethod(F,"MatSuperluSetILUDropTol_C",(Mat,PetscReal),(F,dtol));CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "MatFactorGetSolverPackage_seqaij_superlu"
528 PetscErrorCode MatFactorGetSolverPackage_seqaij_superlu(Mat A,const MatSolverPackage *type)
529 {
530   PetscFunctionBegin;
531   *type = MATSOLVERSUPERLU;
532   PetscFunctionReturn(0);
533 }
534 
535 
536 /*MC
537   MATSOLVERSUPERLU = "superlu" - A solver package providing solvers LU and ILU for sequential matrices
538   via the external package SuperLU.
539 
540   Use ./configure --download-superlu to have PETSc installed with SuperLU
541 
542   Options Database Keys:
543 + -mat_superlu_equil <FALSE>            - Equil (None)
544 . -mat_superlu_colperm <COLAMD>         - (choose one of) NATURAL MMD_ATA MMD_AT_PLUS_A COLAMD
545 . -mat_superlu_iterrefine <NOREFINE>    - (choose one of) NOREFINE SINGLE DOUBLE EXTRA
546 . -mat_superlu_symmetricmode: <FALSE>   - SymmetricMode (None)
547 . -mat_superlu_diagpivotthresh <1>      - DiagPivotThresh (None)
548 . -mat_superlu_pivotgrowth <FALSE>      - PivotGrowth (None)
549 . -mat_superlu_conditionnumber <FALSE>  - ConditionNumber (None)
550 . -mat_superlu_rowperm <NOROWPERM>      - (choose one of) NOROWPERM LargeDiag
551 . -mat_superlu_replacetinypivot <FALSE> - ReplaceTinyPivot (None)
552 . -mat_superlu_printstat <FALSE>        - PrintStat (None)
553 . -mat_superlu_lwork <0>                - size of work array in bytes used by factorization (None)
554 . -mat_superlu_ilu_droptol <0>          - ILU_DropTol (None)
555 . -mat_superlu_ilu_filltol <0>          - ILU_FillTol (None)
556 . -mat_superlu_ilu_fillfactor <0>       - ILU_FillFactor (None)
557 . -mat_superlu_ilu_droprull <0>         - ILU_DropRule (None)
558 . -mat_superlu_ilu_norm <0>             - ILU_Norm (None)
559 - -mat_superlu_ilu_milu <0>             - ILU_MILU (None)
560 
561    Notes: Do not confuse this with MATSOLVERSUPERLU_DIST which is for parallel sparse solves
562 
563    Level: beginner
564 
565 .seealso: PCLU, PCILU, MATSOLVERSUPERLU_DIST, MATSOLVERMUMPS, PCFactorSetMatSolverPackage(), MatSolverPackage
566 M*/
567 
568 #undef __FUNCT__
569 #define __FUNCT__ "MatGetFactor_seqaij_superlu"
570 PETSC_EXTERN PetscErrorCode MatGetFactor_seqaij_superlu(Mat A,MatFactorType ftype,Mat *F)
571 {
572   Mat            B;
573   Mat_SuperLU    *lu;
574   PetscErrorCode ierr;
575   PetscInt       indx,m=A->rmap->n,n=A->cmap->n;
576   PetscBool      flg;
577   PetscReal      real_input;
578   const char     *colperm[]   ={"NATURAL","MMD_ATA","MMD_AT_PLUS_A","COLAMD"}; /* MY_PERMC - not supported by the petsc interface yet */
579   const char     *iterrefine[]={"NOREFINE", "SINGLE", "DOUBLE", "EXTRA"};
580   const char     *rowperm[]   ={"NOROWPERM", "LargeDiag"}; /* MY_PERMC - not supported by the petsc interface yet */
581 
582   PetscFunctionBegin;
583   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
584   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
585   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
586   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
587 
588   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) {
589     B->ops->lufactorsymbolic  = MatLUFactorSymbolic_SuperLU;
590     B->ops->ilufactorsymbolic = MatLUFactorSymbolic_SuperLU;
591   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
592 
593   B->ops->destroy = MatDestroy_SuperLU;
594   B->ops->view    = MatView_SuperLU;
595   B->factortype   = ftype;
596   B->assembled    = PETSC_TRUE;           /* required by -ksp_view */
597   B->preallocated = PETSC_TRUE;
598 
599   ierr = PetscNewLog(B,&lu);CHKERRQ(ierr);
600 
601   if (ftype == MAT_FACTOR_LU) {
602     set_default_options(&lu->options);
603     /* Comments from SuperLU_4.0/SRC/dgssvx.c:
604       "Whether or not the system will be equilibrated depends on the
605        scaling of the matrix A, but if equilibration is used, A is
606        overwritten by diag(R)*A*diag(C) and B by diag(R)*B
607        (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans = TRANS or CONJ)."
608      We set 'options.Equil = NO' as default because additional space is needed for it.
609     */
610     lu->options.Equil = NO;
611   } else if (ftype == MAT_FACTOR_ILU) {
612     /* Set the default input options of ilu: */
613     PetscStackCall("SuperLU:ilu_set_default_options",ilu_set_default_options(&lu->options));
614   }
615   lu->options.PrintStat = NO;
616 
617   /* Initialize the statistics variables. */
618   PetscStackCall("SuperLU:StatInit",StatInit(&lu->stat));
619   lu->lwork = 0;   /* allocate space internally by system malloc */
620 
621   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"SuperLU Options","Mat");CHKERRQ(ierr);
622   ierr = PetscOptionsBool("-mat_superlu_equil","Equil","None",(PetscBool)lu->options.Equil,(PetscBool*)&lu->options.Equil,0);CHKERRQ(ierr);
623   ierr = PetscOptionsEList("-mat_superlu_colperm","ColPerm","None",colperm,4,colperm[3],&indx,&flg);CHKERRQ(ierr);
624   if (flg) lu->options.ColPerm = (colperm_t)indx;
625   ierr = PetscOptionsEList("-mat_superlu_iterrefine","IterRefine","None",iterrefine,4,iterrefine[0],&indx,&flg);CHKERRQ(ierr);
626   if (flg) lu->options.IterRefine = (IterRefine_t)indx;
627   ierr = PetscOptionsBool("-mat_superlu_symmetricmode","SymmetricMode","None",(PetscBool)lu->options.SymmetricMode,&flg,0);CHKERRQ(ierr);
628   if (flg) lu->options.SymmetricMode = YES;
629   ierr = PetscOptionsReal("-mat_superlu_diagpivotthresh","DiagPivotThresh","None",lu->options.DiagPivotThresh,&real_input,&flg);CHKERRQ(ierr);
630   if (flg) lu->options.DiagPivotThresh = (double) real_input;
631   ierr = PetscOptionsBool("-mat_superlu_pivotgrowth","PivotGrowth","None",(PetscBool)lu->options.PivotGrowth,&flg,0);CHKERRQ(ierr);
632   if (flg) lu->options.PivotGrowth = YES;
633   ierr = PetscOptionsBool("-mat_superlu_conditionnumber","ConditionNumber","None",(PetscBool)lu->options.ConditionNumber,&flg,0);CHKERRQ(ierr);
634   if (flg) lu->options.ConditionNumber = YES;
635   ierr = PetscOptionsEList("-mat_superlu_rowperm","rowperm","None",rowperm,2,rowperm[lu->options.RowPerm],&indx,&flg);CHKERRQ(ierr);
636   if (flg) lu->options.RowPerm = (rowperm_t)indx;
637   ierr = PetscOptionsBool("-mat_superlu_replacetinypivot","ReplaceTinyPivot","None",(PetscBool)lu->options.ReplaceTinyPivot,&flg,0);CHKERRQ(ierr);
638   if (flg) lu->options.ReplaceTinyPivot = YES;
639   ierr = PetscOptionsBool("-mat_superlu_printstat","PrintStat","None",(PetscBool)lu->options.PrintStat,&flg,0);CHKERRQ(ierr);
640   if (flg) lu->options.PrintStat = YES;
641   ierr = PetscOptionsInt("-mat_superlu_lwork","size of work array in bytes used by factorization","None",lu->lwork,&lu->lwork,NULL);CHKERRQ(ierr);
642   if (lu->lwork > 0) {
643     ierr = PetscMalloc(lu->lwork,&lu->work);CHKERRQ(ierr);
644   } else if (lu->lwork != 0 && lu->lwork != -1) {
645     ierr      = PetscPrintf(PETSC_COMM_SELF,"   Warning: lwork %D is not supported by SUPERLU. The default lwork=0 is used.\n",lu->lwork);
646     lu->lwork = 0;
647   }
648   /* ilu options */
649   ierr = PetscOptionsReal("-mat_superlu_ilu_droptol","ILU_DropTol","None",lu->options.ILU_DropTol,&real_input,&flg);CHKERRQ(ierr);
650   if (flg) lu->options.ILU_DropTol = (double) real_input;
651   ierr = PetscOptionsReal("-mat_superlu_ilu_filltol","ILU_FillTol","None",lu->options.ILU_FillTol,&real_input,&flg);CHKERRQ(ierr);
652   if (flg) lu->options.ILU_FillTol = (double) real_input;
653   ierr = PetscOptionsReal("-mat_superlu_ilu_fillfactor","ILU_FillFactor","None",lu->options.ILU_FillFactor,&real_input,&flg);CHKERRQ(ierr);
654   if (flg) lu->options.ILU_FillFactor = (double) real_input;
655   ierr = PetscOptionsInt("-mat_superlu_ilu_droprull","ILU_DropRule","None",lu->options.ILU_DropRule,&lu->options.ILU_DropRule,NULL);CHKERRQ(ierr);
656   ierr = PetscOptionsInt("-mat_superlu_ilu_norm","ILU_Norm","None",lu->options.ILU_Norm,&indx,&flg);CHKERRQ(ierr);
657   if (flg) lu->options.ILU_Norm = (norm_t)indx;
658   ierr = PetscOptionsInt("-mat_superlu_ilu_milu","ILU_MILU","None",lu->options.ILU_MILU,&indx,&flg);CHKERRQ(ierr);
659   if (flg) lu->options.ILU_MILU = (milu_t)indx;
660   PetscOptionsEnd();
661   if (lu->options.Equil == YES) {
662     /* superlu overwrites input matrix and rhs when Equil is used, thus create A_dup to keep user's A unchanged */
663     ierr = MatDuplicate_SeqAIJ(A,MAT_COPY_VALUES,&lu->A_dup);CHKERRQ(ierr);
664   }
665 
666   /* Allocate spaces (notice sizes are for the transpose) */
667   ierr = PetscMalloc1(m,&lu->etree);CHKERRQ(ierr);
668   ierr = PetscMalloc1(n,&lu->perm_r);CHKERRQ(ierr);
669   ierr = PetscMalloc1(m,&lu->perm_c);CHKERRQ(ierr);
670   ierr = PetscMalloc1(n,&lu->R);CHKERRQ(ierr);
671   ierr = PetscMalloc1(m,&lu->C);CHKERRQ(ierr);
672 
673   /* create rhs and solution x without allocate space for .Store */
674 #if defined(PETSC_USE_COMPLEX)
675 #if defined(PETSC_USE_REAL_SINGLE)
676   PetscStackCall("SuperLU:cCreate_Dense_Matrix(",cCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_C, SLU_GE));
677   PetscStackCall("SuperLU:cCreate_Dense_Matrix(",cCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_C, SLU_GE));
678 #else
679   PetscStackCall("SuperLU:zCreate_Dense_Matrix",zCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_Z, SLU_GE));
680   PetscStackCall("SuperLU:zCreate_Dense_Matrix",zCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_Z, SLU_GE));
681 #endif
682 #else
683 #if defined(PETSC_USE_REAL_SINGLE)
684   PetscStackCall("SuperLU:sCreate_Dense_Matrix",sCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_S, SLU_GE));
685   PetscStackCall("SuperLU:sCreate_Dense_Matrix",sCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_S, SLU_GE));
686 #else
687   PetscStackCall("SuperLU:dCreate_Dense_Matrix",dCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_D, SLU_GE));
688   PetscStackCall("SuperLU:dCreate_Dense_Matrix",dCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_D, SLU_GE));
689 #endif
690 #endif
691 
692   ierr     = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverPackage_C",MatFactorGetSolverPackage_seqaij_superlu);CHKERRQ(ierr);
693   ierr     = PetscObjectComposeFunction((PetscObject)B,"MatSuperluSetILUDropTol_C",MatSuperluSetILUDropTol_SuperLU);CHKERRQ(ierr);
694   B->spptr = lu;
695   *F       = B;
696   PetscFunctionReturn(0);
697 }
698 
699