xref: /petsc/src/mat/impls/aij/seq/superlu/superlu.c (revision fb8e56e08d4d0bfe9fc63603ca1f7fddd68abbdb)
1 
2 /*  --------------------------------------------------------------------
3 
4      This file implements a subclass of the SeqAIJ matrix class that uses
5      the SuperLU sparse solver. You can use this as a starting point for
6      implementing your own subclass of a PETSc matrix class.
7 
8      This demonstrates a way to make an implementation inheritence of a PETSc
9      matrix type. This means constructing a new matrix type (SuperLU) by changing some
10      of the methods of the previous type (SeqAIJ), adding additional data, and possibly
11      additional method. (See any book on object oriented programming).
12 */
13 
14 /*
15      Defines the data structure for the base matrix type (SeqAIJ)
16 */
17 #include <../src/mat/impls/aij/seq/aij.h>    /*I "petscmat.h" I*/
18 
19 /*
20      SuperLU include files
21 */
22 EXTERN_C_BEGIN
23 #if defined(PETSC_USE_COMPLEX)
24 #if defined(PETSC_USE_REAL_SINGLE)
25 #include <slu_cdefs.h>
26 #else
27 #include <slu_zdefs.h>
28 #endif
29 #else
30 #if defined(PETSC_USE_REAL_SINGLE)
31 #include <slu_sdefs.h>
32 #else
33 #include <slu_ddefs.h>
34 #endif
35 #endif
36 #include <slu_util.h>
37 EXTERN_C_END
38 
39 /*
40      This is the data we are "ADDING" to the SeqAIJ matrix type to get the SuperLU matrix type
41 */
42 typedef struct {
43   SuperMatrix       A,L,U,B,X;
44   superlu_options_t options;
45   PetscInt          *perm_c; /* column permutation vector */
46   PetscInt          *perm_r; /* row permutations from partial pivoting */
47   PetscInt          *etree;
48   PetscReal         *R, *C;
49   char              equed[1];
50   PetscInt          lwork;
51   void              *work;
52   PetscReal         rpg, rcond;
53   mem_usage_t       mem_usage;
54   MatStructure      flg;
55   SuperLUStat_t     stat;
56   Mat               A_dup;
57   PetscScalar       *rhs_dup;
58 
59   /* Flag to clean up (non-global) SuperLU objects during Destroy */
60   PetscBool CleanUpSuperLU;
61 } Mat_SuperLU;
62 
63 extern PetscErrorCode MatFactorInfo_SuperLU(Mat,PetscViewer);
64 extern PetscErrorCode MatLUFactorNumeric_SuperLU(Mat,Mat,const MatFactorInfo*);
65 extern PetscErrorCode MatDestroy_SuperLU(Mat);
66 extern PetscErrorCode MatView_SuperLU(Mat,PetscViewer);
67 extern PetscErrorCode MatAssemblyEnd_SuperLU(Mat,MatAssemblyType);
68 extern PetscErrorCode MatSolve_SuperLU(Mat,Vec,Vec);
69 extern PetscErrorCode MatMatSolve_SuperLU(Mat,Mat,Mat);
70 extern PetscErrorCode MatSolveTranspose_SuperLU(Mat,Vec,Vec);
71 extern PetscErrorCode MatLUFactorSymbolic_SuperLU(Mat,Mat,IS,IS,const MatFactorInfo*);
72 extern PetscErrorCode MatDuplicate_SuperLU(Mat, MatDuplicateOption, Mat*);
73 
74 /*
75     Utility function
76 */
77 #undef __FUNCT__
78 #define __FUNCT__ "MatFactorInfo_SuperLU"
79 PetscErrorCode MatFactorInfo_SuperLU(Mat A,PetscViewer viewer)
80 {
81   Mat_SuperLU       *lu= (Mat_SuperLU*)A->spptr;
82   PetscErrorCode    ierr;
83   superlu_options_t options;
84 
85   PetscFunctionBegin;
86   /* check if matrix is superlu_dist type */
87   if (A->ops->solve != MatSolve_SuperLU) PetscFunctionReturn(0);
88 
89   options = lu->options;
90 
91   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU run parameters:\n");CHKERRQ(ierr);
92   ierr = PetscViewerASCIIPrintf(viewer,"  Equil: %s\n",(options.Equil != NO) ? "YES" : "NO");CHKERRQ(ierr);
93   ierr = PetscViewerASCIIPrintf(viewer,"  ColPerm: %D\n",options.ColPerm);CHKERRQ(ierr);
94   ierr = PetscViewerASCIIPrintf(viewer,"  IterRefine: %D\n",options.IterRefine);CHKERRQ(ierr);
95   ierr = PetscViewerASCIIPrintf(viewer,"  SymmetricMode: %s\n",(options.SymmetricMode != NO) ? "YES" : "NO");CHKERRQ(ierr);
96   ierr = PetscViewerASCIIPrintf(viewer,"  DiagPivotThresh: %g\n",options.DiagPivotThresh);CHKERRQ(ierr);
97   ierr = PetscViewerASCIIPrintf(viewer,"  PivotGrowth: %s\n",(options.PivotGrowth != NO) ? "YES" : "NO");CHKERRQ(ierr);
98   ierr = PetscViewerASCIIPrintf(viewer,"  ConditionNumber: %s\n",(options.ConditionNumber != NO) ? "YES" : "NO");CHKERRQ(ierr);
99   ierr = PetscViewerASCIIPrintf(viewer,"  RowPerm: %D\n",options.RowPerm);CHKERRQ(ierr);
100   ierr = PetscViewerASCIIPrintf(viewer,"  ReplaceTinyPivot: %s\n",(options.ReplaceTinyPivot != NO) ? "YES" : "NO");CHKERRQ(ierr);
101   ierr = PetscViewerASCIIPrintf(viewer,"  PrintStat: %s\n",(options.PrintStat != NO) ? "YES" : "NO");CHKERRQ(ierr);
102   ierr = PetscViewerASCIIPrintf(viewer,"  lwork: %D\n",lu->lwork);CHKERRQ(ierr);
103   if (A->factortype == MAT_FACTOR_ILU) {
104     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_DropTol: %g\n",options.ILU_DropTol);CHKERRQ(ierr);
105     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_FillTol: %g\n",options.ILU_FillTol);CHKERRQ(ierr);
106     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_FillFactor: %g\n",options.ILU_FillFactor);CHKERRQ(ierr);
107     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_DropRule: %D\n",options.ILU_DropRule);CHKERRQ(ierr);
108     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_Norm: %D\n",options.ILU_Norm);CHKERRQ(ierr);
109     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_MILU: %D\n",options.ILU_MILU);CHKERRQ(ierr);
110   }
111   PetscFunctionReturn(0);
112 }
113 
114 /*
115     These are the methods provided to REPLACE the corresponding methods of the
116    SeqAIJ matrix class. Other methods of SeqAIJ are not replaced
117 */
118 #undef __FUNCT__
119 #define __FUNCT__ "MatLUFactorNumeric_SuperLU"
120 PetscErrorCode MatLUFactorNumeric_SuperLU(Mat F,Mat A,const MatFactorInfo *info)
121 {
122   Mat_SuperLU    *lu = (Mat_SuperLU*)F->spptr;
123   Mat_SeqAIJ     *aa;
124   PetscErrorCode ierr;
125   PetscInt       sinfo;
126   PetscReal      ferr, berr;
127   NCformat       *Ustore;
128   SCformat       *Lstore;
129 
130   PetscFunctionBegin;
131   if (lu->flg == SAME_NONZERO_PATTERN) { /* successing numerical factorization */
132     lu->options.Fact = SamePattern;
133     /* Ref: ~SuperLU_3.0/EXAMPLE/dlinsolx2.c */
134     Destroy_SuperMatrix_Store(&lu->A);
135     if (lu->options.Equil) {
136       ierr = MatCopy_SeqAIJ(A,lu->A_dup,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
137     }
138     if (lu->lwork >= 0) {
139       PetscStackCall("SuperLU:Destroy_SuperNode_Matrix",Destroy_SuperNode_Matrix(&lu->L));
140       PetscStackCall("SuperLU:Destroy_CompCol_Matrix",Destroy_CompCol_Matrix(&lu->U));
141       lu->options.Fact = SamePattern;
142     }
143   }
144 
145   /* Create the SuperMatrix for lu->A=A^T:
146        Since SuperLU likes column-oriented matrices,we pass it the transpose,
147        and then solve A^T X = B in MatSolve(). */
148   if (lu->options.Equil) {
149     aa = (Mat_SeqAIJ*)(lu->A_dup)->data;
150   } else {
151     aa = (Mat_SeqAIJ*)(A)->data;
152   }
153 #if defined(PETSC_USE_COMPLEX)
154 #if defined(PETSC_USE_REAL_SINGLE)
155   PetscStackCall("SuperLU:cCreate_CompCol_Matrix",cCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,(singlecomplex*)aa->a,aa->j,aa->i,SLU_NC,SLU_C,SLU_GE));
156 #else
157   PetscStackCall("SuperLU:zCreate_CompCol_Matrix",zCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,SLU_NC,SLU_Z,SLU_GE));
158 #endif
159 #else
160 #if defined(PETSC_USE_REAL_SINGLE)
161   PetscStackCall("SuperLU:sCreate_CompCol_Matrix",sCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,aa->a,aa->j,aa->i,SLU_NC,SLU_S,SLU_GE));
162 #else
163   PetscStackCall("SuperLU:dCreate_CompCol_Matrix",dCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,aa->a,aa->j,aa->i,SLU_NC,SLU_D,SLU_GE));
164 #endif
165 #endif
166 
167   /* Numerical factorization */
168   lu->B.ncol = 0;  /* Indicate not to solve the system */
169   if (F->factortype == MAT_FACTOR_LU) {
170 #if defined(PETSC_USE_COMPLEX)
171 #if defined(PETSC_USE_REAL_SINGLE)
172     PetscStackCall("SuperLU:cgssvx",cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
173                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
174                                      &lu->mem_usage, &lu->stat, &sinfo));
175 #else
176     PetscStackCall("SuperLU:zgssvx",zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
177                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
178                                      &lu->mem_usage, &lu->stat, &sinfo));
179 #endif
180 #else
181 #if defined(PETSC_USE_REAL_SINGLE)
182     PetscStackCall("SuperLU:sgssvx",sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
183                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
184                                      &lu->mem_usage, &lu->stat, &sinfo));
185 #else
186     PetscStackCall("SuperLU:dgssvx",dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
187                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
188                                      &lu->mem_usage, &lu->stat, &sinfo));
189 #endif
190 #endif
191   } else if (F->factortype == MAT_FACTOR_ILU) {
192     /* Compute the incomplete factorization, condition number and pivot growth */
193 #if defined(PETSC_USE_COMPLEX)
194 #if defined(PETSC_USE_REAL_SINGLE)
195     PetscStackCall("SuperLU:cgsisx",cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r,lu->etree, lu->equed, lu->R, lu->C,
196                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
197                                      &lu->mem_usage, &lu->stat, &sinfo));
198 #else
199     PetscStackCall("SuperLU:zgsisx",zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r,lu->etree, lu->equed, lu->R, lu->C,
200                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
201                                      &lu->mem_usage, &lu->stat, &sinfo));
202 #endif
203 #else
204 #if defined(PETSC_USE_REAL_SINGLE)
205     PetscStackCall("SuperLU:sgsisx",sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
206                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
207                                      &lu->mem_usage, &lu->stat, &sinfo));
208 #else
209     PetscStackCall("SuperLU:dgsisx",dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
210                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
211                                      &lu->mem_usage, &lu->stat, &sinfo));
212 #endif
213 #endif
214   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
215   if (!sinfo || sinfo == lu->A.ncol+1) {
216     if (lu->options.PivotGrowth) {
217       ierr = PetscPrintf(PETSC_COMM_SELF,"  Recip. pivot growth = %e\n", lu->rpg);
218     }
219     if (lu->options.ConditionNumber) {
220       ierr = PetscPrintf(PETSC_COMM_SELF,"  Recip. condition number = %e\n", lu->rcond);
221     }
222   } else if (sinfo > 0) {
223     if (lu->lwork == -1) {
224       ierr = PetscPrintf(PETSC_COMM_SELF,"  ** Estimated memory: %D bytes\n", sinfo - lu->A.ncol);
225     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot in row %D",sinfo);
226   } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %D, the %D-th argument in gssvx() had an illegal value", sinfo,-sinfo);
227 
228   if (lu->options.PrintStat) {
229     ierr = PetscPrintf(PETSC_COMM_SELF,"MatLUFactorNumeric_SuperLU():\n");
230     PetscStackCall("SuperLU:StatPrint",StatPrint(&lu->stat));
231     Lstore = (SCformat*) lu->L.Store;
232     Ustore = (NCformat*) lu->U.Store;
233     ierr   = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in factor L = %D\n", Lstore->nnz);
234     ierr   = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in factor U = %D\n", Ustore->nnz);
235     ierr   = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in L+U = %D\n", Lstore->nnz + Ustore->nnz - lu->A.ncol);
236     ierr   = PetscPrintf(PETSC_COMM_SELF,"  L\\U MB %.3f\ttotal MB needed %.3f\n",
237                          lu->mem_usage.for_lu/1e6, lu->mem_usage.total_needed/1e6);
238   }
239 
240   lu->flg                = SAME_NONZERO_PATTERN;
241   F->ops->solve          = MatSolve_SuperLU;
242   F->ops->solvetranspose = MatSolveTranspose_SuperLU;
243   F->ops->matsolve       = MatMatSolve_SuperLU;
244   PetscFunctionReturn(0);
245 }
246 
247 #undef __FUNCT__
248 #define __FUNCT__ "MatDestroy_SuperLU"
249 PetscErrorCode MatDestroy_SuperLU(Mat A)
250 {
251   PetscErrorCode ierr;
252   Mat_SuperLU    *lu=(Mat_SuperLU*)A->spptr;
253 
254   PetscFunctionBegin;
255   if (lu && lu->CleanUpSuperLU) { /* Free the SuperLU datastructures */
256     PetscStackCall("SuperLU:Destroy_SuperMatrix_Store",Destroy_SuperMatrix_Store(&lu->A));
257     PetscStackCall("SuperLU:Destroy_SuperMatrix_Store",Destroy_SuperMatrix_Store(&lu->B));
258     PetscStackCall("SuperLU:Destroy_SuperMatrix_Store",Destroy_SuperMatrix_Store(&lu->X));
259     PetscStackCall("SuperLU:StatFree",StatFree(&lu->stat));
260     if (lu->lwork >= 0) {
261       PetscStackCall("SuperLU:Destroy_SuperNode_Matrix",Destroy_SuperNode_Matrix(&lu->L));
262       PetscStackCall("SuperLU:Destroy_CompCol_Matrix",Destroy_CompCol_Matrix(&lu->U));
263     }
264   }
265   if (lu) {
266     ierr = PetscFree(lu->etree);CHKERRQ(ierr);
267     ierr = PetscFree(lu->perm_r);CHKERRQ(ierr);
268     ierr = PetscFree(lu->perm_c);CHKERRQ(ierr);
269     ierr = PetscFree(lu->R);CHKERRQ(ierr);
270     ierr = PetscFree(lu->C);CHKERRQ(ierr);
271     ierr = PetscFree(lu->rhs_dup);CHKERRQ(ierr);
272     ierr = MatDestroy(&lu->A_dup);CHKERRQ(ierr);
273   }
274   ierr = PetscFree(A->spptr);CHKERRQ(ierr);
275 
276   /* clear composed functions */
277   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverPackage_C",NULL);CHKERRQ(ierr);
278   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSuperluSetILUDropTol_C",NULL);CHKERRQ(ierr);
279 
280   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
281   PetscFunctionReturn(0);
282 }
283 
284 #undef __FUNCT__
285 #define __FUNCT__ "MatView_SuperLU"
286 PetscErrorCode MatView_SuperLU(Mat A,PetscViewer viewer)
287 {
288   PetscErrorCode    ierr;
289   PetscBool         iascii;
290   PetscViewerFormat format;
291 
292   PetscFunctionBegin;
293   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
294   if (iascii) {
295     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
296     if (format == PETSC_VIEWER_ASCII_INFO) {
297       ierr = MatFactorInfo_SuperLU(A,viewer);CHKERRQ(ierr);
298     }
299   }
300   PetscFunctionReturn(0);
301 }
302 
303 
304 #undef __FUNCT__
305 #define __FUNCT__ "MatSolve_SuperLU_Private"
306 PetscErrorCode MatSolve_SuperLU_Private(Mat A,Vec b,Vec x)
307 {
308   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
309   PetscScalar    *barray,*xarray;
310   PetscErrorCode ierr;
311   PetscInt       info,i,n;
312   PetscReal      ferr,berr;
313 
314   PetscFunctionBegin;
315   if (lu->lwork == -1) PetscFunctionReturn(0);
316 
317   ierr = VecGetLocalSize(x,&n);CHKERRQ(ierr);
318   lu->B.ncol = 1;   /* Set the number of right-hand side */
319   if (lu->options.Equil && !lu->rhs_dup) {
320     /* superlu overwrites b when Equil is used, thus create rhs_dup to keep user's b unchanged */
321     ierr = PetscMalloc(n*sizeof(PetscScalar),&lu->rhs_dup);CHKERRQ(ierr);
322   }
323   if (lu->options.Equil) {
324     /* Copy b into rsh_dup */
325     ierr   = VecGetArray(b,&barray);CHKERRQ(ierr);
326     ierr   = PetscMemcpy(lu->rhs_dup,barray,n*sizeof(PetscScalar));CHKERRQ(ierr);
327     ierr   = VecRestoreArray(b,&barray);CHKERRQ(ierr);
328     barray = lu->rhs_dup;
329   } else {
330     ierr = VecGetArray(b,&barray);CHKERRQ(ierr);
331   }
332   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
333 
334 #if defined(PETSC_USE_COMPLEX)
335 #if defined(PETSC_USE_REAL_SINGLE)
336   ((DNformat*)lu->B.Store)->nzval = (singlecomplex*)barray;
337   ((DNformat*)lu->X.Store)->nzval = (singlecomplex*)xarray;
338 #else
339   ((DNformat*)lu->B.Store)->nzval = (doublecomplex*)barray;
340   ((DNformat*)lu->X.Store)->nzval = (doublecomplex*)xarray;
341 #endif
342 #else
343   ((DNformat*)lu->B.Store)->nzval = barray;
344   ((DNformat*)lu->X.Store)->nzval = xarray;
345 #endif
346 
347   lu->options.Fact = FACTORED; /* Indicate the factored form of A is supplied. */
348   if (A->factortype == MAT_FACTOR_LU) {
349 #if defined(PETSC_USE_COMPLEX)
350 #if defined(PETSC_USE_REAL_SINGLE)
351     PetscStackCall("SuperLU:cgssvx",cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
352                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
353                                      &lu->mem_usage, &lu->stat, &info));
354 #else
355     PetscStackCall("SuperLU:zgssvx",zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
356                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
357                                      &lu->mem_usage, &lu->stat, &info));
358 #endif
359 #else
360 #if defined(PETSC_USE_REAL_SINGLE)
361     PetscStackCall("SuperLU:sgssvx",sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
362                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
363                                      &lu->mem_usage, &lu->stat, &info));
364 #else
365     PetscStackCall("SuperLU:dgssvx",dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
366                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
367                                      &lu->mem_usage, &lu->stat, &info));
368 #endif
369 #endif
370   } else if (A->factortype == MAT_FACTOR_ILU) {
371 #if defined(PETSC_USE_COMPLEX)
372 #if defined(PETSC_USE_REAL_SINGLE)
373     PetscStackCall("SuperLU:cgsisx",cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
374                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
375                                      &lu->mem_usage, &lu->stat, &info));
376 #else
377     PetscStackCall("SuperLU:zgsisx",zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
378                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
379                                      &lu->mem_usage, &lu->stat, &info));
380 #endif
381 #else
382 #if defined(PETSC_USE_REAL_SINGLE)
383     PetscStackCall("SuperLU:sgsisx",sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
384                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
385                                      &lu->mem_usage, &lu->stat, &info));
386 #else
387     PetscStackCall("SuperLU:dgsisx",dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
388                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
389                                      &lu->mem_usage, &lu->stat, &info));
390 #endif
391 #endif
392   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
393   if (!lu->options.Equil) {
394     ierr = VecRestoreArray(b,&barray);CHKERRQ(ierr);
395   }
396   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
397 
398   if (!info || info == lu->A.ncol+1) {
399     if (lu->options.IterRefine) {
400       ierr = PetscPrintf(PETSC_COMM_SELF,"Iterative Refinement:\n");
401       ierr = PetscPrintf(PETSC_COMM_SELF,"  %8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR");
402       for (i = 0; i < 1; ++i) {
403         ierr = PetscPrintf(PETSC_COMM_SELF,"  %8d%8d%16e%16e\n", i+1, lu->stat.RefineSteps, ferr, berr);
404       }
405     }
406   } else if (info > 0) {
407     if (lu->lwork == -1) {
408       ierr = PetscPrintf(PETSC_COMM_SELF,"  ** Estimated memory: %D bytes\n", info - lu->A.ncol);
409     } else {
410       ierr = PetscPrintf(PETSC_COMM_SELF,"  Warning: gssvx() returns info %D\n",info);
411     }
412   } else if (info < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %D, the %D-th argument in gssvx() had an illegal value", info,-info);
413 
414   if (lu->options.PrintStat) {
415     ierr = PetscPrintf(PETSC_COMM_SELF,"MatSolve__SuperLU():\n");
416     PetscStackCall("SuperLU:StatPrint",StatPrint(&lu->stat));
417   }
418   PetscFunctionReturn(0);
419 }
420 
421 #undef __FUNCT__
422 #define __FUNCT__ "MatSolve_SuperLU"
423 PetscErrorCode MatSolve_SuperLU(Mat A,Vec b,Vec x)
424 {
425   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
426   PetscErrorCode ierr;
427 
428   PetscFunctionBegin;
429   lu->options.Trans = TRANS;
430 
431   ierr = MatSolve_SuperLU_Private(A,b,x);CHKERRQ(ierr);
432   PetscFunctionReturn(0);
433 }
434 
435 #undef __FUNCT__
436 #define __FUNCT__ "MatSolveTranspose_SuperLU"
437 PetscErrorCode MatSolveTranspose_SuperLU(Mat A,Vec b,Vec x)
438 {
439   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
440   PetscErrorCode ierr;
441 
442   PetscFunctionBegin;
443   lu->options.Trans = NOTRANS;
444 
445   ierr = MatSolve_SuperLU_Private(A,b,x);CHKERRQ(ierr);
446   PetscFunctionReturn(0);
447 }
448 
449 #undef __FUNCT__
450 #define __FUNCT__ "MatMatSolve_SuperLU"
451 PetscErrorCode MatMatSolve_SuperLU(Mat A,Mat B,Mat X)
452 {
453   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
454   PetscBool      flg;
455   PetscErrorCode ierr;
456 
457   PetscFunctionBegin;
458   ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
459   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix");
460   ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
461   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");
462   lu->options.Trans = TRANS;
463   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatSolve_SuperLU() is not implemented yet");
464   PetscFunctionReturn(0);
465 }
466 
467 /*
468    Note the r permutation is ignored
469 */
470 #undef __FUNCT__
471 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU"
472 PetscErrorCode MatLUFactorSymbolic_SuperLU(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
473 {
474   Mat_SuperLU *lu = (Mat_SuperLU*)(F->spptr);
475 
476   PetscFunctionBegin;
477   lu->flg                 = DIFFERENT_NONZERO_PATTERN;
478   lu->CleanUpSuperLU      = PETSC_TRUE;
479   F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU;
480   PetscFunctionReturn(0);
481 }
482 
483 #undef __FUNCT__
484 #define __FUNCT__ "MatSuperluSetILUDropTol_SuperLU"
485 static PetscErrorCode MatSuperluSetILUDropTol_SuperLU(Mat F,PetscReal dtol)
486 {
487   Mat_SuperLU *lu= (Mat_SuperLU*)F->spptr;
488 
489   PetscFunctionBegin;
490   lu->options.ILU_DropTol = dtol;
491   PetscFunctionReturn(0);
492 }
493 
494 #undef __FUNCT__
495 #define __FUNCT__ "MatSuperluSetILUDropTol"
496 /*@
497   MatSuperluSetILUDropTol - Set SuperLU ILU drop tolerance
498    Logically Collective on Mat
499 
500    Input Parameters:
501 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-SuperLU interface
502 -  dtol - drop tolerance
503 
504   Options Database:
505 .   -mat_superlu_ilu_droptol <dtol>
506 
507    Level: beginner
508 
509    References: SuperLU Users' Guide
510 
511 .seealso: MatGetFactor()
512 @*/
513 PetscErrorCode MatSuperluSetILUDropTol(Mat F,PetscReal dtol)
514 {
515   PetscErrorCode ierr;
516 
517   PetscFunctionBegin;
518   PetscValidHeaderSpecific(F,MAT_CLASSID,1);
519   PetscValidLogicalCollectiveInt(F,dtol,2);
520   ierr = PetscTryMethod(F,"MatSuperluSetILUDropTol_C",(Mat,PetscReal),(F,dtol));CHKERRQ(ierr);
521   PetscFunctionReturn(0);
522 }
523 
524 #undef __FUNCT__
525 #define __FUNCT__ "MatFactorGetSolverPackage_seqaij_superlu"
526 PetscErrorCode MatFactorGetSolverPackage_seqaij_superlu(Mat A,const MatSolverPackage *type)
527 {
528   PetscFunctionBegin;
529   *type = MATSOLVERSUPERLU;
530   PetscFunctionReturn(0);
531 }
532 
533 
534 /*MC
535   MATSOLVERSUPERLU = "superlu" - A solver package providing solvers LU and ILU for sequential matrices
536   via the external package SuperLU.
537 
538   Use ./configure --download-superlu to have PETSc installed with SuperLU
539 
540   Options Database Keys:
541 + -mat_superlu_equil <FALSE>            - Equil (None)
542 . -mat_superlu_colperm <COLAMD>         - (choose one of) NATURAL MMD_ATA MMD_AT_PLUS_A COLAMD
543 . -mat_superlu_iterrefine <NOREFINE>    - (choose one of) NOREFINE SINGLE DOUBLE EXTRA
544 . -mat_superlu_symmetricmode: <FALSE>   - SymmetricMode (None)
545 . -mat_superlu_diagpivotthresh <1>      - DiagPivotThresh (None)
546 . -mat_superlu_pivotgrowth <FALSE>      - PivotGrowth (None)
547 . -mat_superlu_conditionnumber <FALSE>  - ConditionNumber (None)
548 . -mat_superlu_rowperm <NOROWPERM>      - (choose one of) NOROWPERM LargeDiag
549 . -mat_superlu_replacetinypivot <FALSE> - ReplaceTinyPivot (None)
550 . -mat_superlu_printstat <FALSE>        - PrintStat (None)
551 . -mat_superlu_lwork <0>                - size of work array in bytes used by factorization (None)
552 . -mat_superlu_ilu_droptol <0>          - ILU_DropTol (None)
553 . -mat_superlu_ilu_filltol <0>          - ILU_FillTol (None)
554 . -mat_superlu_ilu_fillfactor <0>       - ILU_FillFactor (None)
555 . -mat_superlu_ilu_droprull <0>         - ILU_DropRule (None)
556 . -mat_superlu_ilu_norm <0>             - ILU_Norm (None)
557 - -mat_superlu_ilu_milu <0>             - ILU_MILU (None)
558 
559    Notes: Do not confuse this with MATSOLVERSUPERLU_DIST which is for parallel sparse solves
560 
561    Level: beginner
562 
563 .seealso: PCLU, PCILU, MATSOLVERSUPERLU_DIST, MATSOLVERMUMPS, PCFactorSetMatSolverPackage(), MatSolverPackage
564 M*/
565 
566 #undef __FUNCT__
567 #define __FUNCT__ "MatGetFactor_seqaij_superlu"
568 PETSC_EXTERN PetscErrorCode MatGetFactor_seqaij_superlu(Mat A,MatFactorType ftype,Mat *F)
569 {
570   Mat            B;
571   Mat_SuperLU    *lu;
572   PetscErrorCode ierr;
573   PetscInt       indx,m=A->rmap->n,n=A->cmap->n;
574   PetscBool      flg;
575   PetscReal      real_input;
576   const char     *colperm[]   ={"NATURAL","MMD_ATA","MMD_AT_PLUS_A","COLAMD"}; /* MY_PERMC - not supported by the petsc interface yet */
577   const char     *iterrefine[]={"NOREFINE", "SINGLE", "DOUBLE", "EXTRA"};
578   const char     *rowperm[]   ={"NOROWPERM", "LargeDiag"}; /* MY_PERMC - not supported by the petsc interface yet */
579 
580   PetscFunctionBegin;
581   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
582   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
583   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
584   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
585 
586   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) {
587     B->ops->lufactorsymbolic  = MatLUFactorSymbolic_SuperLU;
588     B->ops->ilufactorsymbolic = MatLUFactorSymbolic_SuperLU;
589   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
590 
591   B->ops->destroy = MatDestroy_SuperLU;
592   B->ops->view    = MatView_SuperLU;
593   B->factortype   = ftype;
594   B->assembled    = PETSC_TRUE;           /* required by -ksp_view */
595   B->preallocated = PETSC_TRUE;
596 
597   ierr = PetscNewLog(B,Mat_SuperLU,&lu);CHKERRQ(ierr);
598 
599   if (ftype == MAT_FACTOR_LU) {
600     set_default_options(&lu->options);
601     /* Comments from SuperLU_4.0/SRC/dgssvx.c:
602       "Whether or not the system will be equilibrated depends on the
603        scaling of the matrix A, but if equilibration is used, A is
604        overwritten by diag(R)*A*diag(C) and B by diag(R)*B
605        (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans = TRANS or CONJ)."
606      We set 'options.Equil = NO' as default because additional space is needed for it.
607     */
608     lu->options.Equil = NO;
609   } else if (ftype == MAT_FACTOR_ILU) {
610     /* Set the default input options of ilu: */
611     PetscStackCall("SuperLU:ilu_set_default_options",ilu_set_default_options(&lu->options));
612   }
613   lu->options.PrintStat = NO;
614 
615   /* Initialize the statistics variables. */
616   PetscStackCall("SuperLU:StatInit",StatInit(&lu->stat));
617   lu->lwork = 0;   /* allocate space internally by system malloc */
618 
619   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"SuperLU Options","Mat");CHKERRQ(ierr);
620   ierr = PetscOptionsBool("-mat_superlu_equil","Equil","None",(PetscBool)lu->options.Equil,(PetscBool*)&lu->options.Equil,0);CHKERRQ(ierr);
621   ierr = PetscOptionsEList("-mat_superlu_colperm","ColPerm","None",colperm,4,colperm[3],&indx,&flg);CHKERRQ(ierr);
622   if (flg) lu->options.ColPerm = (colperm_t)indx;
623   ierr = PetscOptionsEList("-mat_superlu_iterrefine","IterRefine","None",iterrefine,4,iterrefine[0],&indx,&flg);CHKERRQ(ierr);
624   if (flg) lu->options.IterRefine = (IterRefine_t)indx;
625   ierr = PetscOptionsBool("-mat_superlu_symmetricmode","SymmetricMode","None",(PetscBool)lu->options.SymmetricMode,&flg,0);CHKERRQ(ierr);
626   if (flg) lu->options.SymmetricMode = YES;
627   ierr = PetscOptionsReal("-mat_superlu_diagpivotthresh","DiagPivotThresh","None",lu->options.DiagPivotThresh,&real_input,&flg);CHKERRQ(ierr);
628   if (flg) lu->options.DiagPivotThresh = (double) real_input;
629   ierr = PetscOptionsBool("-mat_superlu_pivotgrowth","PivotGrowth","None",(PetscBool)lu->options.PivotGrowth,&flg,0);CHKERRQ(ierr);
630   if (flg) lu->options.PivotGrowth = YES;
631   ierr = PetscOptionsBool("-mat_superlu_conditionnumber","ConditionNumber","None",(PetscBool)lu->options.ConditionNumber,&flg,0);CHKERRQ(ierr);
632   if (flg) lu->options.ConditionNumber = YES;
633   ierr = PetscOptionsEList("-mat_superlu_rowperm","rowperm","None",rowperm,2,rowperm[lu->options.RowPerm],&indx,&flg);CHKERRQ(ierr);
634   if (flg) lu->options.RowPerm = (rowperm_t)indx;
635   ierr = PetscOptionsBool("-mat_superlu_replacetinypivot","ReplaceTinyPivot","None",(PetscBool)lu->options.ReplaceTinyPivot,&flg,0);CHKERRQ(ierr);
636   if (flg) lu->options.ReplaceTinyPivot = YES;
637   ierr = PetscOptionsBool("-mat_superlu_printstat","PrintStat","None",(PetscBool)lu->options.PrintStat,&flg,0);CHKERRQ(ierr);
638   if (flg) lu->options.PrintStat = YES;
639   ierr = PetscOptionsInt("-mat_superlu_lwork","size of work array in bytes used by factorization","None",lu->lwork,&lu->lwork,NULL);CHKERRQ(ierr);
640   if (lu->lwork > 0) {
641     ierr = PetscMalloc(lu->lwork,&lu->work);CHKERRQ(ierr);
642   } else if (lu->lwork != 0 && lu->lwork != -1) {
643     ierr      = PetscPrintf(PETSC_COMM_SELF,"   Warning: lwork %D is not supported by SUPERLU. The default lwork=0 is used.\n",lu->lwork);
644     lu->lwork = 0;
645   }
646   /* ilu options */
647   ierr = PetscOptionsReal("-mat_superlu_ilu_droptol","ILU_DropTol","None",lu->options.ILU_DropTol,&real_input,&flg);CHKERRQ(ierr);
648   if (flg) lu->options.ILU_DropTol = (double) real_input;
649   ierr = PetscOptionsReal("-mat_superlu_ilu_filltol","ILU_FillTol","None",lu->options.ILU_FillTol,&real_input,&flg);CHKERRQ(ierr);
650   if (flg) lu->options.ILU_FillTol = (double) real_input;
651   ierr = PetscOptionsReal("-mat_superlu_ilu_fillfactor","ILU_FillFactor","None",lu->options.ILU_FillFactor,&real_input,&flg);CHKERRQ(ierr);
652   if (flg) lu->options.ILU_FillFactor = (double) real_input;
653   ierr = PetscOptionsInt("-mat_superlu_ilu_droprull","ILU_DropRule","None",lu->options.ILU_DropRule,&lu->options.ILU_DropRule,NULL);CHKERRQ(ierr);
654   ierr = PetscOptionsInt("-mat_superlu_ilu_norm","ILU_Norm","None",lu->options.ILU_Norm,&indx,&flg);CHKERRQ(ierr);
655   if (flg) lu->options.ILU_Norm = (norm_t)indx;
656   ierr = PetscOptionsInt("-mat_superlu_ilu_milu","ILU_MILU","None",lu->options.ILU_MILU,&indx,&flg);CHKERRQ(ierr);
657   if (flg) lu->options.ILU_MILU = (milu_t)indx;
658   PetscOptionsEnd();
659   if (lu->options.Equil == YES) {
660     /* superlu overwrites input matrix and rhs when Equil is used, thus create A_dup to keep user's A unchanged */
661     ierr = MatDuplicate_SeqAIJ(A,MAT_COPY_VALUES,&lu->A_dup);CHKERRQ(ierr);
662   }
663 
664   /* Allocate spaces (notice sizes are for the transpose) */
665   ierr = PetscMalloc(m*sizeof(PetscInt),&lu->etree);CHKERRQ(ierr);
666   ierr = PetscMalloc(n*sizeof(PetscInt),&lu->perm_r);CHKERRQ(ierr);
667   ierr = PetscMalloc(m*sizeof(PetscInt),&lu->perm_c);CHKERRQ(ierr);
668   ierr = PetscMalloc(n*sizeof(PetscScalar),&lu->R);CHKERRQ(ierr);
669   ierr = PetscMalloc(m*sizeof(PetscScalar),&lu->C);CHKERRQ(ierr);
670 
671   /* create rhs and solution x without allocate space for .Store */
672 #if defined(PETSC_USE_COMPLEX)
673 #if defined(PETSC_USE_REAL_SINGLE)
674   PetscStackCall("SuperLU:cCreate_Dense_Matrix(",cCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_C, SLU_GE));
675   PetscStackCall("SuperLU:cCreate_Dense_Matrix(",cCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_C, SLU_GE));
676 #else
677   PetscStackCall("SuperLU:zCreate_Dense_Matrix",zCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_Z, SLU_GE));
678   PetscStackCall("SuperLU:zCreate_Dense_Matrix",zCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_Z, SLU_GE));
679 #endif
680 #else
681 #if defined(PETSC_USE_REAL_SINGLE)
682   PetscStackCall("SuperLU:sCreate_Dense_Matrix",sCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_S, SLU_GE));
683   PetscStackCall("SuperLU:sCreate_Dense_Matrix",sCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_S, SLU_GE));
684 #else
685   PetscStackCall("SuperLU:dCreate_Dense_Matrix",dCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_D, SLU_GE));
686   PetscStackCall("SuperLU:dCreate_Dense_Matrix",dCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_D, SLU_GE));
687 #endif
688 #endif
689 
690   ierr     = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverPackage_C",MatFactorGetSolverPackage_seqaij_superlu);CHKERRQ(ierr);
691   ierr     = PetscObjectComposeFunction((PetscObject)B,"MatSuperluSetILUDropTol_C",MatSuperluSetILUDropTol_SuperLU);CHKERRQ(ierr);
692   B->spptr = lu;
693   *F       = B;
694   PetscFunctionReturn(0);
695 }
696 
697