xref: /petsc/src/mat/impls/aij/seq/superlu/superlu.c (revision a6dfd86ebdf75fa024070af26d51b62fd16650a3)
1 
2 /*  --------------------------------------------------------------------
3 
4      This file implements a subclass of the SeqAIJ matrix class that uses
5      the SuperLU sparse solver. You can use this as a starting point for
6      implementing your own subclass of a PETSc matrix class.
7 
8      This demonstrates a way to make an implementation inheritence of a PETSc
9      matrix type. This means constructing a new matrix type (SuperLU) by changing some
10      of the methods of the previous type (SeqAIJ), adding additional data, and possibly
11      additional method. (See any book on object oriented programming).
12 */
13 
14 /*
15      Defines the data structure for the base matrix type (SeqAIJ)
16 */
17 #include <../src/mat/impls/aij/seq/aij.h>    /*I "petscmat.h" I*/
18 
19 /*
20      SuperLU include files
21 */
22 EXTERN_C_BEGIN
23 #if defined(PETSC_USE_COMPLEX)
24 #if defined(PETSC_USE_REAL_SINGLE)
25 #include <slu_cdefs.h>
26 #else
27 #include <slu_zdefs.h>
28 #endif
29 #else
30 #if defined(PETSC_USE_REAL_SINGLE)
31 #include <slu_sdefs.h>
32 #else
33 #include <slu_ddefs.h>
34 #endif
35 #endif
36 #include <slu_util.h>
37 EXTERN_C_END
38 
39 /*
40      This is the data we are "ADDING" to the SeqAIJ matrix type to get the SuperLU matrix type
41 */
42 typedef struct {
43   SuperMatrix       A,L,U,B,X;
44   superlu_options_t options;
45   PetscInt          *perm_c; /* column permutation vector */
46   PetscInt          *perm_r; /* row permutations from partial pivoting */
47   PetscInt          *etree;
48   PetscReal         *R, *C;
49   char              equed[1];
50   PetscInt          lwork;
51   void              *work;
52   PetscReal         rpg, rcond;
53   mem_usage_t       mem_usage;
54   MatStructure      flg;
55   SuperLUStat_t     stat;
56   Mat               A_dup;
57   PetscScalar       *rhs_dup;
58 
59   /* Flag to clean up (non-global) SuperLU objects during Destroy */
60   PetscBool  CleanUpSuperLU;
61 } Mat_SuperLU;
62 
63 extern PetscErrorCode MatFactorInfo_SuperLU(Mat,PetscViewer);
64 extern PetscErrorCode MatLUFactorNumeric_SuperLU(Mat,Mat,const MatFactorInfo *);
65 extern PetscErrorCode MatDestroy_SuperLU(Mat);
66 extern PetscErrorCode MatView_SuperLU(Mat,PetscViewer);
67 extern PetscErrorCode MatAssemblyEnd_SuperLU(Mat,MatAssemblyType);
68 extern PetscErrorCode MatSolve_SuperLU(Mat,Vec,Vec);
69 extern PetscErrorCode MatMatSolve_SuperLU(Mat,Mat,Mat);
70 extern PetscErrorCode MatSolveTranspose_SuperLU(Mat,Vec,Vec);
71 extern PetscErrorCode MatLUFactorSymbolic_SuperLU(Mat,Mat,IS,IS,const MatFactorInfo*);
72 extern PetscErrorCode MatDuplicate_SuperLU(Mat, MatDuplicateOption, Mat *);
73 
74 /*
75     Utility function
76 */
77 #undef __FUNCT__
78 #define __FUNCT__ "MatFactorInfo_SuperLU"
79 PetscErrorCode MatFactorInfo_SuperLU(Mat A,PetscViewer viewer)
80 {
81   Mat_SuperLU       *lu= (Mat_SuperLU*)A->spptr;
82   PetscErrorCode    ierr;
83   superlu_options_t options;
84 
85   PetscFunctionBegin;
86   /* check if matrix is superlu_dist type */
87   if (A->ops->solve != MatSolve_SuperLU) PetscFunctionReturn(0);
88 
89   options = lu->options;
90   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU run parameters:\n");CHKERRQ(ierr);
91   ierr = PetscViewerASCIIPrintf(viewer,"  Equil: %s\n",(options.Equil != NO) ? "YES": "NO");CHKERRQ(ierr);
92   ierr = PetscViewerASCIIPrintf(viewer,"  ColPerm: %D\n",options.ColPerm);CHKERRQ(ierr);
93   ierr = PetscViewerASCIIPrintf(viewer,"  IterRefine: %D\n",options.IterRefine);CHKERRQ(ierr);
94   ierr = PetscViewerASCIIPrintf(viewer,"  SymmetricMode: %s\n",(options.SymmetricMode != NO) ? "YES": "NO");CHKERRQ(ierr);
95   ierr = PetscViewerASCIIPrintf(viewer,"  DiagPivotThresh: %g\n",options.DiagPivotThresh);CHKERRQ(ierr);
96   ierr = PetscViewerASCIIPrintf(viewer,"  PivotGrowth: %s\n",(options.PivotGrowth != NO) ? "YES": "NO");CHKERRQ(ierr);
97   ierr = PetscViewerASCIIPrintf(viewer,"  ConditionNumber: %s\n",(options.ConditionNumber != NO) ? "YES": "NO");CHKERRQ(ierr);
98   ierr = PetscViewerASCIIPrintf(viewer,"  RowPerm: %D\n",options.RowPerm);CHKERRQ(ierr);
99   ierr = PetscViewerASCIIPrintf(viewer,"  ReplaceTinyPivot: %s\n",(options.ReplaceTinyPivot != NO) ? "YES": "NO");CHKERRQ(ierr);
100   ierr = PetscViewerASCIIPrintf(viewer,"  PrintStat: %s\n",(options.PrintStat != NO) ? "YES": "NO");CHKERRQ(ierr);
101   ierr = PetscViewerASCIIPrintf(viewer,"  lwork: %D\n",lu->lwork);CHKERRQ(ierr);
102   if (A->factortype == MAT_FACTOR_ILU) {
103     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_DropTol: %g\n",options.ILU_DropTol);CHKERRQ(ierr);
104     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_FillTol: %g\n",options.ILU_FillTol);CHKERRQ(ierr);
105     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_FillFactor: %g\n",options.ILU_FillFactor);CHKERRQ(ierr);
106     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_DropRule: %D\n",options.ILU_DropRule);CHKERRQ(ierr);
107     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_Norm: %D\n",options.ILU_Norm);CHKERRQ(ierr);
108     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_MILU: %D\n",options.ILU_MILU);CHKERRQ(ierr);
109   }
110   PetscFunctionReturn(0);
111 }
112 
113 /*
114     These are the methods provided to REPLACE the corresponding methods of the
115    SeqAIJ matrix class. Other methods of SeqAIJ are not replaced
116 */
117 #undef __FUNCT__
118 #define __FUNCT__ "MatLUFactorNumeric_SuperLU"
119 PetscErrorCode MatLUFactorNumeric_SuperLU(Mat F,Mat A,const MatFactorInfo *info)
120 {
121   Mat_SuperLU    *lu = (Mat_SuperLU*)F->spptr;
122   Mat_SeqAIJ     *aa;
123   PetscErrorCode ierr;
124   PetscInt       sinfo;
125   PetscReal      ferr, berr;
126   NCformat       *Ustore;
127   SCformat       *Lstore;
128 
129   PetscFunctionBegin;
130   if (lu->flg == SAME_NONZERO_PATTERN) { /* successing numerical factorization */
131     lu->options.Fact = SamePattern;
132     /* Ref: ~SuperLU_3.0/EXAMPLE/dlinsolx2.c */
133     Destroy_SuperMatrix_Store(&lu->A);
134     if (lu->options.Equil) {
135       ierr = MatCopy_SeqAIJ(A,lu->A_dup,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
136     }
137     if ( lu->lwork >= 0 ) {
138       Destroy_SuperNode_Matrix(&lu->L);
139       Destroy_CompCol_Matrix(&lu->U);
140       lu->options.Fact = SamePattern;
141     }
142   }
143 
144   /* Create the SuperMatrix for lu->A=A^T:
145        Since SuperLU likes column-oriented matrices,we pass it the transpose,
146        and then solve A^T X = B in MatSolve(). */
147   if (lu->options.Equil) {
148     aa = (Mat_SeqAIJ*)(lu->A_dup)->data;
149   } else {
150     aa = (Mat_SeqAIJ*)(A)->data;
151   }
152 #if defined(PETSC_USE_COMPLEX)
153 #if defined(PETSC_USE_REAL_SINGLE)
154   cCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,(singlecomplex*)aa->a,aa->j,aa->i,SLU_NC,SLU_C,SLU_GE);
155 #else
156   zCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,SLU_NC,SLU_Z,SLU_GE);
157 #endif
158 #else
159 #if defined(PETSC_USE_REAL_SINGLE)
160   sCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,aa->a,aa->j,aa->i,SLU_NC,SLU_S,SLU_GE);
161 #else
162   dCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,aa->a,aa->j,aa->i,SLU_NC,SLU_D,SLU_GE);
163 #endif
164 #endif
165 
166   /* Numerical factorization */
167   lu->B.ncol = 0;  /* Indicate not to solve the system */
168   if (F->factortype == MAT_FACTOR_LU) {
169 #if defined(PETSC_USE_COMPLEX)
170 #if defined(PETSC_USE_REAL_SINGLE)
171     cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
172            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
173            &lu->mem_usage, &lu->stat, &sinfo);
174 #else
175     zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
176            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
177            &lu->mem_usage, &lu->stat, &sinfo);
178 #endif
179 #else
180 #if defined(PETSC_USE_REAL_SINGLE)
181     sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
182            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
183            &lu->mem_usage, &lu->stat, &sinfo);
184 #else
185     dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
186            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
187            &lu->mem_usage, &lu->stat, &sinfo);
188 #endif
189 #endif
190   } else if (F->factortype == MAT_FACTOR_ILU) {
191     /* Compute the incomplete factorization, condition number and pivot growth */
192 #if defined(PETSC_USE_COMPLEX)
193 #if defined(PETSC_USE_REAL_SINGLE)
194     cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r,lu->etree, lu->equed, lu->R, lu->C,
195            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
196            &lu->mem_usage, &lu->stat, &sinfo);
197 #else
198     zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r,lu->etree, lu->equed, lu->R, lu->C,
199            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
200            &lu->mem_usage, &lu->stat, &sinfo);
201 #endif
202 #else
203 #if defined(PETSC_USE_REAL_SINGLE)
204     sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
205           &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
206           &lu->mem_usage, &lu->stat, &sinfo);
207 #else
208     dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
209           &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
210           &lu->mem_usage, &lu->stat, &sinfo);
211 #endif
212 #endif
213   } else {
214     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
215   }
216   if ( !sinfo || sinfo == lu->A.ncol+1 ) {
217     if ( lu->options.PivotGrowth )
218       ierr = PetscPrintf(PETSC_COMM_SELF,"  Recip. pivot growth = %e\n", lu->rpg);
219     if ( lu->options.ConditionNumber )
220       ierr = PetscPrintf(PETSC_COMM_SELF,"  Recip. condition number = %e\n", lu->rcond);
221   } else if ( sinfo > 0 ) {
222     if ( lu->lwork == -1 ) {
223       ierr = PetscPrintf(PETSC_COMM_SELF,"  ** Estimated memory: %D bytes\n", sinfo - lu->A.ncol);
224     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot in row %D",sinfo);
225   } else { /* sinfo < 0 */
226     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %D, the %D-th argument in gssvx() had an illegal value", sinfo,-sinfo);
227   }
228 
229   if ( lu->options.PrintStat ) {
230     ierr = PetscPrintf(PETSC_COMM_SELF,"MatLUFactorNumeric_SuperLU():\n");
231     StatPrint(&lu->stat);
232     Lstore = (SCformat *) lu->L.Store;
233     Ustore = (NCformat *) lu->U.Store;
234     ierr = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in factor L = %D\n", Lstore->nnz);
235     ierr = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in factor U = %D\n", Ustore->nnz);
236     ierr = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in L+U = %D\n", Lstore->nnz + Ustore->nnz - lu->A.ncol);
237     ierr = PetscPrintf(PETSC_COMM_SELF,"  L\\U MB %.3f\ttotal MB needed %.3f\n",
238     lu->mem_usage.for_lu/1e6, lu->mem_usage.total_needed/1e6);
239   }
240 
241   lu->flg = SAME_NONZERO_PATTERN;
242   F->ops->solve          = MatSolve_SuperLU;
243   F->ops->solvetranspose = MatSolveTranspose_SuperLU;
244   F->ops->matsolve       = MatMatSolve_SuperLU;
245   PetscFunctionReturn(0);
246 }
247 
248 #undef __FUNCT__
249 #define __FUNCT__ "MatDestroy_SuperLU"
250 PetscErrorCode MatDestroy_SuperLU(Mat A)
251 {
252   PetscErrorCode ierr;
253   Mat_SuperLU    *lu=(Mat_SuperLU*)A->spptr;
254 
255   PetscFunctionBegin;
256   if (lu && lu->CleanUpSuperLU) { /* Free the SuperLU datastructures */
257     Destroy_SuperMatrix_Store(&lu->A);
258     Destroy_SuperMatrix_Store(&lu->B);
259     Destroy_SuperMatrix_Store(&lu->X);
260     StatFree(&lu->stat);
261     if (lu->lwork >= 0) {
262       Destroy_SuperNode_Matrix(&lu->L);
263       Destroy_CompCol_Matrix(&lu->U);
264     }
265   }
266   if (lu) {
267     ierr = PetscFree(lu->etree);CHKERRQ(ierr);
268     ierr = PetscFree(lu->perm_r);CHKERRQ(ierr);
269     ierr = PetscFree(lu->perm_c);CHKERRQ(ierr);
270     ierr = PetscFree(lu->R);CHKERRQ(ierr);
271     ierr = PetscFree(lu->C);CHKERRQ(ierr);
272     ierr = PetscFree(lu->rhs_dup);CHKERRQ(ierr);
273     ierr = MatDestroy(&lu->A_dup);CHKERRQ(ierr);
274   }
275   ierr = PetscFree(A->spptr);CHKERRQ(ierr);
276 
277   /* clear composed functions */
278   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatFactorGetSolverPackage_C","",PETSC_NULL);CHKERRQ(ierr);
279   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatSuperluSetILUDropTol_C","",PETSC_NULL);CHKERRQ(ierr);
280 
281   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
282   PetscFunctionReturn(0);
283 }
284 
285 #undef __FUNCT__
286 #define __FUNCT__ "MatView_SuperLU"
287 PetscErrorCode MatView_SuperLU(Mat A,PetscViewer viewer)
288 {
289   PetscErrorCode    ierr;
290   PetscBool         iascii;
291   PetscViewerFormat format;
292 
293   PetscFunctionBegin;
294   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
295   if (iascii) {
296     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
297     if (format == PETSC_VIEWER_ASCII_INFO) {
298       ierr = MatFactorInfo_SuperLU(A,viewer);CHKERRQ(ierr);
299     }
300   }
301   PetscFunctionReturn(0);
302 }
303 
304 
305 #undef __FUNCT__
306 #define __FUNCT__ "MatSolve_SuperLU_Private"
307 PetscErrorCode MatSolve_SuperLU_Private(Mat A,Vec b,Vec x)
308 {
309   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
310   PetscScalar    *barray,*xarray;
311   PetscErrorCode ierr;
312   PetscInt       info,i,n=x->map->n;
313   PetscReal      ferr,berr;
314 
315   PetscFunctionBegin;
316   if ( lu->lwork == -1 ) {
317     PetscFunctionReturn(0);
318   }
319 
320   lu->B.ncol = 1;   /* Set the number of right-hand side */
321   if (lu->options.Equil && !lu->rhs_dup) {
322     /* superlu overwrites b when Equil is used, thus create rhs_dup to keep user's b unchanged */
323     ierr = PetscMalloc(n*sizeof(PetscScalar),&lu->rhs_dup);CHKERRQ(ierr);
324   }
325   if (lu->options.Equil) {
326     /* Copy b into rsh_dup */
327     ierr = VecGetArray(b,&barray);CHKERRQ(ierr);
328     ierr = PetscMemcpy(lu->rhs_dup,barray,n*sizeof(PetscScalar));CHKERRQ(ierr);
329     ierr = VecRestoreArray(b,&barray);CHKERRQ(ierr);
330     barray = lu->rhs_dup;
331   } else {
332     ierr = VecGetArray(b,&barray);CHKERRQ(ierr);
333   }
334   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
335 
336 #if defined(PETSC_USE_COMPLEX)
337 #if defined(PETSC_USE_REAL_SINGLE)
338   ((DNformat*)lu->B.Store)->nzval = (singlecomplex*)barray;
339   ((DNformat*)lu->X.Store)->nzval = (singlecomplex*)xarray;
340 #else
341   ((DNformat*)lu->B.Store)->nzval = (doublecomplex*)barray;
342   ((DNformat*)lu->X.Store)->nzval = (doublecomplex*)xarray;
343 #endif
344 #else
345   ((DNformat*)lu->B.Store)->nzval = barray;
346   ((DNformat*)lu->X.Store)->nzval = xarray;
347 #endif
348 
349   lu->options.Fact = FACTORED; /* Indicate the factored form of A is supplied. */
350   if (A->factortype == MAT_FACTOR_LU) {
351 #if defined(PETSC_USE_COMPLEX)
352 #if defined(PETSC_USE_REAL_SINGLE)
353     cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
354            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
355            &lu->mem_usage, &lu->stat, &info);
356 #else
357     zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
358            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
359            &lu->mem_usage, &lu->stat, &info);
360 #endif
361 #else
362 #if defined(PETSC_USE_REAL_SINGLE)
363     sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
364            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
365            &lu->mem_usage, &lu->stat, &info);
366 #else
367     dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
368            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
369            &lu->mem_usage, &lu->stat, &info);
370 #endif
371 #endif
372   } else if (A->factortype == MAT_FACTOR_ILU) {
373 #if defined(PETSC_USE_COMPLEX)
374 #if defined(PETSC_USE_REAL_SINGLE)
375     cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
376            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
377            &lu->mem_usage, &lu->stat, &info);
378 #else
379     zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
380            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
381            &lu->mem_usage, &lu->stat, &info);
382 #endif
383 #else
384 #if defined(PETSC_USE_REAL_SINGLE)
385     sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
386            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
387            &lu->mem_usage, &lu->stat, &info);
388 #else
389     dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
390            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
391            &lu->mem_usage, &lu->stat, &info);
392 #endif
393 #endif
394   } else {
395     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
396   }
397   if (!lu->options.Equil) {
398     ierr = VecRestoreArray(b,&barray);CHKERRQ(ierr);
399   }
400   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
401 
402   if ( !info || info == lu->A.ncol+1 ) {
403     if ( lu->options.IterRefine ) {
404       ierr = PetscPrintf(PETSC_COMM_SELF,"Iterative Refinement:\n");
405       ierr = PetscPrintf(PETSC_COMM_SELF,"  %8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR");
406       for (i = 0; i < 1; ++i)
407         ierr = PetscPrintf(PETSC_COMM_SELF,"  %8d%8d%16e%16e\n", i+1, lu->stat.RefineSteps, ferr, berr);
408     }
409   } else if ( info > 0 ) {
410     if ( lu->lwork == -1 ) {
411       ierr = PetscPrintf(PETSC_COMM_SELF,"  ** Estimated memory: %D bytes\n", info - lu->A.ncol);
412     } else {
413       ierr = PetscPrintf(PETSC_COMM_SELF,"  Warning: gssvx() returns info %D\n",info);
414     }
415   } else if (info < 0) {
416     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %D, the %D-th argument in gssvx() had an illegal value", info,-info);
417   }
418 
419   if ( lu->options.PrintStat ) {
420     ierr = PetscPrintf(PETSC_COMM_SELF,"MatSolve__SuperLU():\n");
421     StatPrint(&lu->stat);
422   }
423   PetscFunctionReturn(0);
424 }
425 
426 #undef __FUNCT__
427 #define __FUNCT__ "MatSolve_SuperLU"
428 PetscErrorCode MatSolve_SuperLU(Mat A,Vec b,Vec x)
429 {
430   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
431   PetscErrorCode ierr;
432 
433   PetscFunctionBegin;
434   lu->options.Trans = TRANS;
435   ierr = MatSolve_SuperLU_Private(A,b,x);CHKERRQ(ierr);
436   PetscFunctionReturn(0);
437 }
438 
439 #undef __FUNCT__
440 #define __FUNCT__ "MatSolveTranspose_SuperLU"
441 PetscErrorCode MatSolveTranspose_SuperLU(Mat A,Vec b,Vec x)
442 {
443   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
444   PetscErrorCode ierr;
445 
446   PetscFunctionBegin;
447   lu->options.Trans = NOTRANS;
448   ierr = MatSolve_SuperLU_Private(A,b,x);CHKERRQ(ierr);
449   PetscFunctionReturn(0);
450 }
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatMatSolve_SuperLU"
454 PetscErrorCode MatMatSolve_SuperLU(Mat A,Mat B,Mat X)
455 {
456   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
457   PetscBool      flg;
458   PetscErrorCode ierr;
459 
460   PetscFunctionBegin;
461   ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,PETSC_NULL);CHKERRQ(ierr);
462   if (!flg) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix");
463   ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,PETSC_NULL);CHKERRQ(ierr);
464   if (!flg) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");  lu->options.Trans = TRANS;
465   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatSolve_SuperLU() is not implemented yet");
466   PetscFunctionReturn(0);
467 }
468 
469 /*
470    Note the r permutation is ignored
471 */
472 #undef __FUNCT__
473 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU"
474 PetscErrorCode MatLUFactorSymbolic_SuperLU(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
475 {
476   Mat_SuperLU    *lu = (Mat_SuperLU*)(F->spptr);
477 
478   PetscFunctionBegin;
479   lu->flg                 = DIFFERENT_NONZERO_PATTERN;
480   lu->CleanUpSuperLU      = PETSC_TRUE;
481   F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU;
482   PetscFunctionReturn(0);
483 }
484 
485 EXTERN_C_BEGIN
486 #undef __FUNCT__
487 #define __FUNCT__ "MatSuperluSetILUDropTol_SuperLU"
488 PetscErrorCode MatSuperluSetILUDropTol_SuperLU(Mat F,PetscReal dtol)
489 {
490   Mat_SuperLU *lu= (Mat_SuperLU*)F->spptr;
491 
492   PetscFunctionBegin;
493   lu->options.ILU_DropTol = dtol;
494   PetscFunctionReturn(0);
495 }
496 EXTERN_C_END
497 
498 #undef __FUNCT__
499 #define __FUNCT__ "MatSuperluSetILUDropTol"
500 /*@
501   MatSuperluSetILUDropTol - Set SuperLU ILU drop tolerance
502    Logically Collective on Mat
503 
504    Input Parameters:
505 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-SuperLU interface
506 -  dtol - drop tolerance
507 
508   Options Database:
509 .   -mat_superlu_ilu_droptol <dtol>
510 
511    Level: beginner
512 
513    References: SuperLU Users' Guide
514 
515 .seealso: MatGetFactor()
516 @*/
517 PetscErrorCode MatSuperluSetILUDropTol(Mat F,PetscReal dtol)
518 {
519   PetscErrorCode ierr;
520 
521   PetscFunctionBegin;
522   PetscValidHeaderSpecific(F,MAT_CLASSID,1);
523   PetscValidLogicalCollectiveInt(F,dtol,2);
524   ierr = PetscTryMethod(F,"MatSuperluSetILUDropTol_C",(Mat,PetscReal),(F,dtol));CHKERRQ(ierr);
525   PetscFunctionReturn(0);
526 }
527 
528 EXTERN_C_BEGIN
529 #undef __FUNCT__
530 #define __FUNCT__ "MatFactorGetSolverPackage_seqaij_superlu"
531 PetscErrorCode MatFactorGetSolverPackage_seqaij_superlu(Mat A,const MatSolverPackage *type)
532 {
533   PetscFunctionBegin;
534   *type = MATSOLVERSUPERLU;
535   PetscFunctionReturn(0);
536 }
537 EXTERN_C_END
538 
539 
540 /*MC
541   MATSOLVERSUPERLU = "superlu" - A solver package providing solvers LU and ILU for sequential matrices
542   via the external package SuperLU.
543 
544   Use ./configure --download-superlu to have PETSc installed with SuperLU
545 
546   Options Database Keys:
547 + -mat_superlu_equil <FALSE>            - Equil (None)
548 . -mat_superlu_colperm <COLAMD>         - (choose one of) NATURAL MMD_ATA MMD_AT_PLUS_A COLAMD
549 . -mat_superlu_iterrefine <NOREFINE>    - (choose one of) NOREFINE SINGLE DOUBLE EXTRA
550 . -mat_superlu_symmetricmode: <FALSE>   - SymmetricMode (None)
551 . -mat_superlu_diagpivotthresh <1>      - DiagPivotThresh (None)
552 . -mat_superlu_pivotgrowth <FALSE>      - PivotGrowth (None)
553 . -mat_superlu_conditionnumber <FALSE>  - ConditionNumber (None)
554 . -mat_superlu_rowperm <NOROWPERM>      - (choose one of) NOROWPERM LargeDiag
555 . -mat_superlu_replacetinypivot <FALSE> - ReplaceTinyPivot (None)
556 . -mat_superlu_printstat <FALSE>        - PrintStat (None)
557 . -mat_superlu_lwork <0>                - size of work array in bytes used by factorization (None)
558 . -mat_superlu_ilu_droptol <0>          - ILU_DropTol (None)
559 . -mat_superlu_ilu_filltol <0>          - ILU_FillTol (None)
560 . -mat_superlu_ilu_fillfactor <0>       - ILU_FillFactor (None)
561 . -mat_superlu_ilu_droprull <0>         - ILU_DropRule (None)
562 . -mat_superlu_ilu_norm <0>             - ILU_Norm (None)
563 - -mat_superlu_ilu_milu <0>             - ILU_MILU (None)
564 
565    Notes: Do not confuse this with MATSOLVERSUPERLU_DIST which is for parallel sparse solves
566 
567    Level: beginner
568 
569 .seealso: PCLU, PCILU, MATSOLVERSUPERLU_DIST, MATSOLVERMUMPS, PCFactorSetMatSolverPackage(), MatSolverPackage
570 M*/
571 
572 EXTERN_C_BEGIN
573 #undef __FUNCT__
574 #define __FUNCT__ "MatGetFactor_seqaij_superlu"
575 PetscErrorCode MatGetFactor_seqaij_superlu(Mat A,MatFactorType ftype,Mat *F)
576 {
577   Mat            B;
578   Mat_SuperLU    *lu;
579   PetscErrorCode ierr;
580   PetscInt       indx,m=A->rmap->n,n=A->cmap->n;
581   PetscBool      flg;
582   PetscReal      real_input;
583   const char     *colperm[]={"NATURAL","MMD_ATA","MMD_AT_PLUS_A","COLAMD"}; /* MY_PERMC - not supported by the petsc interface yet */
584   const char     *iterrefine[]={"NOREFINE", "SINGLE", "DOUBLE", "EXTRA"};
585   const char     *rowperm[]={"NOROWPERM", "LargeDiag"}; /* MY_PERMC - not supported by the petsc interface yet */
586 
587   PetscFunctionBegin;
588   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
589   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
590   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
591   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
592 
593   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) {
594     B->ops->lufactorsymbolic  = MatLUFactorSymbolic_SuperLU;
595     B->ops->ilufactorsymbolic = MatLUFactorSymbolic_SuperLU;
596   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
597 
598   B->ops->destroy          = MatDestroy_SuperLU;
599   B->ops->view             = MatView_SuperLU;
600   B->factortype            = ftype;
601   B->assembled             = PETSC_TRUE;  /* required by -ksp_view */
602   B->preallocated          = PETSC_TRUE;
603 
604   ierr = PetscNewLog(B,Mat_SuperLU,&lu);CHKERRQ(ierr);
605 
606   if (ftype == MAT_FACTOR_LU) {
607     set_default_options(&lu->options);
608     /* Comments from SuperLU_4.0/SRC/dgssvx.c:
609       "Whether or not the system will be equilibrated depends on the
610        scaling of the matrix A, but if equilibration is used, A is
611        overwritten by diag(R)*A*diag(C) and B by diag(R)*B
612        (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans = TRANS or CONJ)."
613      We set 'options.Equil = NO' as default because additional space is needed for it.
614     */
615     lu->options.Equil = NO;
616   } else if (ftype == MAT_FACTOR_ILU) {
617     /* Set the default input options of ilu: */
618     ilu_set_default_options(&lu->options);
619   }
620   lu->options.PrintStat = NO;
621 
622   /* Initialize the statistics variables. */
623   StatInit(&lu->stat);
624   lu->lwork = 0;   /* allocate space internally by system malloc */
625 
626   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU Options","Mat");CHKERRQ(ierr);
627     ierr = PetscOptionsBool("-mat_superlu_equil","Equil","None",(PetscBool)lu->options.Equil,(PetscBool*)&lu->options.Equil,0);CHKERRQ(ierr);
628     ierr = PetscOptionsEList("-mat_superlu_colperm","ColPerm","None",colperm,4,colperm[3],&indx,&flg);CHKERRQ(ierr);
629     if (flg) {lu->options.ColPerm = (colperm_t)indx;}
630     ierr = PetscOptionsEList("-mat_superlu_iterrefine","IterRefine","None",iterrefine,4,iterrefine[0],&indx,&flg);CHKERRQ(ierr);
631     if (flg) { lu->options.IterRefine = (IterRefine_t)indx;}
632     ierr = PetscOptionsBool("-mat_superlu_symmetricmode","SymmetricMode","None",(PetscBool)lu->options.SymmetricMode,&flg,0);CHKERRQ(ierr);
633     if (flg) lu->options.SymmetricMode = YES;
634     ierr = PetscOptionsReal("-mat_superlu_diagpivotthresh","DiagPivotThresh","None",lu->options.DiagPivotThresh,&real_input,&flg);CHKERRQ(ierr);
635     if (flg) lu->options.DiagPivotThresh = (double) real_input;
636     ierr = PetscOptionsBool("-mat_superlu_pivotgrowth","PivotGrowth","None",(PetscBool)lu->options.PivotGrowth,&flg,0);CHKERRQ(ierr);
637     if (flg) lu->options.PivotGrowth = YES;
638     ierr = PetscOptionsBool("-mat_superlu_conditionnumber","ConditionNumber","None",(PetscBool)lu->options.ConditionNumber,&flg,0);CHKERRQ(ierr);
639     if (flg) lu->options.ConditionNumber = YES;
640     ierr = PetscOptionsEList("-mat_superlu_rowperm","rowperm","None",rowperm,2,rowperm[lu->options.RowPerm],&indx,&flg);CHKERRQ(ierr);
641     if (flg) {lu->options.RowPerm = (rowperm_t)indx;}
642     ierr = PetscOptionsBool("-mat_superlu_replacetinypivot","ReplaceTinyPivot","None",(PetscBool)lu->options.ReplaceTinyPivot,&flg,0);CHKERRQ(ierr);
643     if (flg) lu->options.ReplaceTinyPivot = YES;
644     ierr = PetscOptionsBool("-mat_superlu_printstat","PrintStat","None",(PetscBool)lu->options.PrintStat,&flg,0);CHKERRQ(ierr);
645     if (flg) lu->options.PrintStat = YES;
646     ierr = PetscOptionsInt("-mat_superlu_lwork","size of work array in bytes used by factorization","None",lu->lwork,&lu->lwork,PETSC_NULL);CHKERRQ(ierr);
647     if (lu->lwork > 0 ) {
648       ierr = PetscMalloc(lu->lwork,&lu->work);CHKERRQ(ierr);
649     } else if (lu->lwork != 0 && lu->lwork != -1) {
650       ierr = PetscPrintf(PETSC_COMM_SELF,"   Warning: lwork %D is not supported by SUPERLU. The default lwork=0 is used.\n",lu->lwork);
651       lu->lwork = 0;
652     }
653     /* ilu options */
654     ierr = PetscOptionsReal("-mat_superlu_ilu_droptol","ILU_DropTol","None",lu->options.ILU_DropTol,&real_input,&flg);CHKERRQ(ierr);
655     if (flg) lu->options.ILU_DropTol = (double) real_input;
656     ierr = PetscOptionsReal("-mat_superlu_ilu_filltol","ILU_FillTol","None",lu->options.ILU_FillTol,&real_input,&flg);CHKERRQ(ierr);
657     if (flg) lu->options.ILU_FillTol = (double) real_input;
658     ierr = PetscOptionsReal("-mat_superlu_ilu_fillfactor","ILU_FillFactor","None",lu->options.ILU_FillFactor,&real_input,&flg);CHKERRQ(ierr);
659     if (flg) lu->options.ILU_FillFactor = (double) real_input;
660     ierr = PetscOptionsInt("-mat_superlu_ilu_droprull","ILU_DropRule","None",lu->options.ILU_DropRule,&lu->options.ILU_DropRule,PETSC_NULL);CHKERRQ(ierr);
661     ierr = PetscOptionsInt("-mat_superlu_ilu_norm","ILU_Norm","None",lu->options.ILU_Norm,&indx,&flg);CHKERRQ(ierr);
662     if (flg) {
663       lu->options.ILU_Norm = (norm_t)indx;
664     }
665     ierr = PetscOptionsInt("-mat_superlu_ilu_milu","ILU_MILU","None",lu->options.ILU_MILU,&indx,&flg);CHKERRQ(ierr);
666     if (flg) {
667       lu->options.ILU_MILU = (milu_t)indx;
668     }
669   PetscOptionsEnd();
670   if (lu->options.Equil == YES) {
671     /* superlu overwrites input matrix and rhs when Equil is used, thus create A_dup to keep user's A unchanged */
672     ierr = MatDuplicate_SeqAIJ(A,MAT_COPY_VALUES,&lu->A_dup);CHKERRQ(ierr);
673   }
674 
675   /* Allocate spaces (notice sizes are for the transpose) */
676   ierr = PetscMalloc(m*sizeof(PetscInt),&lu->etree);CHKERRQ(ierr);
677   ierr = PetscMalloc(n*sizeof(PetscInt),&lu->perm_r);CHKERRQ(ierr);
678   ierr = PetscMalloc(m*sizeof(PetscInt),&lu->perm_c);CHKERRQ(ierr);
679   ierr = PetscMalloc(n*sizeof(PetscScalar),&lu->R);CHKERRQ(ierr);
680   ierr = PetscMalloc(m*sizeof(PetscScalar),&lu->C);CHKERRQ(ierr);
681 
682   /* create rhs and solution x without allocate space for .Store */
683 #if defined(PETSC_USE_COMPLEX)
684 #if defined(PETSC_USE_REAL_SINGLE)
685   cCreate_Dense_Matrix(&lu->B, m, 1, PETSC_NULL, m, SLU_DN, SLU_C, SLU_GE);
686   cCreate_Dense_Matrix(&lu->X, m, 1, PETSC_NULL, m, SLU_DN, SLU_C, SLU_GE);
687 #else
688   zCreate_Dense_Matrix(&lu->B, m, 1, PETSC_NULL, m, SLU_DN, SLU_Z, SLU_GE);
689   zCreate_Dense_Matrix(&lu->X, m, 1, PETSC_NULL, m, SLU_DN, SLU_Z, SLU_GE);
690 #endif
691 #else
692 #if defined(PETSC_USE_REAL_SINGLE)
693   sCreate_Dense_Matrix(&lu->B, m, 1, PETSC_NULL, m, SLU_DN, SLU_S, SLU_GE);
694   sCreate_Dense_Matrix(&lu->X, m, 1, PETSC_NULL, m, SLU_DN, SLU_S, SLU_GE);
695 #else
696   dCreate_Dense_Matrix(&lu->B, m, 1, PETSC_NULL, m, SLU_DN, SLU_D, SLU_GE);
697   dCreate_Dense_Matrix(&lu->X, m, 1, PETSC_NULL, m, SLU_DN, SLU_D, SLU_GE);
698 #endif
699 #endif
700 
701 #ifdef SUPERLU2
702   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatCreateNull","MatCreateNull_SuperLU",(void(*)(void))MatCreateNull_SuperLU);CHKERRQ(ierr);
703 #endif
704   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_seqaij_superlu",MatFactorGetSolverPackage_seqaij_superlu);CHKERRQ(ierr);
705   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSuperluSetILUDropTol_C","MatSuperluSetILUDropTol_SuperLU",MatSuperluSetILUDropTol_SuperLU);CHKERRQ(ierr);
706   B->spptr = lu;
707   *F = B;
708   PetscFunctionReturn(0);
709 }
710 EXTERN_C_END
711 
712