xref: /petsc/src/mat/impls/aij/seq/superlu/superlu.c (revision 5c8f6a953e7ed1c81f507d64aebddb11080b60e9)
1 
2 /*  --------------------------------------------------------------------
3 
4      This file implements a subclass of the SeqAIJ matrix class that uses
5      the SuperLU sparse solver. You can use this as a starting point for
6      implementing your own subclass of a PETSc matrix class.
7 
8      This demonstrates a way to make an implementation inheritence of a PETSc
9      matrix type. This means constructing a new matrix type (SuperLU) by changing some
10      of the methods of the previous type (SeqAIJ), adding additional data, and possibly
11      additional method. (See any book on object oriented programming).
12 */
13 
14 /*
15      Defines the data structure for the base matrix type (SeqAIJ)
16 */
17 #include <../src/mat/impls/aij/seq/aij.h>    /*I "petscmat.h" I*/
18 
19 /*
20      SuperLU include files
21 */
22 EXTERN_C_BEGIN
23 #if defined(PETSC_USE_COMPLEX)
24 #if defined(PETSC_USE_REAL_SINGLE)
25 #include <slu_cdefs.h>
26 #else
27 #include <slu_zdefs.h>
28 #endif
29 #else
30 #if defined(PETSC_USE_REAL_SINGLE)
31 #include <slu_sdefs.h>
32 #else
33 #include <slu_ddefs.h>
34 #endif
35 #endif
36 #include <slu_util.h>
37 EXTERN_C_END
38 
39 /*
40      This is the data we are "ADDING" to the SeqAIJ matrix type to get the SuperLU matrix type
41 */
42 typedef struct {
43   SuperMatrix       A,L,U,B,X;
44   superlu_options_t options;
45   PetscInt          *perm_c; /* column permutation vector */
46   PetscInt          *perm_r; /* row permutations from partial pivoting */
47   PetscInt          *etree;
48   PetscReal         *R, *C;
49   char              equed[1];
50   PetscInt          lwork;
51   void              *work;
52   PetscReal         rpg, rcond;
53   mem_usage_t       mem_usage;
54   MatStructure      flg;
55   SuperLUStat_t     stat;
56   Mat               A_dup;
57   PetscScalar       *rhs_dup;
58 
59   /* Flag to clean up (non-global) SuperLU objects during Destroy */
60   PetscBool  CleanUpSuperLU;
61 } Mat_SuperLU;
62 
63 extern PetscErrorCode MatFactorInfo_SuperLU(Mat,PetscViewer);
64 extern PetscErrorCode MatLUFactorNumeric_SuperLU(Mat,Mat,const MatFactorInfo *);
65 extern PetscErrorCode MatDestroy_SuperLU(Mat);
66 extern PetscErrorCode MatView_SuperLU(Mat,PetscViewer);
67 extern PetscErrorCode MatAssemblyEnd_SuperLU(Mat,MatAssemblyType);
68 extern PetscErrorCode MatSolve_SuperLU(Mat,Vec,Vec);
69 extern PetscErrorCode MatMatSolve_SuperLU(Mat,Mat,Mat);
70 extern PetscErrorCode MatSolveTranspose_SuperLU(Mat,Vec,Vec);
71 extern PetscErrorCode MatLUFactorSymbolic_SuperLU(Mat,Mat,IS,IS,const MatFactorInfo*);
72 extern PetscErrorCode MatDuplicate_SuperLU(Mat, MatDuplicateOption, Mat *);
73 
74 /*
75     Utility function
76 */
77 #undef __FUNCT__
78 #define __FUNCT__ "MatFactorInfo_SuperLU"
79 PetscErrorCode MatFactorInfo_SuperLU(Mat A,PetscViewer viewer)
80 {
81   Mat_SuperLU       *lu= (Mat_SuperLU*)A->spptr;
82   PetscErrorCode    ierr;
83   superlu_options_t options;
84 
85   PetscFunctionBegin;
86   /* check if matrix is superlu_dist type */
87   if (A->ops->solve != MatSolve_SuperLU) PetscFunctionReturn(0);
88 
89   options = lu->options;
90   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU run parameters:\n");CHKERRQ(ierr);
91   ierr = PetscViewerASCIIPrintf(viewer,"  Equil: %s\n",(options.Equil != NO) ? "YES": "NO");CHKERRQ(ierr);
92   ierr = PetscViewerASCIIPrintf(viewer,"  ColPerm: %D\n",options.ColPerm);CHKERRQ(ierr);
93   ierr = PetscViewerASCIIPrintf(viewer,"  IterRefine: %D\n",options.IterRefine);CHKERRQ(ierr);
94   ierr = PetscViewerASCIIPrintf(viewer,"  SymmetricMode: %s\n",(options.SymmetricMode != NO) ? "YES": "NO");CHKERRQ(ierr);
95   ierr = PetscViewerASCIIPrintf(viewer,"  DiagPivotThresh: %g\n",options.DiagPivotThresh);CHKERRQ(ierr);
96   ierr = PetscViewerASCIIPrintf(viewer,"  PivotGrowth: %s\n",(options.PivotGrowth != NO) ? "YES": "NO");CHKERRQ(ierr);
97   ierr = PetscViewerASCIIPrintf(viewer,"  ConditionNumber: %s\n",(options.ConditionNumber != NO) ? "YES": "NO");CHKERRQ(ierr);
98   ierr = PetscViewerASCIIPrintf(viewer,"  RowPerm: %D\n",options.RowPerm);CHKERRQ(ierr);
99   ierr = PetscViewerASCIIPrintf(viewer,"  ReplaceTinyPivot: %s\n",(options.ReplaceTinyPivot != NO) ? "YES": "NO");CHKERRQ(ierr);
100   ierr = PetscViewerASCIIPrintf(viewer,"  PrintStat: %s\n",(options.PrintStat != NO) ? "YES": "NO");CHKERRQ(ierr);
101   ierr = PetscViewerASCIIPrintf(viewer,"  lwork: %D\n",lu->lwork);CHKERRQ(ierr);
102   if (A->factortype == MAT_FACTOR_ILU) {
103     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_DropTol: %g\n",options.ILU_DropTol);CHKERRQ(ierr);
104     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_FillTol: %g\n",options.ILU_FillTol);CHKERRQ(ierr);
105     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_FillFactor: %g\n",options.ILU_FillFactor);CHKERRQ(ierr);
106     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_DropRule: %D\n",options.ILU_DropRule);CHKERRQ(ierr);
107     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_Norm: %D\n",options.ILU_Norm);CHKERRQ(ierr);
108     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_MILU: %D\n",options.ILU_MILU);CHKERRQ(ierr);
109   }
110   PetscFunctionReturn(0);
111 }
112 
113 /*
114     These are the methods provided to REPLACE the corresponding methods of the
115    SeqAIJ matrix class. Other methods of SeqAIJ are not replaced
116 */
117 #undef __FUNCT__
118 #define __FUNCT__ "MatLUFactorNumeric_SuperLU"
119 PetscErrorCode MatLUFactorNumeric_SuperLU(Mat F,Mat A,const MatFactorInfo *info)
120 {
121   Mat_SuperLU    *lu = (Mat_SuperLU*)F->spptr;
122   Mat_SeqAIJ     *aa;
123   PetscErrorCode ierr;
124   PetscInt       sinfo;
125   PetscReal      ferr, berr;
126   NCformat       *Ustore;
127   SCformat       *Lstore;
128 
129   PetscFunctionBegin;
130   if (lu->flg == SAME_NONZERO_PATTERN) { /* successing numerical factorization */
131     lu->options.Fact = SamePattern;
132     /* Ref: ~SuperLU_3.0/EXAMPLE/dlinsolx2.c */
133     Destroy_SuperMatrix_Store(&lu->A);
134     if (lu->options.Equil) {
135       ierr = MatCopy_SeqAIJ(A,lu->A_dup,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
136     }
137     if (lu->lwork >= 0) {
138       Destroy_SuperNode_Matrix(&lu->L);
139       Destroy_CompCol_Matrix(&lu->U);
140       lu->options.Fact = SamePattern;
141     }
142   }
143 
144   /* Create the SuperMatrix for lu->A=A^T:
145        Since SuperLU likes column-oriented matrices,we pass it the transpose,
146        and then solve A^T X = B in MatSolve(). */
147   if (lu->options.Equil) {
148     aa = (Mat_SeqAIJ*)(lu->A_dup)->data;
149   } else {
150     aa = (Mat_SeqAIJ*)(A)->data;
151   }
152 #if defined(PETSC_USE_COMPLEX)
153 #if defined(PETSC_USE_REAL_SINGLE)
154   cCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,(singlecomplex*)aa->a,aa->j,aa->i,SLU_NC,SLU_C,SLU_GE);
155 #else
156   zCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,SLU_NC,SLU_Z,SLU_GE);
157 #endif
158 #else
159 #if defined(PETSC_USE_REAL_SINGLE)
160   sCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,aa->a,aa->j,aa->i,SLU_NC,SLU_S,SLU_GE);
161 #else
162   dCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,aa->a,aa->j,aa->i,SLU_NC,SLU_D,SLU_GE);
163 #endif
164 #endif
165 
166   /* Numerical factorization */
167   lu->B.ncol = 0;  /* Indicate not to solve the system */
168   if (F->factortype == MAT_FACTOR_LU) {
169 #if defined(PETSC_USE_COMPLEX)
170 #if defined(PETSC_USE_REAL_SINGLE)
171     cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
172            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
173            &lu->mem_usage, &lu->stat, &sinfo);
174 #else
175     zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
176            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
177            &lu->mem_usage, &lu->stat, &sinfo);
178 #endif
179 #else
180 #if defined(PETSC_USE_REAL_SINGLE)
181     sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
182            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
183            &lu->mem_usage, &lu->stat, &sinfo);
184 #else
185     dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
186            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
187            &lu->mem_usage, &lu->stat, &sinfo);
188 #endif
189 #endif
190   } else if (F->factortype == MAT_FACTOR_ILU) {
191     /* Compute the incomplete factorization, condition number and pivot growth */
192 #if defined(PETSC_USE_COMPLEX)
193 #if defined(PETSC_USE_REAL_SINGLE)
194     cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r,lu->etree, lu->equed, lu->R, lu->C,
195            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
196            &lu->mem_usage, &lu->stat, &sinfo);
197 #else
198     zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r,lu->etree, lu->equed, lu->R, lu->C,
199            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
200            &lu->mem_usage, &lu->stat, &sinfo);
201 #endif
202 #else
203 #if defined(PETSC_USE_REAL_SINGLE)
204     sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
205           &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
206           &lu->mem_usage, &lu->stat, &sinfo);
207 #else
208     dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
209           &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
210           &lu->mem_usage, &lu->stat, &sinfo);
211 #endif
212 #endif
213   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
214   if (!sinfo || sinfo == lu->A.ncol+1) {
215     if (lu->options.PivotGrowth)
216       ierr = PetscPrintf(PETSC_COMM_SELF,"  Recip. pivot growth = %e\n", lu->rpg);
217     if (lu->options.ConditionNumber)
218       ierr = PetscPrintf(PETSC_COMM_SELF,"  Recip. condition number = %e\n", lu->rcond);
219   } else if (sinfo > 0) {
220     if (lu->lwork == -1) {
221       ierr = PetscPrintf(PETSC_COMM_SELF,"  ** Estimated memory: %D bytes\n", sinfo - lu->A.ncol);
222     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot in row %D",sinfo);
223   } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %D, the %D-th argument in gssvx() had an illegal value", sinfo,-sinfo);
224 
225   if (lu->options.PrintStat) {
226     ierr = PetscPrintf(PETSC_COMM_SELF,"MatLUFactorNumeric_SuperLU():\n");
227     StatPrint(&lu->stat);
228     Lstore = (SCformat *) lu->L.Store;
229     Ustore = (NCformat *) lu->U.Store;
230     ierr = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in factor L = %D\n", Lstore->nnz);
231     ierr = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in factor U = %D\n", Ustore->nnz);
232     ierr = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in L+U = %D\n", Lstore->nnz + Ustore->nnz - lu->A.ncol);
233     ierr = PetscPrintf(PETSC_COMM_SELF,"  L\\U MB %.3f\ttotal MB needed %.3f\n",
234     lu->mem_usage.for_lu/1e6, lu->mem_usage.total_needed/1e6);
235   }
236 
237   lu->flg = SAME_NONZERO_PATTERN;
238   F->ops->solve          = MatSolve_SuperLU;
239   F->ops->solvetranspose = MatSolveTranspose_SuperLU;
240   F->ops->matsolve       = MatMatSolve_SuperLU;
241   PetscFunctionReturn(0);
242 }
243 
244 #undef __FUNCT__
245 #define __FUNCT__ "MatDestroy_SuperLU"
246 PetscErrorCode MatDestroy_SuperLU(Mat A)
247 {
248   PetscErrorCode ierr;
249   Mat_SuperLU    *lu=(Mat_SuperLU*)A->spptr;
250 
251   PetscFunctionBegin;
252   if (lu && lu->CleanUpSuperLU) { /* Free the SuperLU datastructures */
253     Destroy_SuperMatrix_Store(&lu->A);
254     Destroy_SuperMatrix_Store(&lu->B);
255     Destroy_SuperMatrix_Store(&lu->X);
256     StatFree(&lu->stat);
257     if (lu->lwork >= 0) {
258       Destroy_SuperNode_Matrix(&lu->L);
259       Destroy_CompCol_Matrix(&lu->U);
260     }
261   }
262   if (lu) {
263     ierr = PetscFree(lu->etree);CHKERRQ(ierr);
264     ierr = PetscFree(lu->perm_r);CHKERRQ(ierr);
265     ierr = PetscFree(lu->perm_c);CHKERRQ(ierr);
266     ierr = PetscFree(lu->R);CHKERRQ(ierr);
267     ierr = PetscFree(lu->C);CHKERRQ(ierr);
268     ierr = PetscFree(lu->rhs_dup);CHKERRQ(ierr);
269     ierr = MatDestroy(&lu->A_dup);CHKERRQ(ierr);
270   }
271   ierr = PetscFree(A->spptr);CHKERRQ(ierr);
272 
273   /* clear composed functions */
274   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatFactorGetSolverPackage_C","",PETSC_NULL);CHKERRQ(ierr);
275   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatSuperluSetILUDropTol_C","",PETSC_NULL);CHKERRQ(ierr);
276 
277   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
278   PetscFunctionReturn(0);
279 }
280 
281 #undef __FUNCT__
282 #define __FUNCT__ "MatView_SuperLU"
283 PetscErrorCode MatView_SuperLU(Mat A,PetscViewer viewer)
284 {
285   PetscErrorCode    ierr;
286   PetscBool         iascii;
287   PetscViewerFormat format;
288 
289   PetscFunctionBegin;
290   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
291   if (iascii) {
292     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
293     if (format == PETSC_VIEWER_ASCII_INFO) {
294       ierr = MatFactorInfo_SuperLU(A,viewer);CHKERRQ(ierr);
295     }
296   }
297   PetscFunctionReturn(0);
298 }
299 
300 
301 #undef __FUNCT__
302 #define __FUNCT__ "MatSolve_SuperLU_Private"
303 PetscErrorCode MatSolve_SuperLU_Private(Mat A,Vec b,Vec x)
304 {
305   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
306   PetscScalar    *barray,*xarray;
307   PetscErrorCode ierr;
308   PetscInt       info,i,n=x->map->n;
309   PetscReal      ferr,berr;
310 
311   PetscFunctionBegin;
312   if (lu->lwork == -1) {
313     PetscFunctionReturn(0);
314   }
315 
316   lu->B.ncol = 1;   /* Set the number of right-hand side */
317   if (lu->options.Equil && !lu->rhs_dup) {
318     /* superlu overwrites b when Equil is used, thus create rhs_dup to keep user's b unchanged */
319     ierr = PetscMalloc(n*sizeof(PetscScalar),&lu->rhs_dup);CHKERRQ(ierr);
320   }
321   if (lu->options.Equil) {
322     /* Copy b into rsh_dup */
323     ierr = VecGetArray(b,&barray);CHKERRQ(ierr);
324     ierr = PetscMemcpy(lu->rhs_dup,barray,n*sizeof(PetscScalar));CHKERRQ(ierr);
325     ierr = VecRestoreArray(b,&barray);CHKERRQ(ierr);
326     barray = lu->rhs_dup;
327   } else {
328     ierr = VecGetArray(b,&barray);CHKERRQ(ierr);
329   }
330   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
331 
332 #if defined(PETSC_USE_COMPLEX)
333 #if defined(PETSC_USE_REAL_SINGLE)
334   ((DNformat*)lu->B.Store)->nzval = (singlecomplex*)barray;
335   ((DNformat*)lu->X.Store)->nzval = (singlecomplex*)xarray;
336 #else
337   ((DNformat*)lu->B.Store)->nzval = (doublecomplex*)barray;
338   ((DNformat*)lu->X.Store)->nzval = (doublecomplex*)xarray;
339 #endif
340 #else
341   ((DNformat*)lu->B.Store)->nzval = barray;
342   ((DNformat*)lu->X.Store)->nzval = xarray;
343 #endif
344 
345   lu->options.Fact = FACTORED; /* Indicate the factored form of A is supplied. */
346   if (A->factortype == MAT_FACTOR_LU) {
347 #if defined(PETSC_USE_COMPLEX)
348 #if defined(PETSC_USE_REAL_SINGLE)
349     cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
350            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
351            &lu->mem_usage, &lu->stat, &info);
352 #else
353     zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
354            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
355            &lu->mem_usage, &lu->stat, &info);
356 #endif
357 #else
358 #if defined(PETSC_USE_REAL_SINGLE)
359     sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
360            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
361            &lu->mem_usage, &lu->stat, &info);
362 #else
363     dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
364            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
365            &lu->mem_usage, &lu->stat, &info);
366 #endif
367 #endif
368   } else if (A->factortype == MAT_FACTOR_ILU) {
369 #if defined(PETSC_USE_COMPLEX)
370 #if defined(PETSC_USE_REAL_SINGLE)
371     cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
372            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
373            &lu->mem_usage, &lu->stat, &info);
374 #else
375     zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
376            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
377            &lu->mem_usage, &lu->stat, &info);
378 #endif
379 #else
380 #if defined(PETSC_USE_REAL_SINGLE)
381     sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
382            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
383            &lu->mem_usage, &lu->stat, &info);
384 #else
385     dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
386            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
387            &lu->mem_usage, &lu->stat, &info);
388 #endif
389 #endif
390   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
391   if (!lu->options.Equil) {
392     ierr = VecRestoreArray(b,&barray);CHKERRQ(ierr);
393   }
394   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
395 
396   if (!info || info == lu->A.ncol+1) {
397     if (lu->options.IterRefine) {
398       ierr = PetscPrintf(PETSC_COMM_SELF,"Iterative Refinement:\n");
399       ierr = PetscPrintf(PETSC_COMM_SELF,"  %8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR");
400       for (i = 0; i < 1; ++i)
401         ierr = PetscPrintf(PETSC_COMM_SELF,"  %8d%8d%16e%16e\n", i+1, lu->stat.RefineSteps, ferr, berr);
402     }
403   } else if (info > 0) {
404     if (lu->lwork == -1) {
405       ierr = PetscPrintf(PETSC_COMM_SELF,"  ** Estimated memory: %D bytes\n", info - lu->A.ncol);
406     } else {
407       ierr = PetscPrintf(PETSC_COMM_SELF,"  Warning: gssvx() returns info %D\n",info);
408     }
409   } else if (info < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %D, the %D-th argument in gssvx() had an illegal value", info,-info);
410 
411   if (lu->options.PrintStat) {
412     ierr = PetscPrintf(PETSC_COMM_SELF,"MatSolve__SuperLU():\n");
413     StatPrint(&lu->stat);
414   }
415   PetscFunctionReturn(0);
416 }
417 
418 #undef __FUNCT__
419 #define __FUNCT__ "MatSolve_SuperLU"
420 PetscErrorCode MatSolve_SuperLU(Mat A,Vec b,Vec x)
421 {
422   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
423   PetscErrorCode ierr;
424 
425   PetscFunctionBegin;
426   lu->options.Trans = TRANS;
427   ierr = MatSolve_SuperLU_Private(A,b,x);CHKERRQ(ierr);
428   PetscFunctionReturn(0);
429 }
430 
431 #undef __FUNCT__
432 #define __FUNCT__ "MatSolveTranspose_SuperLU"
433 PetscErrorCode MatSolveTranspose_SuperLU(Mat A,Vec b,Vec x)
434 {
435   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
436   PetscErrorCode ierr;
437 
438   PetscFunctionBegin;
439   lu->options.Trans = NOTRANS;
440   ierr = MatSolve_SuperLU_Private(A,b,x);CHKERRQ(ierr);
441   PetscFunctionReturn(0);
442 }
443 
444 #undef __FUNCT__
445 #define __FUNCT__ "MatMatSolve_SuperLU"
446 PetscErrorCode MatMatSolve_SuperLU(Mat A,Mat B,Mat X)
447 {
448   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
449   PetscBool      flg;
450   PetscErrorCode ierr;
451 
452   PetscFunctionBegin;
453   ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,PETSC_NULL);CHKERRQ(ierr);
454   if (!flg) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix");
455   ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,PETSC_NULL);CHKERRQ(ierr);
456   if (!flg) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");  lu->options.Trans = TRANS;
457   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatSolve_SuperLU() is not implemented yet");
458   PetscFunctionReturn(0);
459 }
460 
461 /*
462    Note the r permutation is ignored
463 */
464 #undef __FUNCT__
465 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU"
466 PetscErrorCode MatLUFactorSymbolic_SuperLU(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
467 {
468   Mat_SuperLU    *lu = (Mat_SuperLU*)(F->spptr);
469 
470   PetscFunctionBegin;
471   lu->flg                 = DIFFERENT_NONZERO_PATTERN;
472   lu->CleanUpSuperLU      = PETSC_TRUE;
473   F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU;
474   PetscFunctionReturn(0);
475 }
476 
477 EXTERN_C_BEGIN
478 #undef __FUNCT__
479 #define __FUNCT__ "MatSuperluSetILUDropTol_SuperLU"
480 PetscErrorCode MatSuperluSetILUDropTol_SuperLU(Mat F,PetscReal dtol)
481 {
482   Mat_SuperLU *lu= (Mat_SuperLU*)F->spptr;
483 
484   PetscFunctionBegin;
485   lu->options.ILU_DropTol = dtol;
486   PetscFunctionReturn(0);
487 }
488 EXTERN_C_END
489 
490 #undef __FUNCT__
491 #define __FUNCT__ "MatSuperluSetILUDropTol"
492 /*@
493   MatSuperluSetILUDropTol - Set SuperLU ILU drop tolerance
494    Logically Collective on Mat
495 
496    Input Parameters:
497 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-SuperLU interface
498 -  dtol - drop tolerance
499 
500   Options Database:
501 .   -mat_superlu_ilu_droptol <dtol>
502 
503    Level: beginner
504 
505    References: SuperLU Users' Guide
506 
507 .seealso: MatGetFactor()
508 @*/
509 PetscErrorCode MatSuperluSetILUDropTol(Mat F,PetscReal dtol)
510 {
511   PetscErrorCode ierr;
512 
513   PetscFunctionBegin;
514   PetscValidHeaderSpecific(F,MAT_CLASSID,1);
515   PetscValidLogicalCollectiveInt(F,dtol,2);
516   ierr = PetscTryMethod(F,"MatSuperluSetILUDropTol_C",(Mat,PetscReal),(F,dtol));CHKERRQ(ierr);
517   PetscFunctionReturn(0);
518 }
519 
520 EXTERN_C_BEGIN
521 #undef __FUNCT__
522 #define __FUNCT__ "MatFactorGetSolverPackage_seqaij_superlu"
523 PetscErrorCode MatFactorGetSolverPackage_seqaij_superlu(Mat A,const MatSolverPackage *type)
524 {
525   PetscFunctionBegin;
526   *type = MATSOLVERSUPERLU;
527   PetscFunctionReturn(0);
528 }
529 EXTERN_C_END
530 
531 
532 /*MC
533   MATSOLVERSUPERLU = "superlu" - A solver package providing solvers LU and ILU for sequential matrices
534   via the external package SuperLU.
535 
536   Use ./configure --download-superlu to have PETSc installed with SuperLU
537 
538   Options Database Keys:
539 + -mat_superlu_equil <FALSE>            - Equil (None)
540 . -mat_superlu_colperm <COLAMD>         - (choose one of) NATURAL MMD_ATA MMD_AT_PLUS_A COLAMD
541 . -mat_superlu_iterrefine <NOREFINE>    - (choose one of) NOREFINE SINGLE DOUBLE EXTRA
542 . -mat_superlu_symmetricmode: <FALSE>   - SymmetricMode (None)
543 . -mat_superlu_diagpivotthresh <1>      - DiagPivotThresh (None)
544 . -mat_superlu_pivotgrowth <FALSE>      - PivotGrowth (None)
545 . -mat_superlu_conditionnumber <FALSE>  - ConditionNumber (None)
546 . -mat_superlu_rowperm <NOROWPERM>      - (choose one of) NOROWPERM LargeDiag
547 . -mat_superlu_replacetinypivot <FALSE> - ReplaceTinyPivot (None)
548 . -mat_superlu_printstat <FALSE>        - PrintStat (None)
549 . -mat_superlu_lwork <0>                - size of work array in bytes used by factorization (None)
550 . -mat_superlu_ilu_droptol <0>          - ILU_DropTol (None)
551 . -mat_superlu_ilu_filltol <0>          - ILU_FillTol (None)
552 . -mat_superlu_ilu_fillfactor <0>       - ILU_FillFactor (None)
553 . -mat_superlu_ilu_droprull <0>         - ILU_DropRule (None)
554 . -mat_superlu_ilu_norm <0>             - ILU_Norm (None)
555 - -mat_superlu_ilu_milu <0>             - ILU_MILU (None)
556 
557    Notes: Do not confuse this with MATSOLVERSUPERLU_DIST which is for parallel sparse solves
558 
559    Level: beginner
560 
561 .seealso: PCLU, PCILU, MATSOLVERSUPERLU_DIST, MATSOLVERMUMPS, PCFactorSetMatSolverPackage(), MatSolverPackage
562 M*/
563 
564 EXTERN_C_BEGIN
565 #undef __FUNCT__
566 #define __FUNCT__ "MatGetFactor_seqaij_superlu"
567 PetscErrorCode MatGetFactor_seqaij_superlu(Mat A,MatFactorType ftype,Mat *F)
568 {
569   Mat            B;
570   Mat_SuperLU    *lu;
571   PetscErrorCode ierr;
572   PetscInt       indx,m=A->rmap->n,n=A->cmap->n;
573   PetscBool      flg;
574   PetscReal      real_input;
575   const char     *colperm[]={"NATURAL","MMD_ATA","MMD_AT_PLUS_A","COLAMD"}; /* MY_PERMC - not supported by the petsc interface yet */
576   const char     *iterrefine[]={"NOREFINE", "SINGLE", "DOUBLE", "EXTRA"};
577   const char     *rowperm[]={"NOROWPERM", "LargeDiag"}; /* MY_PERMC - not supported by the petsc interface yet */
578 
579   PetscFunctionBegin;
580   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
581   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
582   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
583   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
584 
585   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) {
586     B->ops->lufactorsymbolic  = MatLUFactorSymbolic_SuperLU;
587     B->ops->ilufactorsymbolic = MatLUFactorSymbolic_SuperLU;
588   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
589 
590   B->ops->destroy          = MatDestroy_SuperLU;
591   B->ops->view             = MatView_SuperLU;
592   B->factortype            = ftype;
593   B->assembled             = PETSC_TRUE;  /* required by -ksp_view */
594   B->preallocated          = PETSC_TRUE;
595 
596   ierr = PetscNewLog(B,Mat_SuperLU,&lu);CHKERRQ(ierr);
597 
598   if (ftype == MAT_FACTOR_LU) {
599     set_default_options(&lu->options);
600     /* Comments from SuperLU_4.0/SRC/dgssvx.c:
601       "Whether or not the system will be equilibrated depends on the
602        scaling of the matrix A, but if equilibration is used, A is
603        overwritten by diag(R)*A*diag(C) and B by diag(R)*B
604        (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans = TRANS or CONJ)."
605      We set 'options.Equil = NO' as default because additional space is needed for it.
606     */
607     lu->options.Equil = NO;
608   } else if (ftype == MAT_FACTOR_ILU) {
609     /* Set the default input options of ilu: */
610     ilu_set_default_options(&lu->options);
611   }
612   lu->options.PrintStat = NO;
613 
614   /* Initialize the statistics variables. */
615   StatInit(&lu->stat);
616   lu->lwork = 0;   /* allocate space internally by system malloc */
617 
618   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU Options","Mat");CHKERRQ(ierr);
619     ierr = PetscOptionsBool("-mat_superlu_equil","Equil","None",(PetscBool)lu->options.Equil,(PetscBool*)&lu->options.Equil,0);CHKERRQ(ierr);
620     ierr = PetscOptionsEList("-mat_superlu_colperm","ColPerm","None",colperm,4,colperm[3],&indx,&flg);CHKERRQ(ierr);
621     if (flg) {lu->options.ColPerm = (colperm_t)indx;}
622     ierr = PetscOptionsEList("-mat_superlu_iterrefine","IterRefine","None",iterrefine,4,iterrefine[0],&indx,&flg);CHKERRQ(ierr);
623     if (flg) { lu->options.IterRefine = (IterRefine_t)indx;}
624     ierr = PetscOptionsBool("-mat_superlu_symmetricmode","SymmetricMode","None",(PetscBool)lu->options.SymmetricMode,&flg,0);CHKERRQ(ierr);
625     if (flg) lu->options.SymmetricMode = YES;
626     ierr = PetscOptionsReal("-mat_superlu_diagpivotthresh","DiagPivotThresh","None",lu->options.DiagPivotThresh,&real_input,&flg);CHKERRQ(ierr);
627     if (flg) lu->options.DiagPivotThresh = (double) real_input;
628     ierr = PetscOptionsBool("-mat_superlu_pivotgrowth","PivotGrowth","None",(PetscBool)lu->options.PivotGrowth,&flg,0);CHKERRQ(ierr);
629     if (flg) lu->options.PivotGrowth = YES;
630     ierr = PetscOptionsBool("-mat_superlu_conditionnumber","ConditionNumber","None",(PetscBool)lu->options.ConditionNumber,&flg,0);CHKERRQ(ierr);
631     if (flg) lu->options.ConditionNumber = YES;
632     ierr = PetscOptionsEList("-mat_superlu_rowperm","rowperm","None",rowperm,2,rowperm[lu->options.RowPerm],&indx,&flg);CHKERRQ(ierr);
633     if (flg) {lu->options.RowPerm = (rowperm_t)indx;}
634     ierr = PetscOptionsBool("-mat_superlu_replacetinypivot","ReplaceTinyPivot","None",(PetscBool)lu->options.ReplaceTinyPivot,&flg,0);CHKERRQ(ierr);
635     if (flg) lu->options.ReplaceTinyPivot = YES;
636     ierr = PetscOptionsBool("-mat_superlu_printstat","PrintStat","None",(PetscBool)lu->options.PrintStat,&flg,0);CHKERRQ(ierr);
637     if (flg) lu->options.PrintStat = YES;
638     ierr = PetscOptionsInt("-mat_superlu_lwork","size of work array in bytes used by factorization","None",lu->lwork,&lu->lwork,PETSC_NULL);CHKERRQ(ierr);
639     if (lu->lwork > 0) {
640       ierr = PetscMalloc(lu->lwork,&lu->work);CHKERRQ(ierr);
641     } else if (lu->lwork != 0 && lu->lwork != -1) {
642       ierr = PetscPrintf(PETSC_COMM_SELF,"   Warning: lwork %D is not supported by SUPERLU. The default lwork=0 is used.\n",lu->lwork);
643       lu->lwork = 0;
644     }
645     /* ilu options */
646     ierr = PetscOptionsReal("-mat_superlu_ilu_droptol","ILU_DropTol","None",lu->options.ILU_DropTol,&real_input,&flg);CHKERRQ(ierr);
647     if (flg) lu->options.ILU_DropTol = (double) real_input;
648     ierr = PetscOptionsReal("-mat_superlu_ilu_filltol","ILU_FillTol","None",lu->options.ILU_FillTol,&real_input,&flg);CHKERRQ(ierr);
649     if (flg) lu->options.ILU_FillTol = (double) real_input;
650     ierr = PetscOptionsReal("-mat_superlu_ilu_fillfactor","ILU_FillFactor","None",lu->options.ILU_FillFactor,&real_input,&flg);CHKERRQ(ierr);
651     if (flg) lu->options.ILU_FillFactor = (double) real_input;
652     ierr = PetscOptionsInt("-mat_superlu_ilu_droprull","ILU_DropRule","None",lu->options.ILU_DropRule,&lu->options.ILU_DropRule,PETSC_NULL);CHKERRQ(ierr);
653     ierr = PetscOptionsInt("-mat_superlu_ilu_norm","ILU_Norm","None",lu->options.ILU_Norm,&indx,&flg);CHKERRQ(ierr);
654     if (flg) {
655       lu->options.ILU_Norm = (norm_t)indx;
656     }
657     ierr = PetscOptionsInt("-mat_superlu_ilu_milu","ILU_MILU","None",lu->options.ILU_MILU,&indx,&flg);CHKERRQ(ierr);
658     if (flg) {
659       lu->options.ILU_MILU = (milu_t)indx;
660     }
661   PetscOptionsEnd();
662   if (lu->options.Equil == YES) {
663     /* superlu overwrites input matrix and rhs when Equil is used, thus create A_dup to keep user's A unchanged */
664     ierr = MatDuplicate_SeqAIJ(A,MAT_COPY_VALUES,&lu->A_dup);CHKERRQ(ierr);
665   }
666 
667   /* Allocate spaces (notice sizes are for the transpose) */
668   ierr = PetscMalloc(m*sizeof(PetscInt),&lu->etree);CHKERRQ(ierr);
669   ierr = PetscMalloc(n*sizeof(PetscInt),&lu->perm_r);CHKERRQ(ierr);
670   ierr = PetscMalloc(m*sizeof(PetscInt),&lu->perm_c);CHKERRQ(ierr);
671   ierr = PetscMalloc(n*sizeof(PetscScalar),&lu->R);CHKERRQ(ierr);
672   ierr = PetscMalloc(m*sizeof(PetscScalar),&lu->C);CHKERRQ(ierr);
673 
674   /* create rhs and solution x without allocate space for .Store */
675 #if defined(PETSC_USE_COMPLEX)
676 #if defined(PETSC_USE_REAL_SINGLE)
677   cCreate_Dense_Matrix(&lu->B, m, 1, PETSC_NULL, m, SLU_DN, SLU_C, SLU_GE);
678   cCreate_Dense_Matrix(&lu->X, m, 1, PETSC_NULL, m, SLU_DN, SLU_C, SLU_GE);
679 #else
680   zCreate_Dense_Matrix(&lu->B, m, 1, PETSC_NULL, m, SLU_DN, SLU_Z, SLU_GE);
681   zCreate_Dense_Matrix(&lu->X, m, 1, PETSC_NULL, m, SLU_DN, SLU_Z, SLU_GE);
682 #endif
683 #else
684 #if defined(PETSC_USE_REAL_SINGLE)
685   sCreate_Dense_Matrix(&lu->B, m, 1, PETSC_NULL, m, SLU_DN, SLU_S, SLU_GE);
686   sCreate_Dense_Matrix(&lu->X, m, 1, PETSC_NULL, m, SLU_DN, SLU_S, SLU_GE);
687 #else
688   dCreate_Dense_Matrix(&lu->B, m, 1, PETSC_NULL, m, SLU_DN, SLU_D, SLU_GE);
689   dCreate_Dense_Matrix(&lu->X, m, 1, PETSC_NULL, m, SLU_DN, SLU_D, SLU_GE);
690 #endif
691 #endif
692 
693 #if defined(SUPERLU2)
694   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatCreateNull","MatCreateNull_SuperLU",(void(*)(void))MatCreateNull_SuperLU);CHKERRQ(ierr);
695 #endif
696   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_seqaij_superlu",MatFactorGetSolverPackage_seqaij_superlu);CHKERRQ(ierr);
697   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSuperluSetILUDropTol_C","MatSuperluSetILUDropTol_SuperLU",MatSuperluSetILUDropTol_SuperLU);CHKERRQ(ierr);
698   B->spptr = lu;
699   *F = B;
700   PetscFunctionReturn(0);
701 }
702 EXTERN_C_END
703 
704