xref: /petsc/src/mat/impls/aij/seq/superlu/superlu.c (revision 2205254efee3a00a594e5e2a3a70f74dcb40bc03)
1 
2 /*  --------------------------------------------------------------------
3 
4      This file implements a subclass of the SeqAIJ matrix class that uses
5      the SuperLU sparse solver. You can use this as a starting point for
6      implementing your own subclass of a PETSc matrix class.
7 
8      This demonstrates a way to make an implementation inheritence of a PETSc
9      matrix type. This means constructing a new matrix type (SuperLU) by changing some
10      of the methods of the previous type (SeqAIJ), adding additional data, and possibly
11      additional method. (See any book on object oriented programming).
12 */
13 
14 /*
15      Defines the data structure for the base matrix type (SeqAIJ)
16 */
17 #include <../src/mat/impls/aij/seq/aij.h>    /*I "petscmat.h" I*/
18 
19 /*
20      SuperLU include files
21 */
22 EXTERN_C_BEGIN
23 #if defined(PETSC_USE_COMPLEX)
24 #if defined(PETSC_USE_REAL_SINGLE)
25 #include <slu_cdefs.h>
26 #else
27 #include <slu_zdefs.h>
28 #endif
29 #else
30 #if defined(PETSC_USE_REAL_SINGLE)
31 #include <slu_sdefs.h>
32 #else
33 #include <slu_ddefs.h>
34 #endif
35 #endif
36 #include <slu_util.h>
37 EXTERN_C_END
38 
39 /*
40      This is the data we are "ADDING" to the SeqAIJ matrix type to get the SuperLU matrix type
41 */
42 typedef struct {
43   SuperMatrix       A,L,U,B,X;
44   superlu_options_t options;
45   PetscInt          *perm_c; /* column permutation vector */
46   PetscInt          *perm_r; /* row permutations from partial pivoting */
47   PetscInt          *etree;
48   PetscReal         *R, *C;
49   char              equed[1];
50   PetscInt          lwork;
51   void              *work;
52   PetscReal         rpg, rcond;
53   mem_usage_t       mem_usage;
54   MatStructure      flg;
55   SuperLUStat_t     stat;
56   Mat               A_dup;
57   PetscScalar       *rhs_dup;
58 
59   /* Flag to clean up (non-global) SuperLU objects during Destroy */
60   PetscBool CleanUpSuperLU;
61 } Mat_SuperLU;
62 
63 extern PetscErrorCode MatFactorInfo_SuperLU(Mat,PetscViewer);
64 extern PetscErrorCode MatLUFactorNumeric_SuperLU(Mat,Mat,const MatFactorInfo*);
65 extern PetscErrorCode MatDestroy_SuperLU(Mat);
66 extern PetscErrorCode MatView_SuperLU(Mat,PetscViewer);
67 extern PetscErrorCode MatAssemblyEnd_SuperLU(Mat,MatAssemblyType);
68 extern PetscErrorCode MatSolve_SuperLU(Mat,Vec,Vec);
69 extern PetscErrorCode MatMatSolve_SuperLU(Mat,Mat,Mat);
70 extern PetscErrorCode MatSolveTranspose_SuperLU(Mat,Vec,Vec);
71 extern PetscErrorCode MatLUFactorSymbolic_SuperLU(Mat,Mat,IS,IS,const MatFactorInfo*);
72 extern PetscErrorCode MatDuplicate_SuperLU(Mat, MatDuplicateOption, Mat*);
73 
74 /*
75     Utility function
76 */
77 #undef __FUNCT__
78 #define __FUNCT__ "MatFactorInfo_SuperLU"
79 PetscErrorCode MatFactorInfo_SuperLU(Mat A,PetscViewer viewer)
80 {
81   Mat_SuperLU       *lu= (Mat_SuperLU*)A->spptr;
82   PetscErrorCode    ierr;
83   superlu_options_t options;
84 
85   PetscFunctionBegin;
86   /* check if matrix is superlu_dist type */
87   if (A->ops->solve != MatSolve_SuperLU) PetscFunctionReturn(0);
88 
89   options = lu->options;
90 
91   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU run parameters:\n");CHKERRQ(ierr);
92   ierr = PetscViewerASCIIPrintf(viewer,"  Equil: %s\n",(options.Equil != NO) ? "YES": "NO");CHKERRQ(ierr);
93   ierr = PetscViewerASCIIPrintf(viewer,"  ColPerm: %D\n",options.ColPerm);CHKERRQ(ierr);
94   ierr = PetscViewerASCIIPrintf(viewer,"  IterRefine: %D\n",options.IterRefine);CHKERRQ(ierr);
95   ierr = PetscViewerASCIIPrintf(viewer,"  SymmetricMode: %s\n",(options.SymmetricMode != NO) ? "YES": "NO");CHKERRQ(ierr);
96   ierr = PetscViewerASCIIPrintf(viewer,"  DiagPivotThresh: %g\n",options.DiagPivotThresh);CHKERRQ(ierr);
97   ierr = PetscViewerASCIIPrintf(viewer,"  PivotGrowth: %s\n",(options.PivotGrowth != NO) ? "YES": "NO");CHKERRQ(ierr);
98   ierr = PetscViewerASCIIPrintf(viewer,"  ConditionNumber: %s\n",(options.ConditionNumber != NO) ? "YES": "NO");CHKERRQ(ierr);
99   ierr = PetscViewerASCIIPrintf(viewer,"  RowPerm: %D\n",options.RowPerm);CHKERRQ(ierr);
100   ierr = PetscViewerASCIIPrintf(viewer,"  ReplaceTinyPivot: %s\n",(options.ReplaceTinyPivot != NO) ? "YES": "NO");CHKERRQ(ierr);
101   ierr = PetscViewerASCIIPrintf(viewer,"  PrintStat: %s\n",(options.PrintStat != NO) ? "YES": "NO");CHKERRQ(ierr);
102   ierr = PetscViewerASCIIPrintf(viewer,"  lwork: %D\n",lu->lwork);CHKERRQ(ierr);
103   if (A->factortype == MAT_FACTOR_ILU) {
104     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_DropTol: %g\n",options.ILU_DropTol);CHKERRQ(ierr);
105     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_FillTol: %g\n",options.ILU_FillTol);CHKERRQ(ierr);
106     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_FillFactor: %g\n",options.ILU_FillFactor);CHKERRQ(ierr);
107     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_DropRule: %D\n",options.ILU_DropRule);CHKERRQ(ierr);
108     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_Norm: %D\n",options.ILU_Norm);CHKERRQ(ierr);
109     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_MILU: %D\n",options.ILU_MILU);CHKERRQ(ierr);
110   }
111   PetscFunctionReturn(0);
112 }
113 
114 /*
115     These are the methods provided to REPLACE the corresponding methods of the
116    SeqAIJ matrix class. Other methods of SeqAIJ are not replaced
117 */
118 #undef __FUNCT__
119 #define __FUNCT__ "MatLUFactorNumeric_SuperLU"
120 PetscErrorCode MatLUFactorNumeric_SuperLU(Mat F,Mat A,const MatFactorInfo *info)
121 {
122   Mat_SuperLU    *lu = (Mat_SuperLU*)F->spptr;
123   Mat_SeqAIJ     *aa;
124   PetscErrorCode ierr;
125   PetscInt       sinfo;
126   PetscReal      ferr, berr;
127   NCformat       *Ustore;
128   SCformat       *Lstore;
129 
130   PetscFunctionBegin;
131   if (lu->flg == SAME_NONZERO_PATTERN) { /* successing numerical factorization */
132     lu->options.Fact = SamePattern;
133     /* Ref: ~SuperLU_3.0/EXAMPLE/dlinsolx2.c */
134     Destroy_SuperMatrix_Store(&lu->A);
135     if (lu->options.Equil) {
136       ierr = MatCopy_SeqAIJ(A,lu->A_dup,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
137     }
138     if (lu->lwork >= 0) {
139       Destroy_SuperNode_Matrix(&lu->L);
140       Destroy_CompCol_Matrix(&lu->U);
141       lu->options.Fact = SamePattern;
142     }
143   }
144 
145   /* Create the SuperMatrix for lu->A=A^T:
146        Since SuperLU likes column-oriented matrices,we pass it the transpose,
147        and then solve A^T X = B in MatSolve(). */
148   if (lu->options.Equil) {
149     aa = (Mat_SeqAIJ*)(lu->A_dup)->data;
150   } else {
151     aa = (Mat_SeqAIJ*)(A)->data;
152   }
153 #if defined(PETSC_USE_COMPLEX)
154 #if defined(PETSC_USE_REAL_SINGLE)
155   cCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,(singlecomplex*)aa->a,aa->j,aa->i,SLU_NC,SLU_C,SLU_GE);
156 #else
157   zCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,SLU_NC,SLU_Z,SLU_GE);
158 #endif
159 #else
160 #if defined(PETSC_USE_REAL_SINGLE)
161   sCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,aa->a,aa->j,aa->i,SLU_NC,SLU_S,SLU_GE);
162 #else
163   dCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,aa->a,aa->j,aa->i,SLU_NC,SLU_D,SLU_GE);
164 #endif
165 #endif
166 
167   /* Numerical factorization */
168   lu->B.ncol = 0;  /* Indicate not to solve the system */
169   if (F->factortype == MAT_FACTOR_LU) {
170 #if defined(PETSC_USE_COMPLEX)
171 #if defined(PETSC_USE_REAL_SINGLE)
172     cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
173            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
174            &lu->mem_usage, &lu->stat, &sinfo);
175 #else
176     zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
177            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
178            &lu->mem_usage, &lu->stat, &sinfo);
179 #endif
180 #else
181 #if defined(PETSC_USE_REAL_SINGLE)
182     sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
183            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
184            &lu->mem_usage, &lu->stat, &sinfo);
185 #else
186     dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
187            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
188            &lu->mem_usage, &lu->stat, &sinfo);
189 #endif
190 #endif
191   } else if (F->factortype == MAT_FACTOR_ILU) {
192     /* Compute the incomplete factorization, condition number and pivot growth */
193 #if defined(PETSC_USE_COMPLEX)
194 #if defined(PETSC_USE_REAL_SINGLE)
195     cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r,lu->etree, lu->equed, lu->R, lu->C,
196            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
197            &lu->mem_usage, &lu->stat, &sinfo);
198 #else
199     zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r,lu->etree, lu->equed, lu->R, lu->C,
200            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
201            &lu->mem_usage, &lu->stat, &sinfo);
202 #endif
203 #else
204 #if defined(PETSC_USE_REAL_SINGLE)
205     sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
206            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
207            &lu->mem_usage, &lu->stat, &sinfo);
208 #else
209     dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
210            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
211            &lu->mem_usage, &lu->stat, &sinfo);
212 #endif
213 #endif
214   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
215   if (!sinfo || sinfo == lu->A.ncol+1) {
216     if (lu->options.PivotGrowth) {
217       ierr = PetscPrintf(PETSC_COMM_SELF,"  Recip. pivot growth = %e\n", lu->rpg);
218     }
219     if (lu->options.ConditionNumber) {
220       ierr = PetscPrintf(PETSC_COMM_SELF,"  Recip. condition number = %e\n", lu->rcond);
221     }
222   } else if (sinfo > 0) {
223     if (lu->lwork == -1) {
224       ierr = PetscPrintf(PETSC_COMM_SELF,"  ** Estimated memory: %D bytes\n", sinfo - lu->A.ncol);
225     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot in row %D",sinfo);
226   } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %D, the %D-th argument in gssvx() had an illegal value", sinfo,-sinfo);
227 
228   if (lu->options.PrintStat) {
229     ierr = PetscPrintf(PETSC_COMM_SELF,"MatLUFactorNumeric_SuperLU():\n");
230     StatPrint(&lu->stat);
231     Lstore = (SCformat*) lu->L.Store;
232     Ustore = (NCformat*) lu->U.Store;
233     ierr   = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in factor L = %D\n", Lstore->nnz);
234     ierr   = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in factor U = %D\n", Ustore->nnz);
235     ierr   = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in L+U = %D\n", Lstore->nnz + Ustore->nnz - lu->A.ncol);
236     ierr   = PetscPrintf(PETSC_COMM_SELF,"  L\\U MB %.3f\ttotal MB needed %.3f\n",
237                          lu->mem_usage.for_lu/1e6, lu->mem_usage.total_needed/1e6);
238   }
239 
240   lu->flg                = SAME_NONZERO_PATTERN;
241   F->ops->solve          = MatSolve_SuperLU;
242   F->ops->solvetranspose = MatSolveTranspose_SuperLU;
243   F->ops->matsolve       = MatMatSolve_SuperLU;
244   PetscFunctionReturn(0);
245 }
246 
247 #undef __FUNCT__
248 #define __FUNCT__ "MatDestroy_SuperLU"
249 PetscErrorCode MatDestroy_SuperLU(Mat A)
250 {
251   PetscErrorCode ierr;
252   Mat_SuperLU    *lu=(Mat_SuperLU*)A->spptr;
253 
254   PetscFunctionBegin;
255   if (lu && lu->CleanUpSuperLU) { /* Free the SuperLU datastructures */
256     Destroy_SuperMatrix_Store(&lu->A);
257     Destroy_SuperMatrix_Store(&lu->B);
258     Destroy_SuperMatrix_Store(&lu->X);
259     StatFree(&lu->stat);
260     if (lu->lwork >= 0) {
261       Destroy_SuperNode_Matrix(&lu->L);
262       Destroy_CompCol_Matrix(&lu->U);
263     }
264   }
265   if (lu) {
266     ierr = PetscFree(lu->etree);CHKERRQ(ierr);
267     ierr = PetscFree(lu->perm_r);CHKERRQ(ierr);
268     ierr = PetscFree(lu->perm_c);CHKERRQ(ierr);
269     ierr = PetscFree(lu->R);CHKERRQ(ierr);
270     ierr = PetscFree(lu->C);CHKERRQ(ierr);
271     ierr = PetscFree(lu->rhs_dup);CHKERRQ(ierr);
272     ierr = MatDestroy(&lu->A_dup);CHKERRQ(ierr);
273   }
274   ierr = PetscFree(A->spptr);CHKERRQ(ierr);
275 
276   /* clear composed functions */
277   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatFactorGetSolverPackage_C","",PETSC_NULL);CHKERRQ(ierr);
278   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatSuperluSetILUDropTol_C","",PETSC_NULL);CHKERRQ(ierr);
279 
280   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
281   PetscFunctionReturn(0);
282 }
283 
284 #undef __FUNCT__
285 #define __FUNCT__ "MatView_SuperLU"
286 PetscErrorCode MatView_SuperLU(Mat A,PetscViewer viewer)
287 {
288   PetscErrorCode    ierr;
289   PetscBool         iascii;
290   PetscViewerFormat format;
291 
292   PetscFunctionBegin;
293   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
294   if (iascii) {
295     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
296     if (format == PETSC_VIEWER_ASCII_INFO) {
297       ierr = MatFactorInfo_SuperLU(A,viewer);CHKERRQ(ierr);
298     }
299   }
300   PetscFunctionReturn(0);
301 }
302 
303 
304 #undef __FUNCT__
305 #define __FUNCT__ "MatSolve_SuperLU_Private"
306 PetscErrorCode MatSolve_SuperLU_Private(Mat A,Vec b,Vec x)
307 {
308   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
309   PetscScalar    *barray,*xarray;
310   PetscErrorCode ierr;
311   PetscInt       info,i,n=x->map->n;
312   PetscReal      ferr,berr;
313 
314   PetscFunctionBegin;
315   if (lu->lwork == -1) PetscFunctionReturn(0);
316 
317   lu->B.ncol = 1;   /* Set the number of right-hand side */
318   if (lu->options.Equil && !lu->rhs_dup) {
319     /* superlu overwrites b when Equil is used, thus create rhs_dup to keep user's b unchanged */
320     ierr = PetscMalloc(n*sizeof(PetscScalar),&lu->rhs_dup);CHKERRQ(ierr);
321   }
322   if (lu->options.Equil) {
323     /* Copy b into rsh_dup */
324     ierr   = VecGetArray(b,&barray);CHKERRQ(ierr);
325     ierr   = PetscMemcpy(lu->rhs_dup,barray,n*sizeof(PetscScalar));CHKERRQ(ierr);
326     ierr   = VecRestoreArray(b,&barray);CHKERRQ(ierr);
327     barray = lu->rhs_dup;
328   } else {
329     ierr = VecGetArray(b,&barray);CHKERRQ(ierr);
330   }
331   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
332 
333 #if defined(PETSC_USE_COMPLEX)
334 #if defined(PETSC_USE_REAL_SINGLE)
335   ((DNformat*)lu->B.Store)->nzval = (singlecomplex*)barray;
336   ((DNformat*)lu->X.Store)->nzval = (singlecomplex*)xarray;
337 #else
338   ((DNformat*)lu->B.Store)->nzval = (doublecomplex*)barray;
339   ((DNformat*)lu->X.Store)->nzval = (doublecomplex*)xarray;
340 #endif
341 #else
342   ((DNformat*)lu->B.Store)->nzval = barray;
343   ((DNformat*)lu->X.Store)->nzval = xarray;
344 #endif
345 
346   lu->options.Fact = FACTORED; /* Indicate the factored form of A is supplied. */
347   if (A->factortype == MAT_FACTOR_LU) {
348 #if defined(PETSC_USE_COMPLEX)
349 #if defined(PETSC_USE_REAL_SINGLE)
350     cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
351            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
352            &lu->mem_usage, &lu->stat, &info);
353 #else
354     zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
355            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
356            &lu->mem_usage, &lu->stat, &info);
357 #endif
358 #else
359 #if defined(PETSC_USE_REAL_SINGLE)
360     sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
361            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
362            &lu->mem_usage, &lu->stat, &info);
363 #else
364     dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
365            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
366            &lu->mem_usage, &lu->stat, &info);
367 #endif
368 #endif
369   } else if (A->factortype == MAT_FACTOR_ILU) {
370 #if defined(PETSC_USE_COMPLEX)
371 #if defined(PETSC_USE_REAL_SINGLE)
372     cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
373            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
374            &lu->mem_usage, &lu->stat, &info);
375 #else
376     zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
377            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
378            &lu->mem_usage, &lu->stat, &info);
379 #endif
380 #else
381 #if defined(PETSC_USE_REAL_SINGLE)
382     sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
383            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
384            &lu->mem_usage, &lu->stat, &info);
385 #else
386     dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
387            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
388            &lu->mem_usage, &lu->stat, &info);
389 #endif
390 #endif
391   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
392   if (!lu->options.Equil) {
393     ierr = VecRestoreArray(b,&barray);CHKERRQ(ierr);
394   }
395   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
396 
397   if (!info || info == lu->A.ncol+1) {
398     if (lu->options.IterRefine) {
399       ierr = PetscPrintf(PETSC_COMM_SELF,"Iterative Refinement:\n");
400       ierr = PetscPrintf(PETSC_COMM_SELF,"  %8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR");
401       for (i = 0; i < 1; ++i) {
402         ierr = PetscPrintf(PETSC_COMM_SELF,"  %8d%8d%16e%16e\n", i+1, lu->stat.RefineSteps, ferr, berr);
403       }
404     }
405   } else if (info > 0) {
406     if (lu->lwork == -1) {
407       ierr = PetscPrintf(PETSC_COMM_SELF,"  ** Estimated memory: %D bytes\n", info - lu->A.ncol);
408     } else {
409       ierr = PetscPrintf(PETSC_COMM_SELF,"  Warning: gssvx() returns info %D\n",info);
410     }
411   } else if (info < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %D, the %D-th argument in gssvx() had an illegal value", info,-info);
412 
413   if (lu->options.PrintStat) {
414     ierr = PetscPrintf(PETSC_COMM_SELF,"MatSolve__SuperLU():\n");
415     StatPrint(&lu->stat);
416   }
417   PetscFunctionReturn(0);
418 }
419 
420 #undef __FUNCT__
421 #define __FUNCT__ "MatSolve_SuperLU"
422 PetscErrorCode MatSolve_SuperLU(Mat A,Vec b,Vec x)
423 {
424   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
425   PetscErrorCode ierr;
426 
427   PetscFunctionBegin;
428   lu->options.Trans = TRANS;
429 
430   ierr = MatSolve_SuperLU_Private(A,b,x);CHKERRQ(ierr);
431   PetscFunctionReturn(0);
432 }
433 
434 #undef __FUNCT__
435 #define __FUNCT__ "MatSolveTranspose_SuperLU"
436 PetscErrorCode MatSolveTranspose_SuperLU(Mat A,Vec b,Vec x)
437 {
438   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
439   PetscErrorCode ierr;
440 
441   PetscFunctionBegin;
442   lu->options.Trans = NOTRANS;
443 
444   ierr = MatSolve_SuperLU_Private(A,b,x);CHKERRQ(ierr);
445   PetscFunctionReturn(0);
446 }
447 
448 #undef __FUNCT__
449 #define __FUNCT__ "MatMatSolve_SuperLU"
450 PetscErrorCode MatMatSolve_SuperLU(Mat A,Mat B,Mat X)
451 {
452   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
453   PetscBool      flg;
454   PetscErrorCode ierr;
455 
456   PetscFunctionBegin;
457   ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,PETSC_NULL);CHKERRQ(ierr);
458   if (!flg) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix");
459   ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,PETSC_NULL);CHKERRQ(ierr);
460   if (!flg) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");
461   lu->options.Trans = TRANS;
462   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatSolve_SuperLU() is not implemented yet");
463   PetscFunctionReturn(0);
464 }
465 
466 /*
467    Note the r permutation is ignored
468 */
469 #undef __FUNCT__
470 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU"
471 PetscErrorCode MatLUFactorSymbolic_SuperLU(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
472 {
473   Mat_SuperLU *lu = (Mat_SuperLU*)(F->spptr);
474 
475   PetscFunctionBegin;
476   lu->flg                 = DIFFERENT_NONZERO_PATTERN;
477   lu->CleanUpSuperLU      = PETSC_TRUE;
478   F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU;
479   PetscFunctionReturn(0);
480 }
481 
482 EXTERN_C_BEGIN
483 #undef __FUNCT__
484 #define __FUNCT__ "MatSuperluSetILUDropTol_SuperLU"
485 PetscErrorCode MatSuperluSetILUDropTol_SuperLU(Mat F,PetscReal dtol)
486 {
487   Mat_SuperLU *lu= (Mat_SuperLU*)F->spptr;
488 
489   PetscFunctionBegin;
490   lu->options.ILU_DropTol = dtol;
491   PetscFunctionReturn(0);
492 }
493 EXTERN_C_END
494 
495 #undef __FUNCT__
496 #define __FUNCT__ "MatSuperluSetILUDropTol"
497 /*@
498   MatSuperluSetILUDropTol - Set SuperLU ILU drop tolerance
499    Logically Collective on Mat
500 
501    Input Parameters:
502 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-SuperLU interface
503 -  dtol - drop tolerance
504 
505   Options Database:
506 .   -mat_superlu_ilu_droptol <dtol>
507 
508    Level: beginner
509 
510    References: SuperLU Users' Guide
511 
512 .seealso: MatGetFactor()
513 @*/
514 PetscErrorCode MatSuperluSetILUDropTol(Mat F,PetscReal dtol)
515 {
516   PetscErrorCode ierr;
517 
518   PetscFunctionBegin;
519   PetscValidHeaderSpecific(F,MAT_CLASSID,1);
520   PetscValidLogicalCollectiveInt(F,dtol,2);
521   ierr = PetscTryMethod(F,"MatSuperluSetILUDropTol_C",(Mat,PetscReal),(F,dtol));CHKERRQ(ierr);
522   PetscFunctionReturn(0);
523 }
524 
525 EXTERN_C_BEGIN
526 #undef __FUNCT__
527 #define __FUNCT__ "MatFactorGetSolverPackage_seqaij_superlu"
528 PetscErrorCode MatFactorGetSolverPackage_seqaij_superlu(Mat A,const MatSolverPackage *type)
529 {
530   PetscFunctionBegin;
531   *type = MATSOLVERSUPERLU;
532   PetscFunctionReturn(0);
533 }
534 EXTERN_C_END
535 
536 
537 /*MC
538   MATSOLVERSUPERLU = "superlu" - A solver package providing solvers LU and ILU for sequential matrices
539   via the external package SuperLU.
540 
541   Use ./configure --download-superlu to have PETSc installed with SuperLU
542 
543   Options Database Keys:
544 + -mat_superlu_equil <FALSE>            - Equil (None)
545 . -mat_superlu_colperm <COLAMD>         - (choose one of) NATURAL MMD_ATA MMD_AT_PLUS_A COLAMD
546 . -mat_superlu_iterrefine <NOREFINE>    - (choose one of) NOREFINE SINGLE DOUBLE EXTRA
547 . -mat_superlu_symmetricmode: <FALSE>   - SymmetricMode (None)
548 . -mat_superlu_diagpivotthresh <1>      - DiagPivotThresh (None)
549 . -mat_superlu_pivotgrowth <FALSE>      - PivotGrowth (None)
550 . -mat_superlu_conditionnumber <FALSE>  - ConditionNumber (None)
551 . -mat_superlu_rowperm <NOROWPERM>      - (choose one of) NOROWPERM LargeDiag
552 . -mat_superlu_replacetinypivot <FALSE> - ReplaceTinyPivot (None)
553 . -mat_superlu_printstat <FALSE>        - PrintStat (None)
554 . -mat_superlu_lwork <0>                - size of work array in bytes used by factorization (None)
555 . -mat_superlu_ilu_droptol <0>          - ILU_DropTol (None)
556 . -mat_superlu_ilu_filltol <0>          - ILU_FillTol (None)
557 . -mat_superlu_ilu_fillfactor <0>       - ILU_FillFactor (None)
558 . -mat_superlu_ilu_droprull <0>         - ILU_DropRule (None)
559 . -mat_superlu_ilu_norm <0>             - ILU_Norm (None)
560 - -mat_superlu_ilu_milu <0>             - ILU_MILU (None)
561 
562    Notes: Do not confuse this with MATSOLVERSUPERLU_DIST which is for parallel sparse solves
563 
564    Level: beginner
565 
566 .seealso: PCLU, PCILU, MATSOLVERSUPERLU_DIST, MATSOLVERMUMPS, PCFactorSetMatSolverPackage(), MatSolverPackage
567 M*/
568 
569 EXTERN_C_BEGIN
570 #undef __FUNCT__
571 #define __FUNCT__ "MatGetFactor_seqaij_superlu"
572 PetscErrorCode MatGetFactor_seqaij_superlu(Mat A,MatFactorType ftype,Mat *F)
573 {
574   Mat            B;
575   Mat_SuperLU    *lu;
576   PetscErrorCode ierr;
577   PetscInt       indx,m=A->rmap->n,n=A->cmap->n;
578   PetscBool      flg;
579   PetscReal      real_input;
580   const char     *colperm[]   ={"NATURAL","MMD_ATA","MMD_AT_PLUS_A","COLAMD"}; /* MY_PERMC - not supported by the petsc interface yet */
581   const char     *iterrefine[]={"NOREFINE", "SINGLE", "DOUBLE", "EXTRA"};
582   const char     *rowperm[]   ={"NOROWPERM", "LargeDiag"}; /* MY_PERMC - not supported by the petsc interface yet */
583 
584   PetscFunctionBegin;
585   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
586   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
587   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
588   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
589 
590   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) {
591     B->ops->lufactorsymbolic  = MatLUFactorSymbolic_SuperLU;
592     B->ops->ilufactorsymbolic = MatLUFactorSymbolic_SuperLU;
593   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
594 
595   B->ops->destroy = MatDestroy_SuperLU;
596   B->ops->view    = MatView_SuperLU;
597   B->factortype   = ftype;
598   B->assembled    = PETSC_TRUE;           /* required by -ksp_view */
599   B->preallocated = PETSC_TRUE;
600 
601   ierr = PetscNewLog(B,Mat_SuperLU,&lu);CHKERRQ(ierr);
602 
603   if (ftype == MAT_FACTOR_LU) {
604     set_default_options(&lu->options);
605     /* Comments from SuperLU_4.0/SRC/dgssvx.c:
606       "Whether or not the system will be equilibrated depends on the
607        scaling of the matrix A, but if equilibration is used, A is
608        overwritten by diag(R)*A*diag(C) and B by diag(R)*B
609        (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans = TRANS or CONJ)."
610      We set 'options.Equil = NO' as default because additional space is needed for it.
611     */
612     lu->options.Equil = NO;
613   } else if (ftype == MAT_FACTOR_ILU) {
614     /* Set the default input options of ilu: */
615     ilu_set_default_options(&lu->options);
616   }
617   lu->options.PrintStat = NO;
618 
619   /* Initialize the statistics variables. */
620   StatInit(&lu->stat);
621   lu->lwork = 0;   /* allocate space internally by system malloc */
622 
623   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU Options","Mat");CHKERRQ(ierr);
624   ierr = PetscOptionsBool("-mat_superlu_equil","Equil","None",(PetscBool)lu->options.Equil,(PetscBool*)&lu->options.Equil,0);CHKERRQ(ierr);
625   ierr = PetscOptionsEList("-mat_superlu_colperm","ColPerm","None",colperm,4,colperm[3],&indx,&flg);CHKERRQ(ierr);
626   if (flg) lu->options.ColPerm = (colperm_t)indx;
627   ierr = PetscOptionsEList("-mat_superlu_iterrefine","IterRefine","None",iterrefine,4,iterrefine[0],&indx,&flg);CHKERRQ(ierr);
628   if (flg) lu->options.IterRefine = (IterRefine_t)indx;
629   ierr = PetscOptionsBool("-mat_superlu_symmetricmode","SymmetricMode","None",(PetscBool)lu->options.SymmetricMode,&flg,0);CHKERRQ(ierr);
630   if (flg) lu->options.SymmetricMode = YES;
631   ierr = PetscOptionsReal("-mat_superlu_diagpivotthresh","DiagPivotThresh","None",lu->options.DiagPivotThresh,&real_input,&flg);CHKERRQ(ierr);
632   if (flg) lu->options.DiagPivotThresh = (double) real_input;
633   ierr = PetscOptionsBool("-mat_superlu_pivotgrowth","PivotGrowth","None",(PetscBool)lu->options.PivotGrowth,&flg,0);CHKERRQ(ierr);
634   if (flg) lu->options.PivotGrowth = YES;
635   ierr = PetscOptionsBool("-mat_superlu_conditionnumber","ConditionNumber","None",(PetscBool)lu->options.ConditionNumber,&flg,0);CHKERRQ(ierr);
636   if (flg) lu->options.ConditionNumber = YES;
637   ierr = PetscOptionsEList("-mat_superlu_rowperm","rowperm","None",rowperm,2,rowperm[lu->options.RowPerm],&indx,&flg);CHKERRQ(ierr);
638   if (flg) lu->options.RowPerm = (rowperm_t)indx;
639   ierr = PetscOptionsBool("-mat_superlu_replacetinypivot","ReplaceTinyPivot","None",(PetscBool)lu->options.ReplaceTinyPivot,&flg,0);CHKERRQ(ierr);
640   if (flg) lu->options.ReplaceTinyPivot = YES;
641   ierr = PetscOptionsBool("-mat_superlu_printstat","PrintStat","None",(PetscBool)lu->options.PrintStat,&flg,0);CHKERRQ(ierr);
642   if (flg) lu->options.PrintStat = YES;
643   ierr = PetscOptionsInt("-mat_superlu_lwork","size of work array in bytes used by factorization","None",lu->lwork,&lu->lwork,PETSC_NULL);CHKERRQ(ierr);
644   if (lu->lwork > 0) {
645     ierr = PetscMalloc(lu->lwork,&lu->work);CHKERRQ(ierr);
646   } else if (lu->lwork != 0 && lu->lwork != -1) {
647     ierr      = PetscPrintf(PETSC_COMM_SELF,"   Warning: lwork %D is not supported by SUPERLU. The default lwork=0 is used.\n",lu->lwork);
648     lu->lwork = 0;
649   }
650   /* ilu options */
651   ierr = PetscOptionsReal("-mat_superlu_ilu_droptol","ILU_DropTol","None",lu->options.ILU_DropTol,&real_input,&flg);CHKERRQ(ierr);
652   if (flg) lu->options.ILU_DropTol = (double) real_input;
653   ierr = PetscOptionsReal("-mat_superlu_ilu_filltol","ILU_FillTol","None",lu->options.ILU_FillTol,&real_input,&flg);CHKERRQ(ierr);
654   if (flg) lu->options.ILU_FillTol = (double) real_input;
655   ierr = PetscOptionsReal("-mat_superlu_ilu_fillfactor","ILU_FillFactor","None",lu->options.ILU_FillFactor,&real_input,&flg);CHKERRQ(ierr);
656   if (flg) lu->options.ILU_FillFactor = (double) real_input;
657   ierr = PetscOptionsInt("-mat_superlu_ilu_droprull","ILU_DropRule","None",lu->options.ILU_DropRule,&lu->options.ILU_DropRule,PETSC_NULL);CHKERRQ(ierr);
658   ierr = PetscOptionsInt("-mat_superlu_ilu_norm","ILU_Norm","None",lu->options.ILU_Norm,&indx,&flg);CHKERRQ(ierr);
659   if (flg) lu->options.ILU_Norm = (norm_t)indx;
660   ierr = PetscOptionsInt("-mat_superlu_ilu_milu","ILU_MILU","None",lu->options.ILU_MILU,&indx,&flg);CHKERRQ(ierr);
661   if (flg) lu->options.ILU_MILU = (milu_t)indx;
662   PetscOptionsEnd();
663   if (lu->options.Equil == YES) {
664     /* superlu overwrites input matrix and rhs when Equil is used, thus create A_dup to keep user's A unchanged */
665     ierr = MatDuplicate_SeqAIJ(A,MAT_COPY_VALUES,&lu->A_dup);CHKERRQ(ierr);
666   }
667 
668   /* Allocate spaces (notice sizes are for the transpose) */
669   ierr = PetscMalloc(m*sizeof(PetscInt),&lu->etree);CHKERRQ(ierr);
670   ierr = PetscMalloc(n*sizeof(PetscInt),&lu->perm_r);CHKERRQ(ierr);
671   ierr = PetscMalloc(m*sizeof(PetscInt),&lu->perm_c);CHKERRQ(ierr);
672   ierr = PetscMalloc(n*sizeof(PetscScalar),&lu->R);CHKERRQ(ierr);
673   ierr = PetscMalloc(m*sizeof(PetscScalar),&lu->C);CHKERRQ(ierr);
674 
675   /* create rhs and solution x without allocate space for .Store */
676 #if defined(PETSC_USE_COMPLEX)
677 #if defined(PETSC_USE_REAL_SINGLE)
678   cCreate_Dense_Matrix(&lu->B, m, 1, PETSC_NULL, m, SLU_DN, SLU_C, SLU_GE);
679   cCreate_Dense_Matrix(&lu->X, m, 1, PETSC_NULL, m, SLU_DN, SLU_C, SLU_GE);
680 #else
681   zCreate_Dense_Matrix(&lu->B, m, 1, PETSC_NULL, m, SLU_DN, SLU_Z, SLU_GE);
682   zCreate_Dense_Matrix(&lu->X, m, 1, PETSC_NULL, m, SLU_DN, SLU_Z, SLU_GE);
683 #endif
684 #else
685 #if defined(PETSC_USE_REAL_SINGLE)
686   sCreate_Dense_Matrix(&lu->B, m, 1, PETSC_NULL, m, SLU_DN, SLU_S, SLU_GE);
687   sCreate_Dense_Matrix(&lu->X, m, 1, PETSC_NULL, m, SLU_DN, SLU_S, SLU_GE);
688 #else
689   dCreate_Dense_Matrix(&lu->B, m, 1, PETSC_NULL, m, SLU_DN, SLU_D, SLU_GE);
690   dCreate_Dense_Matrix(&lu->X, m, 1, PETSC_NULL, m, SLU_DN, SLU_D, SLU_GE);
691 #endif
692 #endif
693 
694 #if defined(SUPERLU2)
695   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatCreateNull","MatCreateNull_SuperLU",(void (*)(void))MatCreateNull_SuperLU);CHKERRQ(ierr);
696 #endif
697   ierr     = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_seqaij_superlu",MatFactorGetSolverPackage_seqaij_superlu);CHKERRQ(ierr);
698   ierr     = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSuperluSetILUDropTol_C","MatSuperluSetILUDropTol_SuperLU",MatSuperluSetILUDropTol_SuperLU);CHKERRQ(ierr);
699   B->spptr = lu;
700   *F       = B;
701   PetscFunctionReturn(0);
702 }
703 EXTERN_C_END
704 
705