xref: /petsc/src/mat/impls/aij/seq/superlu/superlu.c (revision fee518eb64eefbf9bda05b76e476d2dcf413f412)
1 
2 /*  --------------------------------------------------------------------
3 
4      This file implements a subclass of the SeqAIJ matrix class that uses
5      the SuperLU sparse solver. You can use this as a starting point for
6      implementing your own subclass of a PETSc matrix class.
7 
8      This demonstrates a way to make an implementation inheritence of a PETSc
9      matrix type. This means constructing a new matrix type (SuperLU) by changing some
10      of the methods of the previous type (SeqAIJ), adding additional data, and possibly
11      additional method. (See any book on object oriented programming).
12 */
13 
14 /*
15      Defines the data structure for the base matrix type (SeqAIJ)
16 */
17 #include <../src/mat/impls/aij/seq/aij.h>    /*I "petscmat.h" I*/
18 
19 /*
20      SuperLU include files
21 */
22 EXTERN_C_BEGIN
23 #if defined(PETSC_USE_COMPLEX)
24 #if defined(PETSC_USE_REAL_SINGLE)
25 #include <slu_cdefs.h>
26 #else
27 #include <slu_zdefs.h>
28 #endif
29 #else
30 #if defined(PETSC_USE_REAL_SINGLE)
31 #include <slu_sdefs.h>
32 #else
33 #include <slu_ddefs.h>
34 #endif
35 #endif
36 #include <slu_util.h>
37 EXTERN_C_END
38 
39 /*
40      This is the data we are "ADDING" to the SeqAIJ matrix type to get the SuperLU matrix type
41 */
42 typedef struct {
43   SuperMatrix       A,L,U,B,X;
44   superlu_options_t options;
45   PetscInt          *perm_c; /* column permutation vector */
46   PetscInt          *perm_r; /* row permutations from partial pivoting */
47   PetscInt          *etree;
48   PetscReal         *R, *C;
49   char              equed[1];
50   PetscInt          lwork;
51   void              *work;
52   PetscReal         rpg, rcond;
53   mem_usage_t       mem_usage;
54   MatStructure      flg;
55   SuperLUStat_t     stat;
56   Mat               A_dup;
57   PetscScalar       *rhs_dup;
58 
59   /* Flag to clean up (non-global) SuperLU objects during Destroy */
60   PetscBool CleanUpSuperLU;
61 } Mat_SuperLU;
62 
63 extern PetscErrorCode MatFactorInfo_SuperLU(Mat,PetscViewer);
64 extern PetscErrorCode MatLUFactorNumeric_SuperLU(Mat,Mat,const MatFactorInfo*);
65 extern PetscErrorCode MatDestroy_SuperLU(Mat);
66 extern PetscErrorCode MatView_SuperLU(Mat,PetscViewer);
67 extern PetscErrorCode MatAssemblyEnd_SuperLU(Mat,MatAssemblyType);
68 extern PetscErrorCode MatSolve_SuperLU(Mat,Vec,Vec);
69 extern PetscErrorCode MatMatSolve_SuperLU(Mat,Mat,Mat);
70 extern PetscErrorCode MatSolveTranspose_SuperLU(Mat,Vec,Vec);
71 extern PetscErrorCode MatLUFactorSymbolic_SuperLU(Mat,Mat,IS,IS,const MatFactorInfo*);
72 extern PetscErrorCode MatDuplicate_SuperLU(Mat, MatDuplicateOption, Mat*);
73 
74 /*
75     Utility function
76 */
77 #undef __FUNCT__
78 #define __FUNCT__ "MatFactorInfo_SuperLU"
79 PetscErrorCode MatFactorInfo_SuperLU(Mat A,PetscViewer viewer)
80 {
81   Mat_SuperLU       *lu= (Mat_SuperLU*)A->spptr;
82   PetscErrorCode    ierr;
83   superlu_options_t options;
84 
85   PetscFunctionBegin;
86   /* check if matrix is superlu_dist type */
87   if (A->ops->solve != MatSolve_SuperLU) PetscFunctionReturn(0);
88 
89   options = lu->options;
90 
91   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU run parameters:\n");CHKERRQ(ierr);
92   ierr = PetscViewerASCIIPrintf(viewer,"  Equil: %s\n",(options.Equil != NO) ? "YES" : "NO");CHKERRQ(ierr);
93   ierr = PetscViewerASCIIPrintf(viewer,"  ColPerm: %D\n",options.ColPerm);CHKERRQ(ierr);
94   ierr = PetscViewerASCIIPrintf(viewer,"  IterRefine: %D\n",options.IterRefine);CHKERRQ(ierr);
95   ierr = PetscViewerASCIIPrintf(viewer,"  SymmetricMode: %s\n",(options.SymmetricMode != NO) ? "YES" : "NO");CHKERRQ(ierr);
96   ierr = PetscViewerASCIIPrintf(viewer,"  DiagPivotThresh: %g\n",options.DiagPivotThresh);CHKERRQ(ierr);
97   ierr = PetscViewerASCIIPrintf(viewer,"  PivotGrowth: %s\n",(options.PivotGrowth != NO) ? "YES" : "NO");CHKERRQ(ierr);
98   ierr = PetscViewerASCIIPrintf(viewer,"  ConditionNumber: %s\n",(options.ConditionNumber != NO) ? "YES" : "NO");CHKERRQ(ierr);
99   ierr = PetscViewerASCIIPrintf(viewer,"  RowPerm: %D\n",options.RowPerm);CHKERRQ(ierr);
100   ierr = PetscViewerASCIIPrintf(viewer,"  ReplaceTinyPivot: %s\n",(options.ReplaceTinyPivot != NO) ? "YES" : "NO");CHKERRQ(ierr);
101   ierr = PetscViewerASCIIPrintf(viewer,"  PrintStat: %s\n",(options.PrintStat != NO) ? "YES" : "NO");CHKERRQ(ierr);
102   ierr = PetscViewerASCIIPrintf(viewer,"  lwork: %D\n",lu->lwork);CHKERRQ(ierr);
103   if (A->factortype == MAT_FACTOR_ILU) {
104     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_DropTol: %g\n",options.ILU_DropTol);CHKERRQ(ierr);
105     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_FillTol: %g\n",options.ILU_FillTol);CHKERRQ(ierr);
106     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_FillFactor: %g\n",options.ILU_FillFactor);CHKERRQ(ierr);
107     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_DropRule: %D\n",options.ILU_DropRule);CHKERRQ(ierr);
108     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_Norm: %D\n",options.ILU_Norm);CHKERRQ(ierr);
109     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_MILU: %D\n",options.ILU_MILU);CHKERRQ(ierr);
110   }
111   PetscFunctionReturn(0);
112 }
113 
114 /*
115     These are the methods provided to REPLACE the corresponding methods of the
116    SeqAIJ matrix class. Other methods of SeqAIJ are not replaced
117 */
118 #undef __FUNCT__
119 #define __FUNCT__ "MatLUFactorNumeric_SuperLU"
120 PetscErrorCode MatLUFactorNumeric_SuperLU(Mat F,Mat A,const MatFactorInfo *info)
121 {
122   Mat_SuperLU    *lu = (Mat_SuperLU*)F->spptr;
123   Mat_SeqAIJ     *aa;
124   PetscErrorCode ierr;
125   PetscInt       sinfo;
126   PetscReal      ferr, berr;
127   NCformat       *Ustore;
128   SCformat       *Lstore;
129 
130   PetscFunctionBegin;
131   if (lu->flg == SAME_NONZERO_PATTERN) { /* successing numerical factorization */
132     lu->options.Fact = SamePattern;
133     /* Ref: ~SuperLU_3.0/EXAMPLE/dlinsolx2.c */
134     Destroy_SuperMatrix_Store(&lu->A);
135     if (lu->options.Equil) {
136       ierr = MatCopy_SeqAIJ(A,lu->A_dup,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
137     }
138     if (lu->lwork >= 0) {
139       PetscStackCall("SuperLU:Destroy_SuperNode_Matrix",Destroy_SuperNode_Matrix(&lu->L));
140       PetscStackCall("SuperLU:Destroy_CompCol_Matrix",Destroy_CompCol_Matrix(&lu->U));
141       lu->options.Fact = SamePattern;
142     }
143   }
144 
145   /* Create the SuperMatrix for lu->A=A^T:
146        Since SuperLU likes column-oriented matrices,we pass it the transpose,
147        and then solve A^T X = B in MatSolve(). */
148   if (lu->options.Equil) {
149     aa = (Mat_SeqAIJ*)(lu->A_dup)->data;
150   } else {
151     aa = (Mat_SeqAIJ*)(A)->data;
152   }
153 #if defined(PETSC_USE_COMPLEX)
154 #if defined(PETSC_USE_REAL_SINGLE)
155   PetscStackCall("SuperLU:cCreate_CompCol_Matrix",cCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,(singlecomplex*)aa->a,aa->j,aa->i,SLU_NC,SLU_C,SLU_GE));
156 #else
157   PetscStackCall("SuperLU:zCreate_CompCol_Matrix",zCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,SLU_NC,SLU_Z,SLU_GE));
158 #endif
159 #else
160 #if defined(PETSC_USE_REAL_SINGLE)
161   PetscStackCall("SuperLU:sCreate_CompCol_Matrix",sCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,aa->a,aa->j,aa->i,SLU_NC,SLU_S,SLU_GE));
162 #else
163   PetscStackCall("SuperLU:dCreate_CompCol_Matrix",dCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,aa->a,aa->j,aa->i,SLU_NC,SLU_D,SLU_GE));
164 #endif
165 #endif
166 
167   /* Numerical factorization */
168   lu->B.ncol = 0;  /* Indicate not to solve the system */
169   if (F->factortype == MAT_FACTOR_LU) {
170 #if defined(PETSC_USE_COMPLEX)
171 #if defined(PETSC_USE_REAL_SINGLE)
172     PetscStackCall("SuperLU:cgssvx",cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
173                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
174                                      &lu->mem_usage, &lu->stat, &sinfo));
175 #else
176     PetscStackCall("SuperLU:zgssvx",zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
177                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
178                                      &lu->mem_usage, &lu->stat, &sinfo));
179 #endif
180 #else
181 #if defined(PETSC_USE_REAL_SINGLE)
182     PetscStackCall("SuperLU:sgssvx",sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
183                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
184                                      &lu->mem_usage, &lu->stat, &sinfo));
185 #else
186     PetscStackCall("SuperLU:dgssvx",dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
187                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
188                                      &lu->mem_usage, &lu->stat, &sinfo));
189 #endif
190 #endif
191   } else if (F->factortype == MAT_FACTOR_ILU) {
192     /* Compute the incomplete factorization, condition number and pivot growth */
193 #if defined(PETSC_USE_COMPLEX)
194 #if defined(PETSC_USE_REAL_SINGLE)
195     PetscStackCall("SuperLU:cgsisx",cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r,lu->etree, lu->equed, lu->R, lu->C,
196                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
197                                      &lu->mem_usage, &lu->stat, &sinfo));
198 #else
199     PetscStackCall("SuperLU:zgsisx",zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r,lu->etree, lu->equed, lu->R, lu->C,
200                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
201                                      &lu->mem_usage, &lu->stat, &sinfo));
202 #endif
203 #else
204 #if defined(PETSC_USE_REAL_SINGLE)
205     PetscStackCall("SuperLU:sgsisx",sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
206                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
207                                      &lu->mem_usage, &lu->stat, &sinfo));
208 #else
209     PetscStackCall("SuperLU:dgsisx",dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
210                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
211                                      &lu->mem_usage, &lu->stat, &sinfo));
212 #endif
213 #endif
214   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
215   if (!sinfo || sinfo == lu->A.ncol+1) {
216     if (lu->options.PivotGrowth) {
217       ierr = PetscPrintf(PETSC_COMM_SELF,"  Recip. pivot growth = %e\n", lu->rpg);
218     }
219     if (lu->options.ConditionNumber) {
220       ierr = PetscPrintf(PETSC_COMM_SELF,"  Recip. condition number = %e\n", lu->rcond);
221     }
222   } else if (sinfo > 0) {
223     if (lu->lwork == -1) {
224       ierr = PetscPrintf(PETSC_COMM_SELF,"  ** Estimated memory: %D bytes\n", sinfo - lu->A.ncol);
225     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot in row %D",sinfo);
226   } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %D, the %D-th argument in gssvx() had an illegal value", sinfo,-sinfo);
227 
228   if (lu->options.PrintStat) {
229     ierr = PetscPrintf(PETSC_COMM_SELF,"MatLUFactorNumeric_SuperLU():\n");
230     PetscStackCall("SuperLU:StatPrint",StatPrint(&lu->stat));
231     Lstore = (SCformat*) lu->L.Store;
232     Ustore = (NCformat*) lu->U.Store;
233     ierr   = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in factor L = %D\n", Lstore->nnz);
234     ierr   = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in factor U = %D\n", Ustore->nnz);
235     ierr   = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in L+U = %D\n", Lstore->nnz + Ustore->nnz - lu->A.ncol);
236     ierr   = PetscPrintf(PETSC_COMM_SELF,"  L\\U MB %.3f\ttotal MB needed %.3f\n",
237                          lu->mem_usage.for_lu/1e6, lu->mem_usage.total_needed/1e6);
238   }
239 
240   lu->flg                = SAME_NONZERO_PATTERN;
241   F->ops->solve          = MatSolve_SuperLU;
242   F->ops->solvetranspose = MatSolveTranspose_SuperLU;
243   F->ops->matsolve       = MatMatSolve_SuperLU;
244   PetscFunctionReturn(0);
245 }
246 
247 #undef __FUNCT__
248 #define __FUNCT__ "MatDestroy_SuperLU"
249 PetscErrorCode MatDestroy_SuperLU(Mat A)
250 {
251   PetscErrorCode ierr;
252   Mat_SuperLU    *lu=(Mat_SuperLU*)A->spptr;
253 
254   PetscFunctionBegin;
255   if (lu && lu->CleanUpSuperLU) { /* Free the SuperLU datastructures */
256     PetscStackCall("SuperLU:Destroy_SuperMatrix_Store",Destroy_SuperMatrix_Store(&lu->A));
257     PetscStackCall("SuperLU:Destroy_SuperMatrix_Store",Destroy_SuperMatrix_Store(&lu->B));
258     PetscStackCall("SuperLU:Destroy_SuperMatrix_Store",Destroy_SuperMatrix_Store(&lu->X));
259     PetscStackCall("SuperLU:StatFree",StatFree(&lu->stat));
260     if (lu->lwork >= 0) {
261       PetscStackCall("SuperLU:Destroy_SuperNode_Matrix",Destroy_SuperNode_Matrix(&lu->L));
262       PetscStackCall("SuperLU:Destroy_CompCol_Matrix",Destroy_CompCol_Matrix(&lu->U));
263     }
264   }
265   if (lu) {
266     ierr = PetscFree(lu->etree);CHKERRQ(ierr);
267     ierr = PetscFree(lu->perm_r);CHKERRQ(ierr);
268     ierr = PetscFree(lu->perm_c);CHKERRQ(ierr);
269     ierr = PetscFree(lu->R);CHKERRQ(ierr);
270     ierr = PetscFree(lu->C);CHKERRQ(ierr);
271     ierr = PetscFree(lu->rhs_dup);CHKERRQ(ierr);
272     ierr = MatDestroy(&lu->A_dup);CHKERRQ(ierr);
273   }
274   ierr = PetscFree(A->spptr);CHKERRQ(ierr);
275 
276   /* clear composed functions */
277   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatFactorGetSolverPackage_C","",NULL);CHKERRQ(ierr);
278   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatSuperluSetILUDropTol_C","",NULL);CHKERRQ(ierr);
279 
280   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
281   PetscFunctionReturn(0);
282 }
283 
284 #undef __FUNCT__
285 #define __FUNCT__ "MatView_SuperLU"
286 PetscErrorCode MatView_SuperLU(Mat A,PetscViewer viewer)
287 {
288   PetscErrorCode    ierr;
289   PetscBool         iascii;
290   PetscViewerFormat format;
291 
292   PetscFunctionBegin;
293   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
294   if (iascii) {
295     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
296     if (format == PETSC_VIEWER_ASCII_INFO) {
297       ierr = MatFactorInfo_SuperLU(A,viewer);CHKERRQ(ierr);
298     }
299   }
300   PetscFunctionReturn(0);
301 }
302 
303 
304 #undef __FUNCT__
305 #define __FUNCT__ "MatSolve_SuperLU_Private"
306 PetscErrorCode MatSolve_SuperLU_Private(Mat A,Vec b,Vec x)
307 {
308   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
309   PetscScalar    *barray,*xarray;
310   PetscErrorCode ierr;
311   PetscInt       info,i,n;
312   PetscReal      ferr,berr;
313 
314   PetscFunctionBegin;
315   if (lu->lwork == -1) PetscFunctionReturn(0);
316 
317   ierr = VecGetLocalSize(x,&n);CHKERRQ(ierr);
318   lu->B.ncol = 1;   /* Set the number of right-hand side */
319   if (lu->options.Equil && !lu->rhs_dup) {
320     /* superlu overwrites b when Equil is used, thus create rhs_dup to keep user's b unchanged */
321     ierr = PetscMalloc(n*sizeof(PetscScalar),&lu->rhs_dup);CHKERRQ(ierr);
322   }
323   if (lu->options.Equil) {
324     /* Copy b into rsh_dup */
325     ierr   = VecGetArray(b,&barray);CHKERRQ(ierr);
326     ierr   = PetscMemcpy(lu->rhs_dup,barray,n*sizeof(PetscScalar));CHKERRQ(ierr);
327     ierr   = VecRestoreArray(b,&barray);CHKERRQ(ierr);
328     barray = lu->rhs_dup;
329   } else {
330     ierr = VecGetArray(b,&barray);CHKERRQ(ierr);
331   }
332   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
333 
334 #if defined(PETSC_USE_COMPLEX)
335 #if defined(PETSC_USE_REAL_SINGLE)
336   ((DNformat*)lu->B.Store)->nzval = (singlecomplex*)barray;
337   ((DNformat*)lu->X.Store)->nzval = (singlecomplex*)xarray;
338 #else
339   ((DNformat*)lu->B.Store)->nzval = (doublecomplex*)barray;
340   ((DNformat*)lu->X.Store)->nzval = (doublecomplex*)xarray;
341 #endif
342 #else
343   ((DNformat*)lu->B.Store)->nzval = barray;
344   ((DNformat*)lu->X.Store)->nzval = xarray;
345 #endif
346 
347   lu->options.Fact = FACTORED; /* Indicate the factored form of A is supplied. */
348   if (A->factortype == MAT_FACTOR_LU) {
349 #if defined(PETSC_USE_COMPLEX)
350 #if defined(PETSC_USE_REAL_SINGLE)
351     PetscStackCall("SuperLU:cgssvx",cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
352                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
353                                      &lu->mem_usage, &lu->stat, &info));
354 #else
355     PetscStackCall("SuperLU:zgssvx",zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
356                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
357                                      &lu->mem_usage, &lu->stat, &info));
358 #endif
359 #else
360 #if defined(PETSC_USE_REAL_SINGLE)
361     PetscStackCall("SuperLU:sgssvx",sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
362                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
363                                      &lu->mem_usage, &lu->stat, &info));
364 #else
365     PetscStackCall("SuperLU:dgssvx",dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
366                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
367                                      &lu->mem_usage, &lu->stat, &info));
368 #endif
369 #endif
370   } else if (A->factortype == MAT_FACTOR_ILU) {
371 #if defined(PETSC_USE_COMPLEX)
372 #if defined(PETSC_USE_REAL_SINGLE)
373     PetscStackCall("SuperLU:cgsisx",cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
374                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
375                                      &lu->mem_usage, &lu->stat, &info));
376 #else
377     PetscStackCall("SuperLU:zgsisx",zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
378                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
379                                      &lu->mem_usage, &lu->stat, &info));
380 #endif
381 #else
382 #if defined(PETSC_USE_REAL_SINGLE)
383     PetscStackCall("SuperLU:sgsisx",sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
384                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
385                                      &lu->mem_usage, &lu->stat, &info));
386 #else
387     PetscStackCall("SuperLU:dgsisx",dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
388                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
389                                      &lu->mem_usage, &lu->stat, &info));
390 #endif
391 #endif
392   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
393   if (!lu->options.Equil) {
394     ierr = VecRestoreArray(b,&barray);CHKERRQ(ierr);
395   }
396   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
397 
398   if (!info || info == lu->A.ncol+1) {
399     if (lu->options.IterRefine) {
400       ierr = PetscPrintf(PETSC_COMM_SELF,"Iterative Refinement:\n");
401       ierr = PetscPrintf(PETSC_COMM_SELF,"  %8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR");
402       for (i = 0; i < 1; ++i) {
403         ierr = PetscPrintf(PETSC_COMM_SELF,"  %8d%8d%16e%16e\n", i+1, lu->stat.RefineSteps, ferr, berr);
404       }
405     }
406   } else if (info > 0) {
407     if (lu->lwork == -1) {
408       ierr = PetscPrintf(PETSC_COMM_SELF,"  ** Estimated memory: %D bytes\n", info - lu->A.ncol);
409     } else {
410       ierr = PetscPrintf(PETSC_COMM_SELF,"  Warning: gssvx() returns info %D\n",info);
411     }
412   } else if (info < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %D, the %D-th argument in gssvx() had an illegal value", info,-info);
413 
414   if (lu->options.PrintStat) {
415     ierr = PetscPrintf(PETSC_COMM_SELF,"MatSolve__SuperLU():\n");
416     PetscStackCall("SuperLU:StatPrint",StatPrint(&lu->stat));
417   }
418   PetscFunctionReturn(0);
419 }
420 
421 #undef __FUNCT__
422 #define __FUNCT__ "MatSolve_SuperLU"
423 PetscErrorCode MatSolve_SuperLU(Mat A,Vec b,Vec x)
424 {
425   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
426   PetscErrorCode ierr;
427 
428   PetscFunctionBegin;
429   lu->options.Trans = TRANS;
430 
431   ierr = MatSolve_SuperLU_Private(A,b,x);CHKERRQ(ierr);
432   PetscFunctionReturn(0);
433 }
434 
435 #undef __FUNCT__
436 #define __FUNCT__ "MatSolveTranspose_SuperLU"
437 PetscErrorCode MatSolveTranspose_SuperLU(Mat A,Vec b,Vec x)
438 {
439   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
440   PetscErrorCode ierr;
441 
442   PetscFunctionBegin;
443   lu->options.Trans = NOTRANS;
444 
445   ierr = MatSolve_SuperLU_Private(A,b,x);CHKERRQ(ierr);
446   PetscFunctionReturn(0);
447 }
448 
449 #undef __FUNCT__
450 #define __FUNCT__ "MatMatSolve_SuperLU"
451 PetscErrorCode MatMatSolve_SuperLU(Mat A,Mat B,Mat X)
452 {
453   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
454   PetscBool      flg;
455   PetscErrorCode ierr;
456 
457   PetscFunctionBegin;
458   ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
459   if (!flg) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix");
460   ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
461   if (!flg) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");
462   lu->options.Trans = TRANS;
463   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatSolve_SuperLU() is not implemented yet");
464   PetscFunctionReturn(0);
465 }
466 
467 /*
468    Note the r permutation is ignored
469 */
470 #undef __FUNCT__
471 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU"
472 PetscErrorCode MatLUFactorSymbolic_SuperLU(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
473 {
474   Mat_SuperLU *lu = (Mat_SuperLU*)(F->spptr);
475 
476   PetscFunctionBegin;
477   lu->flg                 = DIFFERENT_NONZERO_PATTERN;
478   lu->CleanUpSuperLU      = PETSC_TRUE;
479   F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU;
480   PetscFunctionReturn(0);
481 }
482 
483 EXTERN_C_BEGIN
484 #undef __FUNCT__
485 #define __FUNCT__ "MatSuperluSetILUDropTol_SuperLU"
486 PetscErrorCode MatSuperluSetILUDropTol_SuperLU(Mat F,PetscReal dtol)
487 {
488   Mat_SuperLU *lu= (Mat_SuperLU*)F->spptr;
489 
490   PetscFunctionBegin;
491   lu->options.ILU_DropTol = dtol;
492   PetscFunctionReturn(0);
493 }
494 EXTERN_C_END
495 
496 #undef __FUNCT__
497 #define __FUNCT__ "MatSuperluSetILUDropTol"
498 /*@
499   MatSuperluSetILUDropTol - Set SuperLU ILU drop tolerance
500    Logically Collective on Mat
501 
502    Input Parameters:
503 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-SuperLU interface
504 -  dtol - drop tolerance
505 
506   Options Database:
507 .   -mat_superlu_ilu_droptol <dtol>
508 
509    Level: beginner
510 
511    References: SuperLU Users' Guide
512 
513 .seealso: MatGetFactor()
514 @*/
515 PetscErrorCode MatSuperluSetILUDropTol(Mat F,PetscReal dtol)
516 {
517   PetscErrorCode ierr;
518 
519   PetscFunctionBegin;
520   PetscValidHeaderSpecific(F,MAT_CLASSID,1);
521   PetscValidLogicalCollectiveInt(F,dtol,2);
522   ierr = PetscTryMethod(F,"MatSuperluSetILUDropTol_C",(Mat,PetscReal),(F,dtol));CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525 
526 EXTERN_C_BEGIN
527 #undef __FUNCT__
528 #define __FUNCT__ "MatFactorGetSolverPackage_seqaij_superlu"
529 PetscErrorCode MatFactorGetSolverPackage_seqaij_superlu(Mat A,const MatSolverPackage *type)
530 {
531   PetscFunctionBegin;
532   *type = MATSOLVERSUPERLU;
533   PetscFunctionReturn(0);
534 }
535 EXTERN_C_END
536 
537 
538 /*MC
539   MATSOLVERSUPERLU = "superlu" - A solver package providing solvers LU and ILU for sequential matrices
540   via the external package SuperLU.
541 
542   Use ./configure --download-superlu to have PETSc installed with SuperLU
543 
544   Options Database Keys:
545 + -mat_superlu_equil <FALSE>            - Equil (None)
546 . -mat_superlu_colperm <COLAMD>         - (choose one of) NATURAL MMD_ATA MMD_AT_PLUS_A COLAMD
547 . -mat_superlu_iterrefine <NOREFINE>    - (choose one of) NOREFINE SINGLE DOUBLE EXTRA
548 . -mat_superlu_symmetricmode: <FALSE>   - SymmetricMode (None)
549 . -mat_superlu_diagpivotthresh <1>      - DiagPivotThresh (None)
550 . -mat_superlu_pivotgrowth <FALSE>      - PivotGrowth (None)
551 . -mat_superlu_conditionnumber <FALSE>  - ConditionNumber (None)
552 . -mat_superlu_rowperm <NOROWPERM>      - (choose one of) NOROWPERM LargeDiag
553 . -mat_superlu_replacetinypivot <FALSE> - ReplaceTinyPivot (None)
554 . -mat_superlu_printstat <FALSE>        - PrintStat (None)
555 . -mat_superlu_lwork <0>                - size of work array in bytes used by factorization (None)
556 . -mat_superlu_ilu_droptol <0>          - ILU_DropTol (None)
557 . -mat_superlu_ilu_filltol <0>          - ILU_FillTol (None)
558 . -mat_superlu_ilu_fillfactor <0>       - ILU_FillFactor (None)
559 . -mat_superlu_ilu_droprull <0>         - ILU_DropRule (None)
560 . -mat_superlu_ilu_norm <0>             - ILU_Norm (None)
561 - -mat_superlu_ilu_milu <0>             - ILU_MILU (None)
562 
563    Notes: Do not confuse this with MATSOLVERSUPERLU_DIST which is for parallel sparse solves
564 
565    Level: beginner
566 
567 .seealso: PCLU, PCILU, MATSOLVERSUPERLU_DIST, MATSOLVERMUMPS, PCFactorSetMatSolverPackage(), MatSolverPackage
568 M*/
569 
570 EXTERN_C_BEGIN
571 #undef __FUNCT__
572 #define __FUNCT__ "MatGetFactor_seqaij_superlu"
573 PetscErrorCode MatGetFactor_seqaij_superlu(Mat A,MatFactorType ftype,Mat *F)
574 {
575   Mat            B;
576   Mat_SuperLU    *lu;
577   PetscErrorCode ierr;
578   PetscInt       indx,m=A->rmap->n,n=A->cmap->n;
579   PetscBool      flg;
580   PetscReal      real_input;
581   const char     *colperm[]   ={"NATURAL","MMD_ATA","MMD_AT_PLUS_A","COLAMD"}; /* MY_PERMC - not supported by the petsc interface yet */
582   const char     *iterrefine[]={"NOREFINE", "SINGLE", "DOUBLE", "EXTRA"};
583   const char     *rowperm[]   ={"NOROWPERM", "LargeDiag"}; /* MY_PERMC - not supported by the petsc interface yet */
584 
585   PetscFunctionBegin;
586   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
587   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
588   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
589   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
590 
591   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) {
592     B->ops->lufactorsymbolic  = MatLUFactorSymbolic_SuperLU;
593     B->ops->ilufactorsymbolic = MatLUFactorSymbolic_SuperLU;
594   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
595 
596   B->ops->destroy = MatDestroy_SuperLU;
597   B->ops->view    = MatView_SuperLU;
598   B->factortype   = ftype;
599   B->assembled    = PETSC_TRUE;           /* required by -ksp_view */
600   B->preallocated = PETSC_TRUE;
601 
602   ierr = PetscNewLog(B,Mat_SuperLU,&lu);CHKERRQ(ierr);
603 
604   if (ftype == MAT_FACTOR_LU) {
605     set_default_options(&lu->options);
606     /* Comments from SuperLU_4.0/SRC/dgssvx.c:
607       "Whether or not the system will be equilibrated depends on the
608        scaling of the matrix A, but if equilibration is used, A is
609        overwritten by diag(R)*A*diag(C) and B by diag(R)*B
610        (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans = TRANS or CONJ)."
611      We set 'options.Equil = NO' as default because additional space is needed for it.
612     */
613     lu->options.Equil = NO;
614   } else if (ftype == MAT_FACTOR_ILU) {
615     /* Set the default input options of ilu: */
616     PetscStackCall("SuperLU:ilu_set_default_options",ilu_set_default_options(&lu->options));
617   }
618   lu->options.PrintStat = NO;
619 
620   /* Initialize the statistics variables. */
621   PetscStackCall("SuperLU:StatInit",StatInit(&lu->stat));
622   lu->lwork = 0;   /* allocate space internally by system malloc */
623 
624   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU Options","Mat");CHKERRQ(ierr);
625   ierr = PetscOptionsBool("-mat_superlu_equil","Equil","None",(PetscBool)lu->options.Equil,(PetscBool*)&lu->options.Equil,0);CHKERRQ(ierr);
626   ierr = PetscOptionsEList("-mat_superlu_colperm","ColPerm","None",colperm,4,colperm[3],&indx,&flg);CHKERRQ(ierr);
627   if (flg) lu->options.ColPerm = (colperm_t)indx;
628   ierr = PetscOptionsEList("-mat_superlu_iterrefine","IterRefine","None",iterrefine,4,iterrefine[0],&indx,&flg);CHKERRQ(ierr);
629   if (flg) lu->options.IterRefine = (IterRefine_t)indx;
630   ierr = PetscOptionsBool("-mat_superlu_symmetricmode","SymmetricMode","None",(PetscBool)lu->options.SymmetricMode,&flg,0);CHKERRQ(ierr);
631   if (flg) lu->options.SymmetricMode = YES;
632   ierr = PetscOptionsReal("-mat_superlu_diagpivotthresh","DiagPivotThresh","None",lu->options.DiagPivotThresh,&real_input,&flg);CHKERRQ(ierr);
633   if (flg) lu->options.DiagPivotThresh = (double) real_input;
634   ierr = PetscOptionsBool("-mat_superlu_pivotgrowth","PivotGrowth","None",(PetscBool)lu->options.PivotGrowth,&flg,0);CHKERRQ(ierr);
635   if (flg) lu->options.PivotGrowth = YES;
636   ierr = PetscOptionsBool("-mat_superlu_conditionnumber","ConditionNumber","None",(PetscBool)lu->options.ConditionNumber,&flg,0);CHKERRQ(ierr);
637   if (flg) lu->options.ConditionNumber = YES;
638   ierr = PetscOptionsEList("-mat_superlu_rowperm","rowperm","None",rowperm,2,rowperm[lu->options.RowPerm],&indx,&flg);CHKERRQ(ierr);
639   if (flg) lu->options.RowPerm = (rowperm_t)indx;
640   ierr = PetscOptionsBool("-mat_superlu_replacetinypivot","ReplaceTinyPivot","None",(PetscBool)lu->options.ReplaceTinyPivot,&flg,0);CHKERRQ(ierr);
641   if (flg) lu->options.ReplaceTinyPivot = YES;
642   ierr = PetscOptionsBool("-mat_superlu_printstat","PrintStat","None",(PetscBool)lu->options.PrintStat,&flg,0);CHKERRQ(ierr);
643   if (flg) lu->options.PrintStat = YES;
644   ierr = PetscOptionsInt("-mat_superlu_lwork","size of work array in bytes used by factorization","None",lu->lwork,&lu->lwork,NULL);CHKERRQ(ierr);
645   if (lu->lwork > 0) {
646     ierr = PetscMalloc(lu->lwork,&lu->work);CHKERRQ(ierr);
647   } else if (lu->lwork != 0 && lu->lwork != -1) {
648     ierr      = PetscPrintf(PETSC_COMM_SELF,"   Warning: lwork %D is not supported by SUPERLU. The default lwork=0 is used.\n",lu->lwork);
649     lu->lwork = 0;
650   }
651   /* ilu options */
652   ierr = PetscOptionsReal("-mat_superlu_ilu_droptol","ILU_DropTol","None",lu->options.ILU_DropTol,&real_input,&flg);CHKERRQ(ierr);
653   if (flg) lu->options.ILU_DropTol = (double) real_input;
654   ierr = PetscOptionsReal("-mat_superlu_ilu_filltol","ILU_FillTol","None",lu->options.ILU_FillTol,&real_input,&flg);CHKERRQ(ierr);
655   if (flg) lu->options.ILU_FillTol = (double) real_input;
656   ierr = PetscOptionsReal("-mat_superlu_ilu_fillfactor","ILU_FillFactor","None",lu->options.ILU_FillFactor,&real_input,&flg);CHKERRQ(ierr);
657   if (flg) lu->options.ILU_FillFactor = (double) real_input;
658   ierr = PetscOptionsInt("-mat_superlu_ilu_droprull","ILU_DropRule","None",lu->options.ILU_DropRule,&lu->options.ILU_DropRule,NULL);CHKERRQ(ierr);
659   ierr = PetscOptionsInt("-mat_superlu_ilu_norm","ILU_Norm","None",lu->options.ILU_Norm,&indx,&flg);CHKERRQ(ierr);
660   if (flg) lu->options.ILU_Norm = (norm_t)indx;
661   ierr = PetscOptionsInt("-mat_superlu_ilu_milu","ILU_MILU","None",lu->options.ILU_MILU,&indx,&flg);CHKERRQ(ierr);
662   if (flg) lu->options.ILU_MILU = (milu_t)indx;
663   PetscOptionsEnd();
664   if (lu->options.Equil == YES) {
665     /* superlu overwrites input matrix and rhs when Equil is used, thus create A_dup to keep user's A unchanged */
666     ierr = MatDuplicate_SeqAIJ(A,MAT_COPY_VALUES,&lu->A_dup);CHKERRQ(ierr);
667   }
668 
669   /* Allocate spaces (notice sizes are for the transpose) */
670   ierr = PetscMalloc(m*sizeof(PetscInt),&lu->etree);CHKERRQ(ierr);
671   ierr = PetscMalloc(n*sizeof(PetscInt),&lu->perm_r);CHKERRQ(ierr);
672   ierr = PetscMalloc(m*sizeof(PetscInt),&lu->perm_c);CHKERRQ(ierr);
673   ierr = PetscMalloc(n*sizeof(PetscScalar),&lu->R);CHKERRQ(ierr);
674   ierr = PetscMalloc(m*sizeof(PetscScalar),&lu->C);CHKERRQ(ierr);
675 
676   /* create rhs and solution x without allocate space for .Store */
677 #if defined(PETSC_USE_COMPLEX)
678 #if defined(PETSC_USE_REAL_SINGLE)
679   PetscStackCall("SuperLU:cCreate_Dense_Matrix(",cCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_C, SLU_GE));
680   PetscStackCall("SuperLU:cCreate_Dense_Matrix(",cCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_C, SLU_GE));
681 #else
682   PetscStackCall("SuperLU:zCreate_Dense_Matrix",zCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_Z, SLU_GE));
683   PetscStackCall("SuperLU:zCreate_Dense_Matrix",zCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_Z, SLU_GE));
684 #endif
685 #else
686 #if defined(PETSC_USE_REAL_SINGLE)
687   PetscStackCall("SuperLU:sCreate_Dense_Matrix",sCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_S, SLU_GE));
688   PetscStackCall("SuperLU:sCreate_Dense_Matrix",sCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_S, SLU_GE));
689 #else
690   PetscStackCall("SuperLU:dCreate_Dense_Matrix",dCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_D, SLU_GE));
691   PetscStackCall("SuperLU:dCreate_Dense_Matrix",dCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_D, SLU_GE));
692 #endif
693 #endif
694 
695 #if defined(SUPERLU2)
696   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatCreateNull","MatCreateNull_SuperLU",(void (*)(void))MatCreateNull_SuperLU);CHKERRQ(ierr);
697 #endif
698   ierr     = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_seqaij_superlu",MatFactorGetSolverPackage_seqaij_superlu);CHKERRQ(ierr);
699   ierr     = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSuperluSetILUDropTol_C","MatSuperluSetILUDropTol_SuperLU",MatSuperluSetILUDropTol_SuperLU);CHKERRQ(ierr);
700   B->spptr = lu;
701   *F       = B;
702   PetscFunctionReturn(0);
703 }
704 EXTERN_C_END
705 
706