xref: /petsc/src/mat/impls/aij/seq/superlu/superlu.c (revision bcaeba4d41d6ca6c6dc4189db20683073a9959ce)
1 
2 /*  --------------------------------------------------------------------
3 
4      This file implements a subclass of the SeqAIJ matrix class that uses
5      the SuperLU sparse solver. You can use this as a starting point for
6      implementing your own subclass of a PETSc matrix class.
7 
8      This demonstrates a way to make an implementation inheritence of a PETSc
9      matrix type. This means constructing a new matrix type (SuperLU) by changing some
10      of the methods of the previous type (SeqAIJ), adding additional data, and possibly
11      additional method. (See any book on object oriented programming).
12 */
13 
14 /*
15      Defines the data structure for the base matrix type (SeqAIJ)
16 */
17 #include <../src/mat/impls/aij/seq/aij.h>    /*I "petscmat.h" I*/
18 
19 /*
20      SuperLU include files
21 */
22 EXTERN_C_BEGIN
23 #if defined(PETSC_USE_COMPLEX)
24 #if defined(PETSC_USE_REAL_SINGLE)
25 #include <slu_cdefs.h>
26 #else
27 #include <slu_zdefs.h>
28 #endif
29 #else
30 #if defined(PETSC_USE_REAL_SINGLE)
31 #include <slu_sdefs.h>
32 #else
33 #include <slu_ddefs.h>
34 #endif
35 #endif
36 #include <slu_util.h>
37 EXTERN_C_END
38 
39 /*
40      This is the data we are "ADDING" to the SeqAIJ matrix type to get the SuperLU matrix type
41 */
42 typedef struct {
43   SuperMatrix       A,L,U,B,X;
44   superlu_options_t options;
45   PetscInt          *perm_c; /* column permutation vector */
46   PetscInt          *perm_r; /* row permutations from partial pivoting */
47   PetscInt          *etree;
48   PetscReal         *R, *C;
49   char              equed[1];
50   PetscInt          lwork;
51   void              *work;
52   PetscReal         rpg, rcond;
53   mem_usage_t       mem_usage;
54   MatStructure      flg;
55   SuperLUStat_t     stat;
56   Mat               A_dup;
57   PetscScalar       *rhs_dup;
58 
59   /* Flag to clean up (non-global) SuperLU objects during Destroy */
60   PetscBool  CleanUpSuperLU;
61 } Mat_SuperLU;
62 
63 extern PetscErrorCode MatFactorInfo_SuperLU(Mat,PetscViewer);
64 extern PetscErrorCode MatLUFactorNumeric_SuperLU(Mat,Mat,const MatFactorInfo *);
65 extern PetscErrorCode MatDestroy_SuperLU(Mat);
66 extern PetscErrorCode MatView_SuperLU(Mat,PetscViewer);
67 extern PetscErrorCode MatAssemblyEnd_SuperLU(Mat,MatAssemblyType);
68 extern PetscErrorCode MatSolve_SuperLU(Mat,Vec,Vec);
69 extern PetscErrorCode MatMatSolve_SuperLU(Mat,Mat,Mat);
70 extern PetscErrorCode MatSolveTranspose_SuperLU(Mat,Vec,Vec);
71 extern PetscErrorCode MatLUFactorSymbolic_SuperLU(Mat,Mat,IS,IS,const MatFactorInfo*);
72 extern PetscErrorCode MatDuplicate_SuperLU(Mat, MatDuplicateOption, Mat *);
73 
74 /*
75     Utility function
76 */
77 #undef __FUNCT__
78 #define __FUNCT__ "MatFactorInfo_SuperLU"
79 PetscErrorCode MatFactorInfo_SuperLU(Mat A,PetscViewer viewer)
80 {
81   Mat_SuperLU       *lu= (Mat_SuperLU*)A->spptr;
82   PetscErrorCode    ierr;
83   superlu_options_t options;
84 
85   PetscFunctionBegin;
86   /* check if matrix is superlu_dist type */
87   if (A->ops->solve != MatSolve_SuperLU) PetscFunctionReturn(0);
88 
89   options = lu->options;
90   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU run parameters:\n");CHKERRQ(ierr);
91   ierr = PetscViewerASCIIPrintf(viewer,"  Equil: %s\n",(options.Equil != NO) ? "YES": "NO");CHKERRQ(ierr);
92   ierr = PetscViewerASCIIPrintf(viewer,"  ColPerm: %D\n",options.ColPerm);CHKERRQ(ierr);
93   ierr = PetscViewerASCIIPrintf(viewer,"  IterRefine: %D\n",options.IterRefine);CHKERRQ(ierr);
94   ierr = PetscViewerASCIIPrintf(viewer,"  SymmetricMode: %s\n",(options.SymmetricMode != NO) ? "YES": "NO");CHKERRQ(ierr);
95   ierr = PetscViewerASCIIPrintf(viewer,"  DiagPivotThresh: %g\n",options.DiagPivotThresh);CHKERRQ(ierr);
96   ierr = PetscViewerASCIIPrintf(viewer,"  PivotGrowth: %s\n",(options.PivotGrowth != NO) ? "YES": "NO");CHKERRQ(ierr);
97   ierr = PetscViewerASCIIPrintf(viewer,"  ConditionNumber: %s\n",(options.ConditionNumber != NO) ? "YES": "NO");CHKERRQ(ierr);
98   ierr = PetscViewerASCIIPrintf(viewer,"  RowPerm: %D\n",options.RowPerm);CHKERRQ(ierr);
99   ierr = PetscViewerASCIIPrintf(viewer,"  ReplaceTinyPivot: %s\n",(options.ReplaceTinyPivot != NO) ? "YES": "NO");CHKERRQ(ierr);
100   ierr = PetscViewerASCIIPrintf(viewer,"  PrintStat: %s\n",(options.PrintStat != NO) ? "YES": "NO");CHKERRQ(ierr);
101   ierr = PetscViewerASCIIPrintf(viewer,"  lwork: %D\n",lu->lwork);CHKERRQ(ierr);
102   if (A->factortype == MAT_FACTOR_ILU){
103     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_DropTol: %g\n",options.ILU_DropTol);CHKERRQ(ierr);
104     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_FillTol: %g\n",options.ILU_FillTol);CHKERRQ(ierr);
105     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_FillFactor: %g\n",options.ILU_FillFactor);CHKERRQ(ierr);
106     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_DropRule: %D\n",options.ILU_DropRule);CHKERRQ(ierr);
107     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_Norm: %D\n",options.ILU_Norm);CHKERRQ(ierr);
108     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_MILU: %D\n",options.ILU_MILU);CHKERRQ(ierr);
109   }
110   PetscFunctionReturn(0);
111 }
112 
113 /*
114     These are the methods provided to REPLACE the corresponding methods of the
115    SeqAIJ matrix class. Other methods of SeqAIJ are not replaced
116 */
117 #undef __FUNCT__
118 #define __FUNCT__ "MatLUFactorNumeric_SuperLU"
119 PetscErrorCode MatLUFactorNumeric_SuperLU(Mat F,Mat A,const MatFactorInfo *info)
120 {
121   Mat_SuperLU    *lu = (Mat_SuperLU*)F->spptr;
122   Mat_SeqAIJ     *aa;
123   PetscErrorCode ierr;
124   PetscInt       sinfo;
125   PetscReal      ferr, berr;
126   NCformat       *Ustore;
127   SCformat       *Lstore;
128 
129   PetscFunctionBegin;
130   if (lu->flg == SAME_NONZERO_PATTERN){ /* successing numerical factorization */
131     lu->options.Fact = SamePattern;
132     /* Ref: ~SuperLU_3.0/EXAMPLE/dlinsolx2.c */
133     Destroy_SuperMatrix_Store(&lu->A);
134     if (lu->options.Equil){
135       ierr = MatCopy_SeqAIJ(A,lu->A_dup,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
136     }
137     if ( lu->lwork >= 0 ) {
138       Destroy_SuperNode_Matrix(&lu->L);
139       Destroy_CompCol_Matrix(&lu->U);
140       lu->options.Fact = SamePattern;
141     }
142   }
143 
144   /* Create the SuperMatrix for lu->A=A^T:
145        Since SuperLU likes column-oriented matrices,we pass it the transpose,
146        and then solve A^T X = B in MatSolve(). */
147   if (lu->options.Equil){
148     aa = (Mat_SeqAIJ*)(lu->A_dup)->data;
149   } else {
150     aa = (Mat_SeqAIJ*)(A)->data;
151   }
152 #if defined(PETSC_USE_COMPLEX)
153 #if defined(PETSC_USE_REAL_SINGLE)
154   cCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,(singlecomplex*)aa->a,aa->j,aa->i,
155                            SLU_NC,SLU_C,SLU_GE);
156 #else
157   zCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,
158                            SLU_NC,SLU_Z,SLU_GE);
159 #endif
160 #else
161 #if defined(PETSC_USE_REAL_SINGLE)
162   sCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,aa->a,aa->j,aa->i,
163                            SLU_NC,SLU_S,SLU_GE);
164 #else
165   dCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,aa->a,aa->j,aa->i,
166                            SLU_NC,SLU_D,SLU_GE);
167 #endif
168 #endif
169 
170   /* Numerical factorization */
171   lu->B.ncol = 0;  /* Indicate not to solve the system */
172   if (F->factortype == MAT_FACTOR_LU){
173 #if defined(PETSC_USE_COMPLEX)
174 #if defined(PETSC_USE_REAL_SINGLE)
175     cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
176            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
177            &lu->mem_usage, &lu->stat, &sinfo);
178 #else
179     zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
180            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
181            &lu->mem_usage, &lu->stat, &sinfo);
182 #endif
183 #else
184 #if defined(PETSC_USE_REAL_SINGLE)
185     sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
186            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
187            &lu->mem_usage, &lu->stat, &sinfo);
188 #else
189     dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
190            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
191            &lu->mem_usage, &lu->stat, &sinfo);
192 #endif
193 #endif
194   } else if (F->factortype == MAT_FACTOR_ILU){
195     /* Compute the incomplete factorization, condition number and pivot growth */
196 #if defined(PETSC_USE_COMPLEX)
197 #if defined(PETSC_USE_REAL_SINGLE)
198     cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r,lu->etree, lu->equed, lu->R, lu->C,
199            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
200            &lu->mem_usage, &lu->stat, &sinfo);
201 #else
202     zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r,lu->etree, lu->equed, lu->R, lu->C,
203            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
204            &lu->mem_usage, &lu->stat, &sinfo);
205 #endif
206 #else
207 #if defined(PETSC_USE_REAL_SINGLE)
208     sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
209           &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
210           &lu->mem_usage, &lu->stat, &sinfo);
211 #else
212     dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
213           &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
214           &lu->mem_usage, &lu->stat, &sinfo);
215 #endif
216 #endif
217   } else {
218     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
219   }
220   if ( !sinfo || sinfo == lu->A.ncol+1 ) {
221     if ( lu->options.PivotGrowth )
222       ierr = PetscPrintf(PETSC_COMM_SELF,"  Recip. pivot growth = %e\n", lu->rpg);
223     if ( lu->options.ConditionNumber )
224       ierr = PetscPrintf(PETSC_COMM_SELF,"  Recip. condition number = %e\n", lu->rcond);
225   } else if ( sinfo > 0 ){
226     if ( lu->lwork == -1 ) {
227       ierr = PetscPrintf(PETSC_COMM_SELF,"  ** Estimated memory: %D bytes\n", sinfo - lu->A.ncol);
228     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot in row %D",sinfo);
229   } else { /* sinfo < 0 */
230     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %D, the %D-th argument in gssvx() had an illegal value", sinfo,-sinfo);
231   }
232 
233   if ( lu->options.PrintStat ) {
234     ierr = PetscPrintf(PETSC_COMM_SELF,"MatLUFactorNumeric_SuperLU():\n");
235     StatPrint(&lu->stat);
236     Lstore = (SCformat *) lu->L.Store;
237     Ustore = (NCformat *) lu->U.Store;
238     ierr = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in factor L = %D\n", Lstore->nnz);
239     ierr = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in factor U = %D\n", Ustore->nnz);
240     ierr = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in L+U = %D\n", Lstore->nnz + Ustore->nnz - lu->A.ncol);
241     ierr = PetscPrintf(PETSC_COMM_SELF,"  L\\U MB %.3f\ttotal MB needed %.3f\n",
242 	       lu->mem_usage.for_lu/1e6, lu->mem_usage.total_needed/1e6);
243   }
244 
245   lu->flg = SAME_NONZERO_PATTERN;
246   F->ops->solve          = MatSolve_SuperLU;
247   F->ops->solvetranspose = MatSolveTranspose_SuperLU;
248   F->ops->matsolve       = MatMatSolve_SuperLU;
249   PetscFunctionReturn(0);
250 }
251 
252 #undef __FUNCT__
253 #define __FUNCT__ "MatDestroy_SuperLU"
254 PetscErrorCode MatDestroy_SuperLU(Mat A)
255 {
256   PetscErrorCode ierr;
257   Mat_SuperLU    *lu=(Mat_SuperLU*)A->spptr;
258 
259   PetscFunctionBegin;
260   if (lu && lu->CleanUpSuperLU) { /* Free the SuperLU datastructures */
261     Destroy_SuperMatrix_Store(&lu->A);
262     Destroy_SuperMatrix_Store(&lu->B);
263     Destroy_SuperMatrix_Store(&lu->X);
264     StatFree(&lu->stat);
265     if (lu->lwork >= 0) {
266       Destroy_SuperNode_Matrix(&lu->L);
267       Destroy_CompCol_Matrix(&lu->U);
268     }
269   }
270   if (lu) {
271     ierr = PetscFree(lu->etree);CHKERRQ(ierr);
272     ierr = PetscFree(lu->perm_r);CHKERRQ(ierr);
273     ierr = PetscFree(lu->perm_c);CHKERRQ(ierr);
274     ierr = PetscFree(lu->R);CHKERRQ(ierr);
275     ierr = PetscFree(lu->C);CHKERRQ(ierr);
276     ierr = PetscFree(lu->rhs_dup);CHKERRQ(ierr);
277     ierr = MatDestroy(&lu->A_dup);CHKERRQ(ierr);
278   }
279   ierr = PetscFree(A->spptr);CHKERRQ(ierr);
280 
281   /* clear composed functions */
282   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatFactorGetSolverPackage_C","",PETSC_NULL);CHKERRQ(ierr);
283   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatSuperluSetILUDropTol_C","",PETSC_NULL);CHKERRQ(ierr);
284 
285   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
286   PetscFunctionReturn(0);
287 }
288 
289 #undef __FUNCT__
290 #define __FUNCT__ "MatView_SuperLU"
291 PetscErrorCode MatView_SuperLU(Mat A,PetscViewer viewer)
292 {
293   PetscErrorCode    ierr;
294   PetscBool         iascii;
295   PetscViewerFormat format;
296 
297   PetscFunctionBegin;
298   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
299   if (iascii) {
300     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
301     if (format == PETSC_VIEWER_ASCII_INFO) {
302       ierr = MatFactorInfo_SuperLU(A,viewer);CHKERRQ(ierr);
303     }
304   }
305   PetscFunctionReturn(0);
306 }
307 
308 
309 #undef __FUNCT__
310 #define __FUNCT__ "MatSolve_SuperLU_Private"
311 PetscErrorCode MatSolve_SuperLU_Private(Mat A,Vec b,Vec x)
312 {
313   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
314   PetscScalar    *barray,*xarray;
315   PetscErrorCode ierr;
316   PetscInt       info,i,n=x->map->n;
317   PetscReal      ferr,berr;
318 
319   PetscFunctionBegin;
320   if ( lu->lwork == -1 ) {
321     PetscFunctionReturn(0);
322   }
323 
324   lu->B.ncol = 1;   /* Set the number of right-hand side */
325   if (lu->options.Equil && !lu->rhs_dup){
326     /* superlu overwrites b when Equil is used, thus create rhs_dup to keep user's b unchanged */
327     ierr = PetscMalloc(n*sizeof(PetscScalar),&lu->rhs_dup);CHKERRQ(ierr);
328   }
329   if (lu->options.Equil){
330     /* Copy b into rsh_dup */
331     ierr = VecGetArray(b,&barray);CHKERRQ(ierr);
332     ierr = PetscMemcpy(lu->rhs_dup,barray,n*sizeof(PetscScalar));CHKERRQ(ierr);
333     ierr = VecRestoreArray(b,&barray);CHKERRQ(ierr);
334     barray = lu->rhs_dup;
335   } else {
336     ierr = VecGetArray(b,&barray);CHKERRQ(ierr);
337   }
338   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
339 
340 #if defined(PETSC_USE_COMPLEX)
341 #if defined(PETSC_USE_REAL_SINGLE)
342   ((DNformat*)lu->B.Store)->nzval = (singlecomplex*)barray;
343   ((DNformat*)lu->X.Store)->nzval = (singlecomplex*)xarray;
344 #else
345   ((DNformat*)lu->B.Store)->nzval = (doublecomplex*)barray;
346   ((DNformat*)lu->X.Store)->nzval = (doublecomplex*)xarray;
347 #endif
348 #else
349   ((DNformat*)lu->B.Store)->nzval = barray;
350   ((DNformat*)lu->X.Store)->nzval = xarray;
351 #endif
352 
353   lu->options.Fact = FACTORED; /* Indicate the factored form of A is supplied. */
354   if (A->factortype == MAT_FACTOR_LU){
355 #if defined(PETSC_USE_COMPLEX)
356 #if defined(PETSC_USE_REAL_SINGLE)
357     cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
358            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
359            &lu->mem_usage, &lu->stat, &info);
360 #else
361     zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
362            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
363            &lu->mem_usage, &lu->stat, &info);
364 #endif
365 #else
366 #if defined(PETSC_USE_REAL_SINGLE)
367     sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
368            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
369            &lu->mem_usage, &lu->stat, &info);
370 #else
371     dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
372            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
373            &lu->mem_usage, &lu->stat, &info);
374 #endif
375 #endif
376   } else if (A->factortype == MAT_FACTOR_ILU){
377 #if defined(PETSC_USE_COMPLEX)
378 #if defined(PETSC_USE_REAL_SINGLE)
379     cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
380            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
381            &lu->mem_usage, &lu->stat, &info);
382 #else
383     zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
384            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
385            &lu->mem_usage, &lu->stat, &info);
386 #endif
387 #else
388 #if defined(PETSC_USE_REAL_SINGLE)
389     sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
390            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
391            &lu->mem_usage, &lu->stat, &info);
392 #else
393     dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
394            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
395            &lu->mem_usage, &lu->stat, &info);
396 #endif
397 #endif
398   } else {
399     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
400   }
401   if (!lu->options.Equil){
402     ierr = VecRestoreArray(b,&barray);CHKERRQ(ierr);
403   }
404   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
405 
406   if ( !info || info == lu->A.ncol+1 ) {
407     if ( lu->options.IterRefine ) {
408       ierr = PetscPrintf(PETSC_COMM_SELF,"Iterative Refinement:\n");
409       ierr = PetscPrintf(PETSC_COMM_SELF,"  %8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR");
410       for (i = 0; i < 1; ++i)
411         ierr = PetscPrintf(PETSC_COMM_SELF,"  %8d%8d%16e%16e\n", i+1, lu->stat.RefineSteps, ferr, berr);
412     }
413   } else if ( info > 0 ){
414     if ( lu->lwork == -1 ) {
415       ierr = PetscPrintf(PETSC_COMM_SELF,"  ** Estimated memory: %D bytes\n", info - lu->A.ncol);
416     } else {
417       ierr = PetscPrintf(PETSC_COMM_SELF,"  Warning: gssvx() returns info %D\n",info);
418     }
419   } else if (info < 0){
420     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %D, the %D-th argument in gssvx() had an illegal value", info,-info);
421   }
422 
423   if ( lu->options.PrintStat ) {
424     ierr = PetscPrintf(PETSC_COMM_SELF,"MatSolve__SuperLU():\n");
425     StatPrint(&lu->stat);
426   }
427   PetscFunctionReturn(0);
428 }
429 
430 #undef __FUNCT__
431 #define __FUNCT__ "MatSolve_SuperLU"
432 PetscErrorCode MatSolve_SuperLU(Mat A,Vec b,Vec x)
433 {
434   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
435   PetscErrorCode ierr;
436 
437   PetscFunctionBegin;
438   lu->options.Trans = TRANS;
439   ierr = MatSolve_SuperLU_Private(A,b,x);CHKERRQ(ierr);
440   PetscFunctionReturn(0);
441 }
442 
443 #undef __FUNCT__
444 #define __FUNCT__ "MatSolveTranspose_SuperLU"
445 PetscErrorCode MatSolveTranspose_SuperLU(Mat A,Vec b,Vec x)
446 {
447   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
448   PetscErrorCode ierr;
449 
450   PetscFunctionBegin;
451   lu->options.Trans = NOTRANS;
452   ierr = MatSolve_SuperLU_Private(A,b,x);CHKERRQ(ierr);
453   PetscFunctionReturn(0);
454 }
455 
456 #undef __FUNCT__
457 #define __FUNCT__ "MatMatSolve_SuperLU"
458 PetscErrorCode MatMatSolve_SuperLU(Mat A,Mat B,Mat X)
459 {
460   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
461   PetscBool      flg;
462   PetscErrorCode ierr;
463 
464   PetscFunctionBegin;
465   ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,PETSC_NULL);CHKERRQ(ierr);
466   if (!flg) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix");
467   ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,PETSC_NULL);CHKERRQ(ierr);
468   if (!flg) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");  lu->options.Trans = TRANS;
469   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatSolve_SuperLU() is not implemented yet");
470   PetscFunctionReturn(0);
471 }
472 
473 /*
474    Note the r permutation is ignored
475 */
476 #undef __FUNCT__
477 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU"
478 PetscErrorCode MatLUFactorSymbolic_SuperLU(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
479 {
480   Mat_SuperLU    *lu = (Mat_SuperLU*)(F->spptr);
481 
482   PetscFunctionBegin;
483   lu->flg                 = DIFFERENT_NONZERO_PATTERN;
484   lu->CleanUpSuperLU      = PETSC_TRUE;
485   F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU;
486   PetscFunctionReturn(0);
487 }
488 
489 EXTERN_C_BEGIN
490 #undef __FUNCT__
491 #define __FUNCT__ "MatSuperluSetILUDropTol_SuperLU"
492 PetscErrorCode MatSuperluSetILUDropTol_SuperLU(Mat F,PetscReal dtol)
493 {
494   Mat_SuperLU *lu= (Mat_SuperLU*)F->spptr;
495 
496   PetscFunctionBegin;
497   lu->options.ILU_DropTol = dtol;
498   PetscFunctionReturn(0);
499 }
500 EXTERN_C_END
501 
502 #undef __FUNCT__
503 #define __FUNCT__ "MatSuperluSetILUDropTol"
504 /*@
505   MatSuperluSetILUDropTol - Set SuperLU ILU drop tolerance
506    Logically Collective on Mat
507 
508    Input Parameters:
509 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-SuperLU interface
510 -  dtol - drop tolerance
511 
512   Options Database:
513 .   -mat_superlu_ilu_droptol <dtol>
514 
515    Level: beginner
516 
517    References: SuperLU Users' Guide
518 
519 .seealso: MatGetFactor()
520 @*/
521 PetscErrorCode MatSuperluSetILUDropTol(Mat F,PetscReal dtol)
522 {
523   PetscErrorCode ierr;
524 
525   PetscFunctionBegin;
526   PetscValidHeaderSpecific(F,MAT_CLASSID,1);
527   PetscValidLogicalCollectiveInt(F,dtol,2);
528   ierr = PetscTryMethod(F,"MatSuperluSetILUDropTol_C",(Mat,PetscReal),(F,dtol));CHKERRQ(ierr);
529   PetscFunctionReturn(0);
530 }
531 
532 EXTERN_C_BEGIN
533 #undef __FUNCT__
534 #define __FUNCT__ "MatFactorGetSolverPackage_seqaij_superlu"
535 PetscErrorCode MatFactorGetSolverPackage_seqaij_superlu(Mat A,const MatSolverPackage *type)
536 {
537   PetscFunctionBegin;
538   *type = MATSOLVERSUPERLU;
539   PetscFunctionReturn(0);
540 }
541 EXTERN_C_END
542 
543 
544 /*MC
545   MATSOLVERSUPERLU = "superlu" - A solver package providing solvers LU and ILU for sequential matrices
546   via the external package SuperLU.
547 
548   Use ./configure --download-superlu to have PETSc installed with SuperLU
549 
550   Options Database Keys:
551 + -mat_superlu_equil <FALSE>            - Equil (None)
552 . -mat_superlu_colperm <COLAMD>         - (choose one of) NATURAL MMD_ATA MMD_AT_PLUS_A COLAMD
553 . -mat_superlu_iterrefine <NOREFINE>    - (choose one of) NOREFINE SINGLE DOUBLE EXTRA
554 . -mat_superlu_symmetricmode: <FALSE>   - SymmetricMode (None)
555 . -mat_superlu_diagpivotthresh <1>      - DiagPivotThresh (None)
556 . -mat_superlu_pivotgrowth <FALSE>      - PivotGrowth (None)
557 . -mat_superlu_conditionnumber <FALSE>  - ConditionNumber (None)
558 . -mat_superlu_rowperm <NOROWPERM>      - (choose one of) NOROWPERM LargeDiag
559 . -mat_superlu_replacetinypivot <FALSE> - ReplaceTinyPivot (None)
560 . -mat_superlu_printstat <FALSE>        - PrintStat (None)
561 . -mat_superlu_lwork <0>                - size of work array in bytes used by factorization (None)
562 . -mat_superlu_ilu_droptol <0>          - ILU_DropTol (None)
563 . -mat_superlu_ilu_filltol <0>          - ILU_FillTol (None)
564 . -mat_superlu_ilu_fillfactor <0>       - ILU_FillFactor (None)
565 . -mat_superlu_ilu_droprull <0>         - ILU_DropRule (None)
566 . -mat_superlu_ilu_norm <0>             - ILU_Norm (None)
567 - -mat_superlu_ilu_milu <0>             - ILU_MILU (None)
568 
569    Notes: Do not confuse this with MATSOLVERSUPERLU_DIST which is for parallel sparse solves
570 
571    Level: beginner
572 
573 .seealso: PCLU, PCILU, MATSOLVERSUPERLU_DIST, MATSOLVERMUMPS, PCFactorSetMatSolverPackage(), MatSolverPackage
574 M*/
575 
576 EXTERN_C_BEGIN
577 #undef __FUNCT__
578 #define __FUNCT__ "MatGetFactor_seqaij_superlu"
579 PetscErrorCode MatGetFactor_seqaij_superlu(Mat A,MatFactorType ftype,Mat *F)
580 {
581   Mat            B;
582   Mat_SuperLU    *lu;
583   PetscErrorCode ierr;
584   PetscInt       indx,m=A->rmap->n,n=A->cmap->n;
585   PetscBool      flg;
586   PetscReal      real_input;
587   const char     *colperm[]={"NATURAL","MMD_ATA","MMD_AT_PLUS_A","COLAMD"}; /* MY_PERMC - not supported by the petsc interface yet */
588   const char     *iterrefine[]={"NOREFINE", "SINGLE", "DOUBLE", "EXTRA"};
589   const char     *rowperm[]={"NOROWPERM", "LargeDiag"}; /* MY_PERMC - not supported by the petsc interface yet */
590 
591   PetscFunctionBegin;
592   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
593   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
594   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
595   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
596 
597   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU){
598     B->ops->lufactorsymbolic  = MatLUFactorSymbolic_SuperLU;
599     B->ops->ilufactorsymbolic = MatLUFactorSymbolic_SuperLU;
600   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
601 
602   B->ops->destroy          = MatDestroy_SuperLU;
603   B->ops->view             = MatView_SuperLU;
604   B->factortype            = ftype;
605   B->assembled             = PETSC_TRUE;  /* required by -ksp_view */
606   B->preallocated          = PETSC_TRUE;
607 
608   ierr = PetscNewLog(B,Mat_SuperLU,&lu);CHKERRQ(ierr);
609 
610   if (ftype == MAT_FACTOR_LU){
611     set_default_options(&lu->options);
612     /* Comments from SuperLU_4.0/SRC/dgssvx.c:
613       "Whether or not the system will be equilibrated depends on the
614        scaling of the matrix A, but if equilibration is used, A is
615        overwritten by diag(R)*A*diag(C) and B by diag(R)*B
616        (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans = TRANS or CONJ)."
617      We set 'options.Equil = NO' as default because additional space is needed for it.
618     */
619     lu->options.Equil = NO;
620   } else if (ftype == MAT_FACTOR_ILU){
621     /* Set the default input options of ilu: */
622     ilu_set_default_options(&lu->options);
623   }
624   lu->options.PrintStat = NO;
625 
626   /* Initialize the statistics variables. */
627   StatInit(&lu->stat);
628   lu->lwork = 0;   /* allocate space internally by system malloc */
629 
630   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU Options","Mat");CHKERRQ(ierr);
631     ierr = PetscOptionsBool("-mat_superlu_equil","Equil","None",(PetscBool)lu->options.Equil,(PetscBool*)&lu->options.Equil,0);CHKERRQ(ierr);
632     ierr = PetscOptionsEList("-mat_superlu_colperm","ColPerm","None",colperm,4,colperm[3],&indx,&flg);CHKERRQ(ierr);
633     if (flg) {lu->options.ColPerm = (colperm_t)indx;}
634     ierr = PetscOptionsEList("-mat_superlu_iterrefine","IterRefine","None",iterrefine,4,iterrefine[0],&indx,&flg);CHKERRQ(ierr);
635     if (flg) { lu->options.IterRefine = (IterRefine_t)indx;}
636     ierr = PetscOptionsBool("-mat_superlu_symmetricmode","SymmetricMode","None",(PetscBool)lu->options.SymmetricMode,&flg,0);CHKERRQ(ierr);
637     if (flg) lu->options.SymmetricMode = YES;
638     ierr = PetscOptionsReal("-mat_superlu_diagpivotthresh","DiagPivotThresh","None",lu->options.DiagPivotThresh,&real_input,&flg);CHKERRQ(ierr);
639     if (flg) lu->options.DiagPivotThresh = (double) real_input;
640     ierr = PetscOptionsBool("-mat_superlu_pivotgrowth","PivotGrowth","None",(PetscBool)lu->options.PivotGrowth,&flg,0);CHKERRQ(ierr);
641     if (flg) lu->options.PivotGrowth = YES;
642     ierr = PetscOptionsBool("-mat_superlu_conditionnumber","ConditionNumber","None",(PetscBool)lu->options.ConditionNumber,&flg,0);CHKERRQ(ierr);
643     if (flg) lu->options.ConditionNumber = YES;
644     ierr = PetscOptionsEList("-mat_superlu_rowperm","rowperm","None",rowperm,2,rowperm[lu->options.RowPerm],&indx,&flg);CHKERRQ(ierr);
645     if (flg) {lu->options.RowPerm = (rowperm_t)indx;}
646     ierr = PetscOptionsBool("-mat_superlu_replacetinypivot","ReplaceTinyPivot","None",(PetscBool)lu->options.ReplaceTinyPivot,&flg,0);CHKERRQ(ierr);
647     if (flg) lu->options.ReplaceTinyPivot = YES;
648     ierr = PetscOptionsBool("-mat_superlu_printstat","PrintStat","None",(PetscBool)lu->options.PrintStat,&flg,0);CHKERRQ(ierr);
649     if (flg) lu->options.PrintStat = YES;
650     ierr = PetscOptionsInt("-mat_superlu_lwork","size of work array in bytes used by factorization","None",lu->lwork,&lu->lwork,PETSC_NULL);CHKERRQ(ierr);
651     if (lu->lwork > 0 ){
652       ierr = PetscMalloc(lu->lwork,&lu->work);CHKERRQ(ierr);
653     } else if (lu->lwork != 0 && lu->lwork != -1){
654       ierr = PetscPrintf(PETSC_COMM_SELF,"   Warning: lwork %D is not supported by SUPERLU. The default lwork=0 is used.\n",lu->lwork);
655       lu->lwork = 0;
656     }
657     /* ilu options */
658     ierr = PetscOptionsReal("-mat_superlu_ilu_droptol","ILU_DropTol","None",lu->options.ILU_DropTol,&real_input,&flg);CHKERRQ(ierr);
659     if (flg) lu->options.ILU_DropTol = (double) real_input;
660     ierr = PetscOptionsReal("-mat_superlu_ilu_filltol","ILU_FillTol","None",lu->options.ILU_FillTol,&real_input,&flg);CHKERRQ(ierr);
661     if (flg) lu->options.ILU_FillTol = (double) real_input;
662     ierr = PetscOptionsReal("-mat_superlu_ilu_fillfactor","ILU_FillFactor","None",lu->options.ILU_FillFactor,&real_input,&flg);CHKERRQ(ierr);
663     if (flg) lu->options.ILU_FillFactor = (double) real_input;
664     ierr = PetscOptionsInt("-mat_superlu_ilu_droprull","ILU_DropRule","None",lu->options.ILU_DropRule,&lu->options.ILU_DropRule,PETSC_NULL);CHKERRQ(ierr);
665     ierr = PetscOptionsInt("-mat_superlu_ilu_norm","ILU_Norm","None",lu->options.ILU_Norm,&indx,&flg);CHKERRQ(ierr);
666     if (flg){
667       lu->options.ILU_Norm = (norm_t)indx;
668     }
669     ierr = PetscOptionsInt("-mat_superlu_ilu_milu","ILU_MILU","None",lu->options.ILU_MILU,&indx,&flg);CHKERRQ(ierr);
670     if (flg){
671       lu->options.ILU_MILU = (milu_t)indx;
672     }
673   PetscOptionsEnd();
674   if (lu->options.Equil == YES) {
675     /* superlu overwrites input matrix and rhs when Equil is used, thus create A_dup to keep user's A unchanged */
676     ierr = MatDuplicate_SeqAIJ(A,MAT_COPY_VALUES,&lu->A_dup);CHKERRQ(ierr);
677   }
678 
679   /* Allocate spaces (notice sizes are for the transpose) */
680   ierr = PetscMalloc(m*sizeof(PetscInt),&lu->etree);CHKERRQ(ierr);
681   ierr = PetscMalloc(n*sizeof(PetscInt),&lu->perm_r);CHKERRQ(ierr);
682   ierr = PetscMalloc(m*sizeof(PetscInt),&lu->perm_c);CHKERRQ(ierr);
683   ierr = PetscMalloc(n*sizeof(PetscScalar),&lu->R);CHKERRQ(ierr);
684   ierr = PetscMalloc(m*sizeof(PetscScalar),&lu->C);CHKERRQ(ierr);
685 
686   /* create rhs and solution x without allocate space for .Store */
687 #if defined(PETSC_USE_COMPLEX)
688 #if defined(PETSC_USE_REAL_SINGLE)
689   cCreate_Dense_Matrix(&lu->B, m, 1, PETSC_NULL, m, SLU_DN, SLU_C, SLU_GE);
690   cCreate_Dense_Matrix(&lu->X, m, 1, PETSC_NULL, m, SLU_DN, SLU_C, SLU_GE);
691 #else
692   zCreate_Dense_Matrix(&lu->B, m, 1, PETSC_NULL, m, SLU_DN, SLU_Z, SLU_GE);
693   zCreate_Dense_Matrix(&lu->X, m, 1, PETSC_NULL, m, SLU_DN, SLU_Z, SLU_GE);
694 #endif
695 #else
696 #if defined(PETSC_USE_REAL_SINGLE)
697   sCreate_Dense_Matrix(&lu->B, m, 1, PETSC_NULL, m, SLU_DN, SLU_S, SLU_GE);
698   sCreate_Dense_Matrix(&lu->X, m, 1, PETSC_NULL, m, SLU_DN, SLU_S, SLU_GE);
699 #else
700   dCreate_Dense_Matrix(&lu->B, m, 1, PETSC_NULL, m, SLU_DN, SLU_D, SLU_GE);
701   dCreate_Dense_Matrix(&lu->X, m, 1, PETSC_NULL, m, SLU_DN, SLU_D, SLU_GE);
702 #endif
703 #endif
704 
705 #ifdef SUPERLU2
706   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatCreateNull","MatCreateNull_SuperLU",(void(*)(void))MatCreateNull_SuperLU);CHKERRQ(ierr);
707 #endif
708   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_seqaij_superlu",MatFactorGetSolverPackage_seqaij_superlu);CHKERRQ(ierr);
709   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSuperluSetILUDropTol_C","MatSuperluSetILUDropTol_SuperLU",MatSuperluSetILUDropTol_SuperLU);CHKERRQ(ierr);
710   B->spptr = lu;
711   *F = B;
712   PetscFunctionReturn(0);
713 }
714 EXTERN_C_END
715 
716