xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision 6849ba73f22fecb8f92ef896a42e4e8bd4cd6965)
1 /*
2         Provides an interface to the SuperLU_DIST_2.0 sparse solver
3 */
4 
5 #include "src/mat/impls/aij/seq/aij.h"
6 #include "src/mat/impls/aij/mpi/mpiaij.h"
7 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */
8 #include "stdlib.h"
9 #endif
10 
11 EXTERN_C_BEGIN
12 #if defined(PETSC_USE_COMPLEX)
13 #include "superlu_zdefs.h"
14 #else
15 #include "superlu_ddefs.h"
16 #endif
17 EXTERN_C_END
18 
19 typedef enum { GLOBAL,DISTRIBUTED
20 } SuperLU_MatInputMode;
21 
22 typedef struct {
23   int_t                   nprow,npcol,*row,*col;
24   gridinfo_t              grid;
25   superlu_options_t       options;
26   SuperMatrix             A_sup;
27   ScalePermstruct_t       ScalePermstruct;
28   LUstruct_t              LUstruct;
29   int                     StatPrint;
30   int                     MatInputMode;
31   SOLVEstruct_t           SOLVEstruct;
32   MatStructure            flg;
33   MPI_Comm                comm_superlu;
34 #if defined(PETSC_USE_COMPLEX)
35   doublecomplex           *val;
36 #else
37   double                  *val;
38 #endif
39 
40   /* A few function pointers for inheritance */
41   PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
42   PetscErrorCode (*MatView)(Mat,PetscViewer);
43   PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
44   PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
45   PetscErrorCode (*MatDestroy)(Mat);
46 
47   /* Flag to clean up (non-global) SuperLU objects during Destroy */
48   PetscTruth CleanUpSuperLU_Dist;
49 } Mat_SuperLU_DIST;
50 
51 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*);
52 
53 EXTERN_C_BEGIN
54 #undef __FUNCT__
55 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base"
56 PetscErrorCode MatConvert_SuperLU_DIST_Base(Mat A,const MatType type,Mat *newmat) {
57   PetscErrorCode ierr;
58   Mat              B=*newmat;
59   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
60 
61   PetscFunctionBegin;
62   if (B != A) {
63     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
64   }
65   /* Reset the original function pointers */
66   B->ops->duplicate        = lu->MatDuplicate;
67   B->ops->view             = lu->MatView;
68   B->ops->assemblyend      = lu->MatAssemblyEnd;
69   B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic;
70   B->ops->destroy          = lu->MatDestroy;
71 
72   ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
73   ierr = PetscFree(lu);CHKERRQ(ierr);
74 
75   *newmat = B;
76   PetscFunctionReturn(0);
77 }
78 EXTERN_C_END
79 
80 #undef __FUNCT__
81 #define __FUNCT__ "MatDestroy_SuperLU_DIST"
82 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
83 {
84   PetscErrorCode ierr;
85   int              size;
86   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
87 
88   PetscFunctionBegin;
89   if (lu->CleanUpSuperLU_Dist) {
90     /* Deallocate SuperLU_DIST storage */
91     if (lu->MatInputMode == GLOBAL) {
92       Destroy_CompCol_Matrix_dist(&lu->A_sup);
93     } else {
94       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
95       if ( lu->options.SolveInitialized ) {
96 #if defined(PETSC_USE_COMPLEX)
97         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
98 #else
99         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
100 #endif
101       }
102     }
103     Destroy_LU(A->N, &lu->grid, &lu->LUstruct);
104     ScalePermstructFree(&lu->ScalePermstruct);
105     LUstructFree(&lu->LUstruct);
106 
107     /* Release the SuperLU_DIST process grid. */
108     superlu_gridexit(&lu->grid);
109 
110     ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
111   }
112 
113   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
114   if (size == 1) {
115     ierr = MatConvert_SuperLU_DIST_Base(A,MATSEQAIJ,&A);CHKERRQ(ierr);
116   } else {
117     ierr = MatConvert_SuperLU_DIST_Base(A,MATMPIAIJ,&A);CHKERRQ(ierr);
118   }
119   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
120 
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatSolve_SuperLU_DIST"
126 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
127 {
128   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
129   PetscErrorCode ierr;
130   int              size;
131   int              m=A->M, N=A->N;
132   SuperLUStat_t    stat;
133   double           berr[1];
134   PetscScalar      *bptr;
135   int              info, nrhs=1;
136   Vec              x_seq;
137   IS               iden;
138   VecScatter       scat;
139 
140   PetscFunctionBegin;
141   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
142   if (size > 1) {
143     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
144       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
145       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
146       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
147       ierr = ISDestroy(iden);CHKERRQ(ierr);
148 
149       ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
150       ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
151       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
152     } else { /* distributed mat input */
153       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
154       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
155     }
156   } else { /* size == 1 */
157     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
158     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
159   }
160 
161   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/
162 
163   PStatInit(&stat);        /* Initialize the statistics variables. */
164 
165   if (lu->MatInputMode == GLOBAL) {
166 #if defined(PETSC_USE_COMPLEX)
167     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
168                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
169 #else
170     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
171                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
172 #endif
173   } else { /* distributed mat input */
174 #if defined(PETSC_USE_COMPLEX)
175     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid,
176 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
177     if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info);
178 #else
179     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid,
180 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
181     if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info);
182 #endif
183   }
184   if (lu->StatPrint) {
185      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
186   }
187   PStatFree(&stat);
188 
189   if (size > 1) {
190     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
191       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
192       ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
193       ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
194       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
195       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
196     } else {
197       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
198     }
199   } else {
200     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
201   }
202   PetscFunctionReturn(0);
203 }
204 
205 #undef __FUNCT__
206 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
207 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,Mat *F)
208 {
209   Mat              *tseq,A_seq = PETSC_NULL;
210   Mat_SeqAIJ       *aa,*bb;
211   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr;
212   PetscErrorCode ierr;
213   int              M=A->M,N=A->N,info,size,rank,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
214                    m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
215   SuperLUStat_t    stat;
216   double           *berr=0;
217   IS               isrow;
218   PetscLogDouble   time0,time,time_min,time_max;
219 #if defined(PETSC_USE_COMPLEX)
220   doublecomplex    *av, *bv;
221 #else
222   double           *av, *bv;
223 #endif
224 
225   PetscFunctionBegin;
226   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
227   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
228 
229   if (lu->StatPrint) { /* collect time for mat conversion */
230     ierr = MPI_Barrier(A->comm);CHKERRQ(ierr);
231     ierr = PetscGetTime(&time0);CHKERRQ(ierr);
232   }
233 
234   if (lu->MatInputMode == GLOBAL) { /* global mat input */
235     if (size > 1) { /* convert mpi A to seq mat A */
236       ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr);
237       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr);
238       ierr = ISDestroy(isrow);CHKERRQ(ierr);
239 
240       A_seq = *tseq;
241       ierr = PetscFree(tseq);CHKERRQ(ierr);
242       aa =  (Mat_SeqAIJ*)A_seq->data;
243     } else {
244       aa =  (Mat_SeqAIJ*)A->data;
245     }
246 
247     /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */
248     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
249 #if defined(PETSC_USE_COMPLEX)
250       zallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
251 #else
252       dallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
253 #endif
254     } else { /* successive numeric factorization, sparsity pattern is reused. */
255       Destroy_CompCol_Matrix_dist(&lu->A_sup);
256       Destroy_LU(N, &lu->grid, &lu->LUstruct);
257       lu->options.Fact = SamePattern;
258     }
259 #if defined(PETSC_USE_COMPLEX)
260     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
261 #else
262     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
263 #endif
264 
265     /* Create compressed column matrix A_sup. */
266 #if defined(PETSC_USE_COMPLEX)
267     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
268 #else
269     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
270 #endif
271   } else { /* distributed mat input */
272     Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
273     aa=(Mat_SeqAIJ*)(mat->A)->data;
274     bb=(Mat_SeqAIJ*)(mat->B)->data;
275     ai=aa->i; aj=aa->j;
276     bi=bb->i; bj=bb->j;
277 #if defined(PETSC_USE_COMPLEX)
278     av=(doublecomplex*)aa->a;
279     bv=(doublecomplex*)bb->a;
280 #else
281     av=aa->a;
282     bv=bb->a;
283 #endif
284     rstart = mat->rstart;
285     nz     = aa->nz + bb->nz;
286     garray = mat->garray;
287     rstart = mat->rstart;
288 
289     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
290 #if defined(PETSC_USE_COMPLEX)
291       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
292 #else
293       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
294 #endif
295     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
296       /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);  */ /* crash! */
297       Destroy_LU(N, &lu->grid, &lu->LUstruct);
298       lu->options.Fact = SamePattern;
299     }
300     nz = 0; irow = mat->rstart;
301     for ( i=0; i<m; i++ ) {
302       lu->row[i] = nz;
303       countA = ai[i+1] - ai[i];
304       countB = bi[i+1] - bi[i];
305       ajj = aj + ai[i];  /* ptr to the beginning of this row */
306       bjj = bj + bi[i];
307 
308       /* B part, smaller col index */
309       colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */
310       jB = 0;
311       for (j=0; j<countB; j++){
312         jcol = garray[bjj[j]];
313         if (jcol > colA_start) {
314           jB = j;
315           break;
316         }
317         lu->col[nz] = jcol;
318         lu->val[nz++] = *bv++;
319         if (j==countB-1) jB = countB;
320       }
321 
322       /* A part */
323       for (j=0; j<countA; j++){
324         lu->col[nz] = mat->rstart + ajj[j];
325         lu->val[nz++] = *av++;
326       }
327 
328       /* B part, larger col index */
329       for (j=jB; j<countB; j++){
330         lu->col[nz] = garray[bjj[j]];
331         lu->val[nz++] = *bv++;
332       }
333     }
334     lu->row[m] = nz;
335 #if defined(PETSC_USE_COMPLEX)
336     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
337 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
338 #else
339     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
340 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
341 #endif
342   }
343   if (lu->StatPrint) {
344     ierr = PetscGetTime(&time);CHKERRQ(ierr);
345     time0 = time - time0;
346   }
347 
348   /* Factor the matrix. */
349   PStatInit(&stat);   /* Initialize the statistics variables. */
350 
351   if (lu->MatInputMode == GLOBAL) { /* global mat input */
352 #if defined(PETSC_USE_COMPLEX)
353     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
354                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
355 #else
356     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
357                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
358 #endif
359   } else { /* distributed mat input */
360 #if defined(PETSC_USE_COMPLEX)
361     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
362 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
363     if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info);
364 #else
365     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
366 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
367     if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info);
368 #endif
369   }
370 
371   if (lu->MatInputMode == GLOBAL && size > 1){
372     ierr = MatDestroy(A_seq);CHKERRQ(ierr);
373   }
374 
375   if (lu->StatPrint) {
376     if (size > 1){
377       ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);
378       ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);
379       ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);
380       time = time/size; /* average time */
381       if (!rank)
382         ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \
383                               %g / %g / %g\n",time_max,time_min,time);
384     } else {
385       ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n \
386                               %g\n",time0);
387     }
388 
389     PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
390   }
391   PStatFree(&stat);
392   (*F)->assembled = PETSC_TRUE;
393   lu->flg         = SAME_NONZERO_PATTERN;
394   PetscFunctionReturn(0);
395 }
396 
397 /* Note the Petsc r and c permutations are ignored */
398 #undef __FUNCT__
399 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
400 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
401 {
402   Mat               B;
403   Mat_SuperLU_DIST  *lu;
404   PetscErrorCode ierr;
405   int               M=A->M,N=A->N,size,indx;
406   superlu_options_t options;
407   PetscTruth        flg;
408   const char        *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","COLAMD"};
409   const char        *prtype[] = {"LargeDiag","NATURAL"};
410 
411   PetscFunctionBegin;
412   /* Create the factorization matrix */
413   ierr = MatCreate(A->comm,A->m,A->n,M,N,&B);CHKERRQ(ierr);
414   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
415   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
416   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
417 
418   B->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
419   B->ops->solve            = MatSolve_SuperLU_DIST;
420   B->factor                = FACTOR_LU;
421 
422   lu = (Mat_SuperLU_DIST*)(B->spptr);
423 
424   /* Set the input options */
425   set_default_options_dist(&options);
426   lu->MatInputMode = GLOBAL;
427   ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr);
428 
429   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
430   lu->nprow = size/2;               /* Default process rows.      */
431   if (!lu->nprow) lu->nprow = 1;
432   lu->npcol = size/lu->nprow;           /* Default process columns.   */
433 
434   ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
435 
436     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
437     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
438     if (size != lu->nprow * lu->npcol) SETERRQ(1,"Number of processes should be equal to nprow*npcol");
439 
440     ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
441     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
442 
443     ierr = PetscOptionsLogical("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
444     if (!flg) {
445       options.Equil = NO;
446     }
447 
448     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr);
449     if (flg) {
450       switch (indx) {
451       case 0:
452         options.RowPerm = LargeDiag;
453         break;
454       case 1:
455         options.RowPerm = NOROWPERM;
456         break;
457       }
458     }
459 
460     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],&indx,&flg);CHKERRQ(ierr);
461     if (flg) {
462       switch (indx) {
463       case 0:
464         options.ColPerm = MMD_AT_PLUS_A;
465         break;
466       case 1:
467         options.ColPerm = NATURAL;
468         break;
469       case 2:
470         options.ColPerm = MMD_ATA;
471         break;
472       case 3:
473         options.ColPerm = COLAMD;
474         break;
475       }
476     }
477 
478     ierr = PetscOptionsLogical("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
479     if (!flg) {
480       options.ReplaceTinyPivot = NO;
481     }
482 
483     options.IterRefine = NOREFINE;
484     ierr = PetscOptionsLogical("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
485     if (flg) {
486       options.IterRefine = DOUBLE;
487     }
488 
489     if (PetscLogPrintInfo) {
490       lu->StatPrint = (int)PETSC_TRUE;
491     } else {
492       lu->StatPrint = (int)PETSC_FALSE;
493     }
494     ierr = PetscOptionsLogical("-mat_superlu_dist_statprint","Print factorization information","None",
495                               (PetscTruth)lu->StatPrint,(PetscTruth*)&lu->StatPrint,0);CHKERRQ(ierr);
496   PetscOptionsEnd();
497 
498   /* Initialize the SuperLU process grid. */
499   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
500 
501   /* Initialize ScalePermstruct and LUstruct. */
502   ScalePermstructInit(M, N, &lu->ScalePermstruct);
503   LUstructInit(M, N, &lu->LUstruct);
504 
505   lu->options            = options;
506   lu->flg                = DIFFERENT_NONZERO_PATTERN;
507   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
508   *F = B;
509   PetscFunctionReturn(0);
510 }
511 
512 #undef __FUNCT__
513 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST"
514 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) {
515   PetscErrorCode ierr;
516   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr);
517 
518   PetscFunctionBegin;
519   ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
520   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
521   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
522   PetscFunctionReturn(0);
523 }
524 
525 #undef __FUNCT__
526 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST"
527 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
528 {
529   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
530   superlu_options_t options;
531   PetscErrorCode ierr;
532   char              *colperm;
533 
534   PetscFunctionBegin;
535   /* check if matrix is superlu_dist type */
536   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);
537 
538   options = lu->options;
539   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
540   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",(options.Equil != NO) ? "true": "false");CHKERRQ(ierr);
541   ierr = PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr);
542   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",(options.ReplaceTinyPivot != NO) ? "true": "false");CHKERRQ(ierr);
543   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",(options.IterRefine == DOUBLE) ? "true": "false");CHKERRQ(ierr);
544   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
545   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
546   if (options.ColPerm == NATURAL) {
547     colperm = "NATURAL";
548   } else if (options.ColPerm == MMD_AT_PLUS_A) {
549     colperm = "MMD_AT_PLUS_A";
550   } else if (options.ColPerm == MMD_ATA) {
551     colperm = "MMD_ATA";
552   } else if (options.ColPerm == COLAMD) {
553     colperm = "COLAMD";
554   } else {
555     SETERRQ(1,"Unknown column permutation");
556   }
557   ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation %s \n",colperm);CHKERRQ(ierr);
558   PetscFunctionReturn(0);
559 }
560 
561 #undef __FUNCT__
562 #define __FUNCT__ "MatView_SuperLU_DIST"
563 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
564 {
565   PetscErrorCode ierr;
566   PetscTruth        iascii;
567   PetscViewerFormat format;
568   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)(A->spptr);
569 
570   PetscFunctionBegin;
571   ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr);
572 
573   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
574   if (iascii) {
575     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
576     if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
577       ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
578     }
579   }
580   PetscFunctionReturn(0);
581 }
582 
583 
584 EXTERN_C_BEGIN
585 #undef __FUNCT__
586 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST"
587 PetscErrorCode MatConvert_Base_SuperLU_DIST(Mat A,const MatType type,Mat *newmat) {
588   /* This routine is only called to convert to MATSUPERLU_DIST */
589   /* from MATSEQAIJ if A has a single process communicator */
590   /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */
591   PetscErrorCode ierr;
592   int              size;
593   MPI_Comm         comm;
594   Mat              B=*newmat;
595   Mat_SuperLU_DIST *lu;
596 
597   PetscFunctionBegin;
598   if (B != A) {
599     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
600   }
601 
602   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
603   ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr);
604 
605   lu->MatDuplicate         = A->ops->duplicate;
606   lu->MatView              = A->ops->view;
607   lu->MatAssemblyEnd       = A->ops->assemblyend;
608   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
609   lu->MatDestroy           = A->ops->destroy;
610   lu->CleanUpSuperLU_Dist  = PETSC_FALSE;
611 
612   B->spptr                 = (void*)lu;
613   B->ops->duplicate        = MatDuplicate_SuperLU_DIST;
614   B->ops->view             = MatView_SuperLU_DIST;
615   B->ops->assemblyend      = MatAssemblyEnd_SuperLU_DIST;
616   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
617   B->ops->destroy          = MatDestroy_SuperLU_DIST;
618   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
619   if (size == 1) {
620     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C",
621                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
622     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C",
623                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
624   } else {
625     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C",
626                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
627     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C",
628                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
629   }
630   PetscLogInfo(0,"Using SuperLU_DIST for SeqAIJ LU factorization and solves.");
631   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr);
632   *newmat = B;
633   PetscFunctionReturn(0);
634 }
635 EXTERN_C_END
636 
637 #undef __FUNCT__
638 #define __FUNCT__ "MatDuplicate_SuperLU_DIST"
639 PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) {
640   PetscErrorCode ierr;
641   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
642 
643   PetscFunctionBegin;
644   ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
645   ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr);
646   PetscFunctionReturn(0);
647 }
648 
649 /*MC
650   MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices
651   via the external package SuperLU_DIST.
652 
653   If SuperLU_DIST is installed (see the manual for
654   instructions on how to declare the existence of external packages),
655   a matrix type can be constructed which invokes SuperLU_DIST solvers.
656   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST).
657   This matrix type is only supported for double precision real.
658 
659   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
660   and from MATMPIAIJ otherwise.  As a result, for single process communicators,
661   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
662   for communicators controlling multiple processes.  It is recommended that you call both of
663   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
664   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
665   without data copy.
666 
667   Options Database Keys:
668 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
669 . -mat_superlu_dist_r <n> - number of rows in processor partition
670 . -mat_superlu_dist_c <n> - number of columns in processor partition
671 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
672 . -mat_superlu_dist_equil - equilibrate the matrix
673 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
674 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,COLAMD,NATURAL> - column permutation
675 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
676 . -mat_superlu_dist_iterrefine - use iterative refinement
677 - -mat_superlu_dist_statprint - print factorization information
678 
679    Level: beginner
680 
681 .seealso: PCLU
682 M*/
683 
684 EXTERN_C_BEGIN
685 #undef __FUNCT__
686 #define __FUNCT__ "MatCreate_SuperLU_DIST"
687 PetscErrorCode MatCreate_SuperLU_DIST(Mat A)
688 {
689   PetscErrorCode ierr;
690   int size;
691   Mat A_diag;
692 
693   PetscFunctionBegin;
694   /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */
695   /*   and SuperLU_DIST types */
696   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr);
697   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
698   if (size == 1) {
699     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
700   } else {
701     ierr   = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
702     A_diag = ((Mat_MPIAIJ *)A->data)->A;
703     ierr   = MatConvert_Base_SuperLU_DIST(A_diag,MATSUPERLU_DIST,&A_diag);CHKERRQ(ierr);
704   }
705   ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,&A);CHKERRQ(ierr);
706   PetscFunctionReturn(0);
707 }
708 EXTERN_C_END
709 
710