xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision 7d6bfa3b9d7db0ccd4cc481237114ca8dbb0dbff)
1 #define PETSCMAT_DLL
2 
3 /*
4         Provides an interface to the SuperLU_DIST_2.2 sparse solver
5 */
6 
7 #include "../src/mat/impls/aij/seq/aij.h"
8 #include "../src/mat/impls/aij/mpi/mpiaij.h"
9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get around weird problem with SuperLU on cray */
10 #include "stdlib.h"
11 #endif
12 
13 #if defined(PETSC_USE_64BIT_INDICES)
14 /* ugly SuperLU_Dist variable telling it to use long long int */
15 #define _LONGINT
16 #endif
17 
18 EXTERN_C_BEGIN
19 #if defined(PETSC_USE_COMPLEX)
20 #include "superlu_zdefs.h"
21 #else
22 #include "superlu_ddefs.h"
23 #endif
24 EXTERN_C_END
25 
26 typedef enum {GLOBAL,DISTRIBUTED} SuperLU_MatInputMode;
27 const char *SuperLU_MatInputModes[]    = {"GLOBAL","DISTRIBUTED","SuperLU_MatInputMode","PETSC_",0};
28 
29 typedef struct {
30   int_t                   nprow,npcol,*row,*col;
31   gridinfo_t              grid;
32   superlu_options_t       options;
33   SuperMatrix             A_sup;
34   ScalePermstruct_t       ScalePermstruct;
35   LUstruct_t              LUstruct;
36   int                     StatPrint;
37   SuperLU_MatInputMode    MatInputMode;
38   SOLVEstruct_t           SOLVEstruct;
39   fact_t                  FactPattern;
40   MPI_Comm                comm_superlu;
41 #if defined(PETSC_USE_COMPLEX)
42   doublecomplex           *val;
43 #else
44   double                  *val;
45 #endif
46 
47   /* Flag to clean up (non-global) SuperLU objects during Destroy */
48   PetscTruth CleanUpSuperLU_Dist;
49 } Mat_SuperLU_DIST;
50 
51 extern PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat,PetscViewer);
52 extern PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat,Mat,const MatFactorInfo *);
53 extern PetscErrorCode MatDestroy_SuperLU_DIST(Mat);
54 extern PetscErrorCode MatView_SuperLU_DIST(Mat,PetscViewer);
55 extern PetscErrorCode MatSolve_SuperLU_DIST(Mat,Vec,Vec);
56 extern PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat,Mat,IS,IS,const MatFactorInfo *);
57 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
58 
59 #undef __FUNCT__
60 #define __FUNCT__ "MatDestroy_SuperLU_DIST"
61 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
62 {
63   PetscErrorCode   ierr;
64   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
65 
66   PetscFunctionBegin;
67   if (lu->CleanUpSuperLU_Dist) {
68     /* Deallocate SuperLU_DIST storage */
69     if (lu->MatInputMode == GLOBAL) {
70       Destroy_CompCol_Matrix_dist(&lu->A_sup);
71     } else {
72       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
73       if ( lu->options.SolveInitialized ) {
74 #if defined(PETSC_USE_COMPLEX)
75         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
76 #else
77         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
78 #endif
79       }
80     }
81     Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct);
82     ScalePermstructFree(&lu->ScalePermstruct);
83     LUstructFree(&lu->LUstruct);
84 
85     /* Release the SuperLU_DIST process grid. */
86     superlu_gridexit(&lu->grid);
87 
88     ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
89   }
90 
91   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
92   PetscFunctionReturn(0);
93 }
94 
95 #undef __FUNCT__
96 #define __FUNCT__ "MatSolve_SuperLU_DIST"
97 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
98 {
99   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
100   PetscErrorCode   ierr;
101   PetscMPIInt      size;
102   PetscInt         m=A->rmap->N, N=A->cmap->N;
103   SuperLUStat_t    stat;
104   double           berr[1];
105   PetscScalar      *bptr;
106   PetscInt         nrhs=1;
107   Vec              x_seq;
108   IS               iden;
109   VecScatter       scat;
110   int              info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
111 
112   PetscFunctionBegin;
113   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
114   if (size > 1) {
115     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
116       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
117       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
118       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
119       ierr = ISDestroy(iden);CHKERRQ(ierr);
120 
121       ierr = VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
122       ierr = VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
123       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
124     } else { /* distributed mat input */
125       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
126       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
127     }
128   } else { /* size == 1 */
129     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
130     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
131   }
132 
133   if (lu->options.Fact != FACTORED) SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED");
134 
135   PStatInit(&stat);        /* Initialize the statistics variables. */
136   if (lu->MatInputMode == GLOBAL) {
137 #if defined(PETSC_USE_COMPLEX)
138     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
139                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
140 #else
141     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
142                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
143 #endif
144   } else { /* distributed mat input */
145 #if defined(PETSC_USE_COMPLEX)
146     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap->N, nrhs, &lu->grid,
147 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
148     if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info);
149 #else
150     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap->N, nrhs, &lu->grid,
151 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
152     if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
153 #endif
154   }
155   if (lu->options.PrintStat) {
156      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
157   }
158   PStatFree(&stat);
159 
160   if (size > 1) {
161     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
162       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
163       ierr = VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
164       ierr = VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
165       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
166       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
167     } else {
168       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
169     }
170   } else {
171     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
172   }
173   PetscFunctionReturn(0);
174 }
175 
176 #undef __FUNCT__
177 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
178 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F,Mat A,const MatFactorInfo *info)
179 {
180   Mat              *tseq,A_seq = PETSC_NULL;
181   Mat_SeqAIJ       *aa,*bb;
182   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(F)->spptr;
183   PetscErrorCode   ierr;
184   PetscInt         M=A->rmap->N,N=A->cmap->N,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
185                    m=A->rmap->n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
186   int              sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */
187   PetscMPIInt      size,rank;
188   SuperLUStat_t    stat;
189   double           *berr=0;
190   IS               isrow;
191   PetscLogDouble   time0,time,time_min,time_max;
192   Mat              F_diag=PETSC_NULL;
193 #if defined(PETSC_USE_COMPLEX)
194   doublecomplex    *av, *bv;
195 #else
196   double           *av, *bv;
197 #endif
198 
199   PetscFunctionBegin;
200   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
201   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
202 
203   if (lu->options.PrintStat) { /* collect time for mat conversion */
204     ierr = MPI_Barrier(((PetscObject)A)->comm);CHKERRQ(ierr);
205     ierr = PetscGetTime(&time0);CHKERRQ(ierr);
206   }
207 
208   if (lu->MatInputMode == GLOBAL) { /* global mat input */
209     if (size > 1) { /* convert mpi A to seq mat A */
210       ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr);
211       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr);
212       ierr = ISDestroy(isrow);CHKERRQ(ierr);
213 
214       A_seq = *tseq;
215       ierr = PetscFree(tseq);CHKERRQ(ierr);
216       aa =  (Mat_SeqAIJ*)A_seq->data;
217     } else {
218       aa =  (Mat_SeqAIJ*)A->data;
219     }
220 
221     /* Convert Petsc NR matrix to SuperLU_DIST NC.
222        Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */
223     if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */
224       if (lu->FactPattern == SamePattern_SameRowPerm){
225         Destroy_CompCol_Matrix_dist(&lu->A_sup);
226         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
227         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
228       } else {
229         Destroy_CompCol_Matrix_dist(&lu->A_sup);
230         Destroy_LU(N, &lu->grid, &lu->LUstruct);
231         lu->options.Fact = SamePattern;
232       }
233     }
234 #if defined(PETSC_USE_COMPLEX)
235     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
236 #else
237     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
238 #endif
239 
240     /* Create compressed column matrix A_sup. */
241 #if defined(PETSC_USE_COMPLEX)
242     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
243 #else
244     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
245 #endif
246   } else { /* distributed mat input */
247     Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
248     aa=(Mat_SeqAIJ*)(mat->A)->data;
249     bb=(Mat_SeqAIJ*)(mat->B)->data;
250     ai=aa->i; aj=aa->j;
251     bi=bb->i; bj=bb->j;
252 #if defined(PETSC_USE_COMPLEX)
253     av=(doublecomplex*)aa->a;
254     bv=(doublecomplex*)bb->a;
255 #else
256     av=aa->a;
257     bv=bb->a;
258 #endif
259     rstart = A->rmap->rstart;
260     nz     = aa->nz + bb->nz;
261     garray = mat->garray;
262 
263     if (lu->options.Fact == DOFACT) {/* first numeric factorization */
264 #if defined(PETSC_USE_COMPLEX)
265       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
266 #else
267       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
268 #endif
269     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
270       if (lu->FactPattern == SamePattern_SameRowPerm){
271         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
272         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
273       } else {
274         Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */
275         lu->options.Fact = SamePattern;
276       }
277     }
278     nz = 0; irow = rstart;
279     for ( i=0; i<m; i++ ) {
280       lu->row[i] = nz;
281       countA = ai[i+1] - ai[i];
282       countB = bi[i+1] - bi[i];
283       ajj = aj + ai[i];  /* ptr to the beginning of this row */
284       bjj = bj + bi[i];
285 
286       /* B part, smaller col index */
287       colA_start = rstart + ajj[0]; /* the smallest global col index of A */
288       jB = 0;
289       for (j=0; j<countB; j++){
290         jcol = garray[bjj[j]];
291         if (jcol > colA_start) {
292           jB = j;
293           break;
294         }
295         lu->col[nz] = jcol;
296         lu->val[nz++] = *bv++;
297         if (j==countB-1) jB = countB;
298       }
299 
300       /* A part */
301       for (j=0; j<countA; j++){
302         lu->col[nz] = rstart + ajj[j];
303         lu->val[nz++] = *av++;
304       }
305 
306       /* B part, larger col index */
307       for (j=jB; j<countB; j++){
308         lu->col[nz] = garray[bjj[j]];
309         lu->val[nz++] = *bv++;
310       }
311     }
312     lu->row[m] = nz;
313 #if defined(PETSC_USE_COMPLEX)
314     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
315 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
316 #else
317     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
318 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
319 #endif
320   }
321   if (lu->options.PrintStat) {
322     ierr = PetscGetTime(&time);CHKERRQ(ierr);
323     time0 = time - time0;
324   }
325 
326   /* Factor the matrix. */
327   PStatInit(&stat);   /* Initialize the statistics variables. */
328   CHKMEMQ;
329   if (lu->MatInputMode == GLOBAL) { /* global mat input */
330 #if defined(PETSC_USE_COMPLEX)
331     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
332                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
333 #else
334     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
335                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
336 #endif
337   } else { /* distributed mat input */
338 #if defined(PETSC_USE_COMPLEX)
339     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
340 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
341     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
342 #else
343     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
344 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
345     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
346 #endif
347   }
348 
349   if (lu->MatInputMode == GLOBAL && size > 1){
350     ierr = MatDestroy(A_seq);CHKERRQ(ierr);
351   }
352 
353   if (lu->options.PrintStat) {
354     if (size > 1){
355       ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,((PetscObject)A)->comm);
356       ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,((PetscObject)A)->comm);
357       ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,((PetscObject)A)->comm);
358       time = time/size; /* average time */
359       if (!rank) {
360         ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n                              %g / %g / %g\n",time_max,time_min,time);CHKERRQ(ierr);
361       }
362     } else {
363       ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n    %g\n",time0);CHKERRQ(ierr);
364     }
365     PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
366   }
367   PStatFree(&stat);
368   if (size > 1){
369     F_diag = ((Mat_MPIAIJ *)(F)->data)->A;
370     F_diag->assembled = PETSC_TRUE;
371   }
372   (F)->assembled    = PETSC_TRUE;
373   (F)->preallocated = PETSC_TRUE;
374   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
375   PetscFunctionReturn(0);
376 }
377 
378 /* Note the Petsc r and c permutations are ignored */
379 #undef __FUNCT__
380 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
381 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
382 {
383   Mat_SuperLU_DIST  *lu = (Mat_SuperLU_DIST*) (F)->spptr;
384   PetscInt          M=A->rmap->N,N=A->cmap->N;
385 
386   PetscFunctionBegin;
387   /* Initialize the SuperLU process grid. */
388   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
389 
390   /* Initialize ScalePermstruct and LUstruct. */
391   ScalePermstructInit(M, N, &lu->ScalePermstruct);
392   LUstructInit(M, N, &lu->LUstruct);
393   (F)->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
394   (F)->ops->solve            = MatSolve_SuperLU_DIST;
395   PetscFunctionReturn(0);
396 }
397 
398 EXTERN_C_BEGIN
399 #undef __FUNCT__
400 #define __FUNCT__ "MatFactorGetSolverPackage_aij_superlu_dist"
401 PetscErrorCode MatFactorGetSolverPackage_aij_superlu_dist(Mat A,const MatSolverPackage *type)
402 {
403   PetscFunctionBegin;
404   *type = MAT_SOLVER_SUPERLU_DIST;
405   PetscFunctionReturn(0);
406 }
407 EXTERN_C_END
408 
409 #undef __FUNCT__
410 #define __FUNCT__ "MatGetFactor_aij_superlu_dist"
411 PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
412 {
413   Mat               B;
414   Mat_SuperLU_DIST  *lu;
415   PetscErrorCode    ierr;
416   PetscInt          M=A->rmap->N,N=A->cmap->N,indx;
417   PetscMPIInt       size;
418   superlu_options_t options;
419   PetscTruth        flg;
420   const char        *pctype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","PARMETIS"};
421   const char        *prtype[] = {"LargeDiag","NATURAL"};
422   const char        *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"};
423 
424   PetscFunctionBegin;
425   /* Create the factorization matrix */
426   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
427   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,M,N);CHKERRQ(ierr);
428   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
429   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
430   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
431 
432   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
433   B->ops->view             = MatView_SuperLU_DIST;
434   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_aij_superlu_dist",MatFactorGetSolverPackage_aij_superlu_dist);CHKERRQ(ierr);
435   B->factor                = MAT_FACTOR_LU;
436 
437   ierr = PetscNewLog(B,Mat_SuperLU_DIST,&lu);CHKERRQ(ierr);
438   B->spptr = lu;
439 
440   /* Set the default input options:
441      options.Fact              = DOFACT;
442      options.Equil             = YES;
443      options.ParSymbFact       = NO;
444      options.ColPerm           = MMD_AT_PLUS_A;
445      options.RowPerm           = LargeDiag;
446      options.ReplaceTinyPivot  = YES;
447      options.IterRefine        = DOUBLE;
448      options.Trans             = NOTRANS;
449      options.SolveInitialized  = NO;
450      options.RefineInitialized = NO;
451      options.PrintStat         = YES;
452   */
453   set_default_options_dist(&options);
454 
455   ierr = MPI_Comm_dup(((PetscObject)A)->comm,&(lu->comm_superlu));CHKERRQ(ierr);
456   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
457   /* Default num of process columns and rows */
458   lu->npcol = PetscMPIIntCast((PetscInt)(0.5 + sqrt((PetscReal)size)));
459   if (!lu->npcol) lu->npcol = 1;
460   while (lu->npcol > 0) {
461     lu->nprow = PetscMPIIntCast(size/lu->npcol);
462     if (size == lu->nprow * lu->npcol) break;
463     lu->npcol --;
464   }
465 
466   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
467     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
468     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
469     if (size != lu->nprow * lu->npcol)
470       SETERRQ3(PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %d * npcol %d",size,lu->nprow,lu->npcol);
471 
472     lu->MatInputMode = DISTRIBUTED;
473     ierr = PetscOptionsEnum("-mat_superlu_dist_matinput","Matrix input mode (global or distributed)","None",SuperLU_MatInputModes,(PetscEnum)lu->MatInputMode,(PetscEnum*)&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
474     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
475 
476     ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
477     if (!flg) {
478       options.Equil = NO;
479     }
480 
481     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr);
482     if (flg) {
483       switch (indx) {
484       case 0:
485         options.RowPerm = LargeDiag;
486         break;
487       case 1:
488         options.RowPerm = NOROWPERM;
489         break;
490       }
491     }
492 
493     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",pctype,4,pctype[0],&indx,&flg);CHKERRQ(ierr);
494     if (flg) {
495       switch (indx) {
496       case 0:
497         options.ColPerm = MMD_AT_PLUS_A;
498         break;
499       case 1:
500         options.ColPerm = NATURAL;
501         break;
502       case 2:
503         options.ColPerm = MMD_ATA;
504         break;
505       case 3:
506         options.ColPerm = PARMETIS;
507         break;
508       }
509     }
510 
511     ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
512     if (!flg) {
513       options.ReplaceTinyPivot = NO;
514     }
515 
516     options.ParSymbFact = NO;
517     ierr = PetscOptionsTruth("-mat_superlu_dist_parsymbfact","Parallel symbolic factorization","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
518     if (flg){
519 #ifdef PETSC_HAVE_PARMETIS
520       options.ParSymbFact = YES;
521       options.ColPerm     = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */
522 #else
523       printf("parsymbfact needs PARMETIS");
524 #endif
525     }
526 
527     lu->FactPattern = SamePattern;
528     ierr = PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[0],&indx,&flg);CHKERRQ(ierr);
529     if (flg) {
530       switch (indx) {
531       case 0:
532         lu->FactPattern = SamePattern;
533         break;
534       case 1:
535         lu->FactPattern = SamePattern_SameRowPerm;
536         break;
537       }
538     }
539 
540     options.IterRefine = NOREFINE;
541     ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
542     if (flg) {
543       options.IterRefine = DOUBLE;
544     }
545 
546     if (PetscLogPrintInfo) {
547       options.PrintStat = YES;
548     } else {
549       options.PrintStat = NO;
550     }
551     ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None",
552                               (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr);
553   PetscOptionsEnd();
554 
555   lu->options             = options;
556   lu->options.Fact        = DOFACT;
557   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
558   *F = B;
559   PetscFunctionReturn(0);
560 }
561 
562 EXTERN_C_BEGIN
563 #undef __FUNCT__
564 #define __FUNCT__ "MatGetFactor_seqaij_superlu_dist"
565 PetscErrorCode MatGetFactor_seqaij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
566 {
567   PetscErrorCode ierr;
568 
569   PetscFunctionBegin;
570   ierr = MatGetFactor_aij_superlu_dist(A,ftype,F);CHKERRQ(ierr);
571   PetscFunctionReturn(0);
572 }
573 EXTERN_C_END
574 
575 EXTERN_C_BEGIN
576 #undef __FUNCT__
577 #define __FUNCT__ "MatGetFactor_mpiaij_superlu_dist"
578 PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
579 {
580   PetscErrorCode ierr;
581 
582   PetscFunctionBegin;
583   ierr = MatGetFactor_aij_superlu_dist(A,ftype,F);CHKERRQ(ierr);
584   PetscFunctionReturn(0);
585 }
586 EXTERN_C_END
587 
588 #undef __FUNCT__
589 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST"
590 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
591 {
592   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
593   superlu_options_t options;
594   PetscErrorCode    ierr;
595 
596   PetscFunctionBegin;
597   /* check if matrix is superlu_dist type */
598   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);
599 
600   options = lu->options;
601   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
602   ierr = PetscViewerASCIIPrintf(viewer,"  Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
603   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr);
604   ierr = PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr);
605   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr);
606   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr);
607   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
608   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
609   if (options.ColPerm == NATURAL) {
610     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");CHKERRQ(ierr);
611   } else if (options.ColPerm == MMD_AT_PLUS_A) {
612     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr);
613   } else if (options.ColPerm == MMD_ATA) {
614     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");CHKERRQ(ierr);
615   } else if (options.ColPerm == PARMETIS) {
616     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation PARMETIS\n");CHKERRQ(ierr);
617   } else {
618     SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation");
619   }
620 
621   ierr = PetscViewerASCIIPrintf(viewer,"  Parallel symbolic factorization %s \n",PetscTruths[options.ParSymbFact != NO]);CHKERRQ(ierr);
622 
623   if (lu->FactPattern == SamePattern){
624     ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern\n");CHKERRQ(ierr);
625   } else {
626     ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern_SameRowPerm\n");CHKERRQ(ierr);
627   }
628   PetscFunctionReturn(0);
629 }
630 
631 #undef __FUNCT__
632 #define __FUNCT__ "MatView_SuperLU_DIST"
633 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
634 {
635   PetscErrorCode    ierr;
636   PetscTruth        iascii;
637   PetscViewerFormat format;
638 
639   PetscFunctionBegin;
640   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
641   if (iascii) {
642     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
643     if (format == PETSC_VIEWER_ASCII_INFO) {
644       ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
645     }
646   }
647   PetscFunctionReturn(0);
648 }
649 
650 
651 /*MC
652   MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices
653   via the external package SuperLU_DIST.
654 
655   If SuperLU_DIST is installed (see the manual for
656   instructions on how to declare the existence of external packages),
657   a matrix type can be constructed which invokes SuperLU_DIST solvers.
658   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST) then
659   optionally call MatSeqAIJSetPreallocation() or MatMPIAIJSetPreallocation() DO NOT
660   call MatCreateSeqAIJ/MPIAIJ() directly or the preallocation information will be LOST!
661 
662   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
663   and from MATMPIAIJ otherwise.  As a result, for single process communicators,
664   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
665   for communicators controlling multiple processes.  It is recommended that you call both of
666   the above preallocation routines for simplicity.  One can also call MatConvert() for an inplace
667   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
668   without data copy; but this MUST be called AFTER the matrix values are set.
669 
670 
671 
672   Options Database Keys:
673 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
674 . -mat_superlu_dist_r <n> - number of rows in processor partition
675 . -mat_superlu_dist_c <n> - number of columns in processor partition
676 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
677 . -mat_superlu_dist_equil - equilibrate the matrix
678 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
679 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation
680 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
681 . -mat_superlu_dist_fact <SamePattern> (choose one of) SamePattern SamePattern_SameRowPerm
682 . -mat_superlu_dist_iterrefine - use iterative refinement
683 - -mat_superlu_dist_statprint - print factorization information
684 
685    Level: beginner
686 
687 .seealso: PCLU
688 M*/
689 
690