xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision a4efd8ea4bb0600a5aef92b2c23cbfa314827193)
1 #define PETSCMAT_DLL
2 
3 /*
4         Provides an interface to the SuperLU_DIST_2.2 sparse solver
5 */
6 
7 #include "src/mat/impls/aij/seq/aij.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get around weird problem with SuperLU on cray */
10 #include "stdlib.h"
11 #endif
12 
13 EXTERN_C_BEGIN
14 #if defined(PETSC_USE_COMPLEX)
15 #include "superlu_zdefs.h"
16 #else
17 #include "superlu_ddefs.h"
18 #endif
19 EXTERN_C_END
20 
/* Whether the whole matrix is replicated on every process (GLOBAL) or each
   process supplies only its local rows (DISTRIBUTED) to SuperLU_DIST */
typedef enum {GLOBAL,DISTRIBUTED} SuperLU_MatInputMode;

/* Solver context stored in A->spptr of the factored matrix; holds every
   SuperLU_DIST object that must persist between factorization and solves */
typedef struct {
  int_t                   nprow,npcol,*row,*col;  /* process-grid dimensions; matrix index arrays (allocated by SuperLU_DIST conversion/allocation routines) */
  gridinfo_t              grid;                   /* SuperLU_DIST 2D process grid */
  superlu_options_t       options;                /* solver options (Fact, ColPerm, IterRefine, ...) */
  SuperMatrix             A_sup;                  /* matrix in SuperLU_DIST format (SLU_NC global or SLU_NR_loc distributed) */
  ScalePermstruct_t       ScalePermstruct;        /* row/column scalings and permutations */
  LUstruct_t              LUstruct;               /* the distributed L and U factors */
  int                     StatPrint;              /* print factorization statistics */
  int                     MatInputMode;           /* a SuperLU_MatInputMode value: GLOBAL or DISTRIBUTED */
  SOLVEstruct_t           SOLVEstruct;            /* communication structures for the distributed triangular solves */
  fact_t                  FactPattern;            /* SamePattern or SamePattern_SameRowPerm, used on repeated factorizations */
  MPI_Comm                comm_superlu;           /* private duplicate of the matrix communicator handed to SuperLU_DIST */
#if defined(PETSC_USE_COMPLEX)
  doublecomplex           *val;
#else
  double                  *val;
#endif

  /* Flag to clean up (non-global) SuperLU objects during Destroy */
  PetscTruth CleanUpSuperLU_Dist;
} Mat_SuperLU_DIST;
44 
45 extern PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat,PetscViewer);
46 extern PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat,MatFactorInfo *,Mat *);
47 extern PetscErrorCode MatDestroy_SuperLU_DIST(Mat);
48 extern PetscErrorCode MatView_SuperLU_DIST(Mat,PetscViewer);
49 extern PetscErrorCode MatSolve_SuperLU_DIST(Mat,Vec,Vec);
50 extern PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat,IS,IS,MatFactorInfo *,Mat *);
51 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
52 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_SuperLU_DIST"
/*
   Releases all SuperLU_DIST storage attached to A->spptr (matrix copy,
   LU factors, scalings/permutations, process grid, duplicated
   communicator) and then falls through to the normal MPIAIJ destroy.
   Cleanup only happens if a factorization was actually set up, as
   recorded by the CleanUpSuperLU_Dist flag.
*/
PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
{
  PetscErrorCode   ierr;
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;

  PetscFunctionBegin;
  if (lu->CleanUpSuperLU_Dist) {
    /* Deallocate SuperLU_DIST storage */
    if (lu->MatInputMode == GLOBAL) {
      /* global input stored as compressed-column (SLU_NC) */
      Destroy_CompCol_Matrix_dist(&lu->A_sup);
    } else {
      /* distributed input stored as local compressed-row (SLU_NR_loc) */
      Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
      if ( lu->options.SolveInitialized ) {
        /* free the communication structures built for the distributed triangular solves */
#if defined(PETSC_USE_COMPLEX)
        zSolveFinalize(&lu->options, &lu->SOLVEstruct);
#else
        dSolveFinalize(&lu->options, &lu->SOLVEstruct);
#endif
      }
    }
    Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct);
    ScalePermstructFree(&lu->ScalePermstruct);
    LUstructFree(&lu->LUstruct);

    /* Release the SuperLU_DIST process grid. */
    superlu_gridexit(&lu->grid);

    ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
  }

  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
88 
89 #undef __FUNCT__
90 #define __FUNCT__ "MatSolve_SuperLU_DIST"
91 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
92 {
93   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
94   PetscErrorCode   ierr;
95   PetscMPIInt      size;
96   PetscInt         m=A->rmap->N, N=A->cmap->N;
97   SuperLUStat_t    stat;
98   double           berr[1];
99   PetscScalar      *bptr;
100   PetscInt         info, nrhs=1;
101   Vec              x_seq;
102   IS               iden;
103   VecScatter       scat;
104 
105   PetscFunctionBegin;
106   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
107   if (size > 1) {
108     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
109       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
110       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
111       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
112       ierr = ISDestroy(iden);CHKERRQ(ierr);
113 
114       ierr = VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
115       ierr = VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
116       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
117     } else { /* distributed mat input */
118       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
119       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
120     }
121   } else { /* size == 1 */
122     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
123     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
124   }
125 
126   if (lu->options.Fact != FACTORED) SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED");
127 
128   PStatInit(&stat);        /* Initialize the statistics variables. */
129   if (lu->MatInputMode == GLOBAL) {
130 #if defined(PETSC_USE_COMPLEX)
131     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
132                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
133 #else
134     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
135                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
136 #endif
137   } else { /* distributed mat input */
138 #if defined(PETSC_USE_COMPLEX)
139     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap->N, nrhs, &lu->grid,
140 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
141     if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info);
142 #else
143     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap->N, nrhs, &lu->grid,
144 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
145     if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
146 #endif
147   }
148   if (lu->options.PrintStat) {
149      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
150   }
151   PStatFree(&stat);
152 
153   if (size > 1) {
154     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
155       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
156       ierr = VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
157       ierr = VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
158       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
159       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
160     } else {
161       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
162     }
163   } else {
164     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
165   }
166   PetscFunctionReturn(0);
167 }
168 
169 #undef __FUNCT__
170 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
171 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F)
172 {
173   Mat              *tseq,A_seq = PETSC_NULL;
174   Mat_SeqAIJ       *aa,*bb;
175   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr;
176   PetscErrorCode   ierr;
177   PetscInt         M=A->rmap->N,N=A->cmap->N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
178                    m=A->rmap->n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
179   PetscMPIInt      size,rank;
180   SuperLUStat_t    stat;
181   double           *berr=0;
182   IS               isrow;
183   PetscLogDouble   time0,time,time_min,time_max;
184   Mat              F_diag=PETSC_NULL;
185 #if defined(PETSC_USE_COMPLEX)
186   doublecomplex    *av, *bv;
187 #else
188   double           *av, *bv;
189 #endif
190 
191   PetscFunctionBegin;
192   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
193   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
194 
195   if (lu->options.PrintStat) { /* collect time for mat conversion */
196     ierr = MPI_Barrier(((PetscObject)A)->comm);CHKERRQ(ierr);
197     ierr = PetscGetTime(&time0);CHKERRQ(ierr);
198   }
199 
200   if (lu->MatInputMode == GLOBAL) { /* global mat input */
201     if (size > 1) { /* convert mpi A to seq mat A */
202       ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr);
203       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr);
204       ierr = ISDestroy(isrow);CHKERRQ(ierr);
205 
206       A_seq = *tseq;
207       ierr = PetscFree(tseq);CHKERRQ(ierr);
208       aa =  (Mat_SeqAIJ*)A_seq->data;
209     } else {
210       aa =  (Mat_SeqAIJ*)A->data;
211     }
212 
213     /* Convert Petsc NR matrix to SuperLU_DIST NC.
214        Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */
215     if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */
216       if (lu->FactPattern == SamePattern_SameRowPerm){
217         Destroy_CompCol_Matrix_dist(&lu->A_sup);
218         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
219         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
220       } else {
221         Destroy_CompCol_Matrix_dist(&lu->A_sup);
222         Destroy_LU(N, &lu->grid, &lu->LUstruct);
223         lu->options.Fact = SamePattern;
224       }
225     }
226 #if defined(PETSC_USE_COMPLEX)
227     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
228 #else
229     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
230 #endif
231 
232     /* Create compressed column matrix A_sup. */
233 #if defined(PETSC_USE_COMPLEX)
234     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
235 #else
236     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
237 #endif
238   } else { /* distributed mat input */
239     Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
240     aa=(Mat_SeqAIJ*)(mat->A)->data;
241     bb=(Mat_SeqAIJ*)(mat->B)->data;
242     ai=aa->i; aj=aa->j;
243     bi=bb->i; bj=bb->j;
244 #if defined(PETSC_USE_COMPLEX)
245     av=(doublecomplex*)aa->a;
246     bv=(doublecomplex*)bb->a;
247 #else
248     av=aa->a;
249     bv=bb->a;
250 #endif
251     rstart = A->rmap->rstart;
252     nz     = aa->nz + bb->nz;
253     garray = mat->garray;
254 
255     if (lu->options.Fact == DOFACT) {/* first numeric factorization */
256 #if defined(PETSC_USE_COMPLEX)
257       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
258 #else
259       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
260 #endif
261     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
262       if (lu->FactPattern == SamePattern_SameRowPerm){
263         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
264         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
265       } else {
266         Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */
267         lu->options.Fact = SamePattern;
268       }
269     }
270     nz = 0; irow = rstart;
271     for ( i=0; i<m; i++ ) {
272       lu->row[i] = nz;
273       countA = ai[i+1] - ai[i];
274       countB = bi[i+1] - bi[i];
275       ajj = aj + ai[i];  /* ptr to the beginning of this row */
276       bjj = bj + bi[i];
277 
278       /* B part, smaller col index */
279       colA_start = rstart + ajj[0]; /* the smallest global col index of A */
280       jB = 0;
281       for (j=0; j<countB; j++){
282         jcol = garray[bjj[j]];
283         if (jcol > colA_start) {
284           jB = j;
285           break;
286         }
287         lu->col[nz] = jcol;
288         lu->val[nz++] = *bv++;
289         if (j==countB-1) jB = countB;
290       }
291 
292       /* A part */
293       for (j=0; j<countA; j++){
294         lu->col[nz] = rstart + ajj[j];
295         lu->val[nz++] = *av++;
296       }
297 
298       /* B part, larger col index */
299       for (j=jB; j<countB; j++){
300         lu->col[nz] = garray[bjj[j]];
301         lu->val[nz++] = *bv++;
302       }
303     }
304     lu->row[m] = nz;
305 #if defined(PETSC_USE_COMPLEX)
306     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
307 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
308 #else
309     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
310 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
311 #endif
312   }
313   if (lu->options.PrintStat) {
314     ierr = PetscGetTime(&time);CHKERRQ(ierr);
315     time0 = time - time0;
316   }
317 
318   /* Factor the matrix. */
319   PStatInit(&stat);   /* Initialize the statistics variables. */
320 
321   if (lu->MatInputMode == GLOBAL) { /* global mat input */
322 #if defined(PETSC_USE_COMPLEX)
323     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
324                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
325 #else
326     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
327                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
328 #endif
329   } else { /* distributed mat input */
330 #if defined(PETSC_USE_COMPLEX)
331     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
332 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
333     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
334 #else
335     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
336 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
337     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
338 #endif
339   }
340 
341   if (lu->MatInputMode == GLOBAL && size > 1){
342     ierr = MatDestroy(A_seq);CHKERRQ(ierr);
343   }
344 
345   if (lu->options.PrintStat) {
346     if (size > 1){
347       ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,((PetscObject)A)->comm);
348       ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,((PetscObject)A)->comm);
349       ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,((PetscObject)A)->comm);
350       time = time/size; /* average time */
351       if (!rank) {
352         ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n                              %g / %g / %g\n",time_max,time_min,time);CHKERRQ(ierr);
353       }
354     } else {
355       ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n    %g\n",time0);CHKERRQ(ierr);
356     }
357     PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
358   }
359   PStatFree(&stat);
360   if (size > 1){
361     F_diag = ((Mat_MPIAIJ *)(*F)->data)->A;
362     F_diag->assembled = PETSC_TRUE;
363   }
364   (*F)->assembled    = PETSC_TRUE;
365   (*F)->preallocated = PETSC_TRUE;
366   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
367   PetscFunctionReturn(0);
368 }
369 
/* Note the Petsc r and c permutations are ignored */
#undef __FUNCT__
#define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
/*
   "Symbolic" factorization: only sets up the SuperLU_DIST process grid
   (nprow x npcol, as chosen in MatGetFactor) and initializes the
   ScalePermstruct/LUstruct objects, then installs the numeric-
   factorization and solve callbacks.  The actual ordering and symbolic
   analysis are performed inside MatLUFactorNumeric_SuperLU_DIST.
*/
PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
{
  Mat_SuperLU_DIST  *lu = (Mat_SuperLU_DIST*) (*F)->spptr;
  PetscInt          M=A->rmap->N,N=A->cmap->N;

  PetscFunctionBegin;
  /* Initialize the SuperLU process grid. */
  superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);

  /* Initialize ScalePermstruct and LUstruct. */
  ScalePermstructInit(M, N, &lu->ScalePermstruct);
  LUstructInit(M, N, &lu->LUstruct);
  (*F)->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
  (*F)->ops->solve            = MatSolve_SuperLU_DIST;
  PetscFunctionReturn(0);
}
389 
390 EXTERN_C_BEGIN
391 #undef __FUNCT__
392 #define __FUNCT__ "MatGetFactor_mpiaij_superlu_dist"
393 PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
394 {
395   Mat               B;
396   Mat_SuperLU_DIST  *lu;
397   PetscErrorCode    ierr;
398   PetscInt          M=A->rmap->N,N=A->cmap->N,indx;
399   PetscMPIInt       size;
400   superlu_options_t options;
401   PetscTruth        flg;
402   const char        *pctype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","PARMETIS"};
403   const char        *prtype[] = {"LargeDiag","NATURAL"};
404   const char        *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"};
405 
406   PetscFunctionBegin;
407   /* Create the factorization matrix */
408   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
409   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,M,N);CHKERRQ(ierr);
410   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
411   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
412   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
413 
414   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
415   B->factor                = MAT_FACTOR_LU;
416 
417   ierr = PetscNewLog(B,Mat_SuperLU_DIST,&lu);CHKERRQ(ierr);
418   B->spptr = lu;
419 
420   /* Set the default input options:
421      options.Fact              = DOFACT;
422      options.Equil             = YES;
423      options.ParSymbFact       = NO;
424      options.ColPerm           = MMD_AT_PLUS_A;
425      options.RowPerm           = LargeDiag;
426      options.ReplaceTinyPivot  = YES;
427      options.IterRefine        = DOUBLE;
428      options.Trans             = NOTRANS;
429      options.SolveInitialized  = NO;
430      options.RefineInitialized = NO;
431      options.PrintStat         = YES;
432   */
433   set_default_options_dist(&options);
434 
435   ierr = MPI_Comm_dup(((PetscObject)A)->comm,&(lu->comm_superlu));CHKERRQ(ierr);
436   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
437   /* Default num of process columns and rows */
438   lu->npcol = PetscMPIIntCast((PetscInt)(0.5 + sqrt((PetscReal)size)));
439   if (!lu->npcol) lu->npcol = 1;
440   while (lu->npcol > 0) {
441     lu->nprow = PetscMPIIntCast(size/lu->npcol);
442     if (size == lu->nprow * lu->npcol) break;
443     lu->npcol --;
444   }
445 
446   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
447     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
448     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
449     if (size != lu->nprow * lu->npcol)
450       SETERRQ3(PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %d * npcol %d",size,lu->nprow,lu->npcol);
451 
452     lu->MatInputMode = DISTRIBUTED;
453     ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
454     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
455 
456     ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
457     if (!flg) {
458       options.Equil = NO;
459     }
460 
461     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr);
462     if (flg) {
463       switch (indx) {
464       case 0:
465         options.RowPerm = LargeDiag;
466         break;
467       case 1:
468         options.RowPerm = NOROWPERM;
469         break;
470       }
471     }
472 
473     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",pctype,4,pctype[0],&indx,&flg);CHKERRQ(ierr);
474     if (flg) {
475       switch (indx) {
476       case 0:
477         options.ColPerm = MMD_AT_PLUS_A;
478         break;
479       case 1:
480         options.ColPerm = NATURAL;
481         break;
482       case 2:
483         options.ColPerm = MMD_ATA;
484         break;
485       case 3:
486         options.ColPerm = PARMETIS;
487         break;
488       }
489     }
490 
491     ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
492     if (!flg) {
493       options.ReplaceTinyPivot = NO;
494     }
495 
496     options.ParSymbFact = NO;
497     ierr = PetscOptionsTruth("-mat_superlu_dist_parsymbfact","Parallel symbolic factorization","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
498     if (flg){
499 #ifdef PETSC_HAVE_PARMETIS
500       options.ParSymbFact = YES;
501       options.ColPerm     = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */
502 #else
503       printf("parsymbfact needs PARMETIS");
504 #endif
505     }
506 
507     lu->FactPattern = SamePattern;
508     ierr = PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[0],&indx,&flg);CHKERRQ(ierr);
509     if (flg) {
510       switch (indx) {
511       case 0:
512         lu->FactPattern = SamePattern;
513         break;
514       case 1:
515         lu->FactPattern = SamePattern_SameRowPerm;
516         break;
517       }
518     }
519 
520     options.IterRefine = NOREFINE;
521     ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
522     if (flg) {
523       options.IterRefine = DOUBLE;
524     }
525 
526     if (PetscLogPrintInfo) {
527       options.PrintStat = YES;
528     } else {
529       options.PrintStat = NO;
530     }
531     ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None",
532                               (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr);
533   PetscOptionsEnd();
534 
535   lu->options             = options;
536   lu->options.Fact        = DOFACT;
537   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
538   *F = B;
539   PetscFunctionReturn(0);
540 }
541 EXTERN_C_END
542 
#undef __FUNCT__
#define __FUNCT__ "MatFactorInfo_SuperLU_DIST"
/*
   Prints the SuperLU_DIST runtime parameters of a factored matrix to an
   ASCII viewer; returns silently when A is not a SuperLU_DIST matrix
   (detected by comparing the installed solve callback).
*/
PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
{
  Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
  superlu_options_t options;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  /* check if matrix is superlu_dist type */
  if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);

  options = lu->options;
  ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
  if (options.ColPerm == NATURAL) {
    ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");CHKERRQ(ierr);
  } else if (options.ColPerm == MMD_AT_PLUS_A) {
    ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr);
  } else if (options.ColPerm == MMD_ATA) {
    ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");CHKERRQ(ierr);
  } else if (options.ColPerm == PARMETIS) {
    ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation PARMETIS\n");CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation");
  }

  ierr = PetscViewerASCIIPrintf(viewer,"  Parallel symbolic factorization %s \n",PetscTruths[options.ParSymbFact != NO]);CHKERRQ(ierr);

  if (lu->FactPattern == SamePattern){
    ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern\n");CHKERRQ(ierr);
  } else {
    ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern_SameRowPerm\n");CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
585 
586 #undef __FUNCT__
587 #define __FUNCT__ "MatView_SuperLU_DIST"
588 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
589 {
590   PetscErrorCode    ierr;
591   PetscTruth        iascii;
592   PetscViewerFormat format;
593 
594   PetscFunctionBegin;
595   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
596   if (iascii) {
597     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
598     if (format == PETSC_VIEWER_ASCII_INFO) {
599       ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
600     }
601   }
602   PetscFunctionReturn(0);
603 }
604 
605 
606 /*MC
607   MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices
608   via the external package SuperLU_DIST.
609 
610   If SuperLU_DIST is installed (see the manual for
611   instructions on how to declare the existence of external packages),
612   a matrix type can be constructed which invokes SuperLU_DIST solvers.
613   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST) then
614   optionally call MatSeqAIJSetPreallocation() or MatMPIAIJSetPreallocation() DO NOT
615   call MatCreateSeqAIJ/MPIAIJ() directly or the preallocation information will be LOST!
616 
617   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
618   and from MATMPIAIJ otherwise.  As a result, for single process communicators,
619   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
620   for communicators controlling multiple processes.  It is recommended that you call both of
621   the above preallocation routines for simplicity.  One can also call MatConvert() for an inplace
622   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
623   without data copy; but this MUST be called AFTER the matrix values are set.
624 
625 
626 
627   Options Database Keys:
628 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
629 . -mat_superlu_dist_r <n> - number of rows in processor partition
630 . -mat_superlu_dist_c <n> - number of columns in processor partition
631 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
632 . -mat_superlu_dist_equil - equilibrate the matrix
633 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
634 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,NATURAL,MMD_ATA,PARMETIS> - column permutation
635 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
636 . -mat_superlu_dist_fact <SamePattern> (choose one of) SamePattern SamePattern_SameRowPerm
637 . -mat_superlu_dist_iterrefine - use iterative refinement
638 - -mat_superlu_dist_statprint - print factorization information
639 
640    Level: beginner
641 
642 .seealso: PCLU
643 M*/
644 
645