xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision 1bb69edbc2f6efc8bcb8b8b24da8e7bcfef7c08b)
1 #define PETSCMAT_DLL
2 
3 /*
4     Provides an interface to the MUMPS sparse solver
5 */
6 #include "src/mat/impls/aij/seq/aij.h"
7 #include "src/mat/impls/aij/mpi/mpiaij.h"
8 #include "src/mat/impls/sbaij/seq/sbaij.h"
9 #include "src/mat/impls/sbaij/mpi/mpisbaij.h"
10 
11 EXTERN_C_BEGIN
12 #if defined(PETSC_USE_COMPLEX)
13 #include "zmumps_c.h"
14 #else
15 #include "dmumps_c.h"
16 #endif
17 EXTERN_C_END
18 #define JOB_INIT -1
19 #define JOB_END -2
20 /* macros s.t. indices match MUMPS documentation */
21 #define ICNTL(I) icntl[(I)-1]
22 #define CNTL(I) cntl[(I)-1]
23 #define INFOG(I) infog[(I)-1]
24 #define INFO(I) info[(I)-1]
25 #define RINFOG(I) rinfog[(I)-1]
26 #define RINFO(I) rinfo[(I)-1]
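/* For example, id.ICNTL(4) expands to id.icntl[3], i.e. the entry the MUMPS User's Guide calls ICNTL(4) */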
27 
28 typedef struct {
29 #if defined(PETSC_USE_COMPLEX)
30   ZMUMPS_STRUC_C id;
31 #else
32   DMUMPS_STRUC_C id;
33 #endif
34   MatStructure   matstruc;
35   PetscMPIInt    myid,size;
36   PetscInt       *irn,*jcn,sym,nSolve;
37   PetscScalar    *val;
38   MPI_Comm       comm_mumps;
39   VecScatter     scat_rhs, scat_sol;
40   PetscTruth     isAIJ,CleanUpMUMPS;
41   Vec            b_seq,x_seq;
42   PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
43   PetscErrorCode (*MatView)(Mat,PetscViewer);
44   PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
45   PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
46   PetscErrorCode (*MatCholeskyFactorSymbolic)(Mat,IS,MatFactorInfo*,Mat*);
47   PetscErrorCode (*MatDestroy)(Mat);
48   PetscErrorCode (*specialdestroy)(Mat);
49   PetscErrorCode (*MatPreallocate)(Mat,int,int,int*,int,int*);
50 } Mat_MUMPS;
51 
52 EXTERN PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*);
53 EXTERN_C_BEGIN
54 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SBAIJ_SBAIJMUMPS(Mat,MatType,MatReuse,Mat*);
55 EXTERN_C_END
56 /* convert a PETSc MPIAIJ or MPISBAIJ matrix to triples: row[nz], col[nz], val[nz] */
57 /*
58   input:
59     A       - matrix in mpiaij or mpisbaij (bs=1) format
60     shift   - 0: C style (0-based) indices; 1: Fortran style (1-based) indices
61     valOnly - FALSE: space is allocated and both indices and values are set for the triples
62               TRUE:  only the values in the v array are updated
63   output:
64     nnz     - dim of r, c, and v (number of local nonzero entries of A)
65     r, c, v - row indices, col indices, and matrix values (matrix triples)
66  */
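/*
  Typical use within this file (see MatFactorNumeric_MUMPS below): the first call, with
  valOnly=PETSC_FALSE, allocates and fills the triples; later calls with valOnly=PETSC_TRUE
  refresh only the numerical values for a matrix with the same nonzero pattern:

    ierr = MatConvertToTriples(A,1,PETSC_FALSE,&nnz,&irn,&jcn,&val);CHKERRQ(ierr);
*/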
67 PetscErrorCode MatConvertToTriples(Mat A,int shift,PetscTruth valOnly,int *nnz,int **r, int **c, PetscScalar **v) {
68   PetscInt       *ai, *aj, *bi, *bj, rstart,nz, *garray;
69   PetscErrorCode ierr;
70   PetscInt       i,j,jj,jB,irow,m=A->rmap.n,*ajj,*bjj,countA,countB,colA_start,jcol;
71   PetscInt       *row,*col;
72   PetscScalar    *av, *bv,*val;
73   Mat_MUMPS      *mumps=(Mat_MUMPS*)A->spptr;
74 
75   PetscFunctionBegin;
76   if (mumps->isAIJ){
77     Mat_MPIAIJ    *mat =  (Mat_MPIAIJ*)A->data;
78     Mat_SeqAIJ    *aa=(Mat_SeqAIJ*)(mat->A)->data;
79     Mat_SeqAIJ    *bb=(Mat_SeqAIJ*)(mat->B)->data;
80     nz = aa->nz + bb->nz;
81     ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap.rstart;
82     garray = mat->garray;
83     av=aa->a; bv=bb->a;
84 
85   } else {
86     Mat_MPISBAIJ  *mat =  (Mat_MPISBAIJ*)A->data;
87     Mat_SeqSBAIJ  *aa=(Mat_SeqSBAIJ*)(mat->A)->data;
88     Mat_SeqBAIJ    *bb=(Mat_SeqBAIJ*)(mat->B)->data;
89     if (A->rmap.bs > 1) SETERRQ1(PETSC_ERR_SUP," bs=%d is not supported yet\n", A->rmap.bs);
90     nz = aa->nz + bb->nz;
91     ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap.rstart;
92     garray = mat->garray;
93     av=aa->a; bv=bb->a;
94   }
95 
96   if (!valOnly){
97     ierr = PetscMalloc(nz*sizeof(PetscInt) ,&row);CHKERRQ(ierr);
98     ierr = PetscMalloc(nz*sizeof(PetscInt),&col);CHKERRQ(ierr);
99     ierr = PetscMalloc(nz*sizeof(PetscScalar),&val);CHKERRQ(ierr);
100     *r = row; *c = col; *v = val;
101   } else {
102     row = *r; col = *c; val = *v;
103   }
104   *nnz = nz;
105 
106   jj = 0; irow = rstart;
107   for ( i=0; i<m; i++ ) {
108     ajj = aj + ai[i];                 /* ptr to the beginning of this row */
109     countA = ai[i+1] - ai[i];
110     countB = bi[i+1] - bi[i];
111     bjj = bj + bi[i];
112 
113     /* get jB, the number of B entries in this row whose global col index is smaller than the smallest col index of the A-part */
114     colA_start = rstart + ajj[0]; /* the smallest col index for A */
115     j=-1;
116     do {
117       j++;
118       if (j == countB) break;
119       jcol = garray[bjj[j]];
120     } while (jcol < colA_start);
121     jB = j;
122 
123     /* B-part, smaller col index */
124     colA_start = rstart + ajj[0]; /* the smallest col index for A */
125     for (j=0; j<jB; j++){
126       jcol = garray[bjj[j]];
127       if (!valOnly){
128         row[jj] = irow + shift; col[jj] = jcol + shift;
129 
130       }
131       val[jj++] = *bv++;
132     }
133     /* A-part */
134     for (j=0; j<countA; j++){
135       if (!valOnly){
136         row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
137       }
138       val[jj++] = *av++;
139     }
140     /* B-part, larger col index */
141     for (j=jB; j<countB; j++){
142       if (!valOnly){
143         row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
144       }
145       val[jj++] = *bv++;
146     }
147     irow++;
148   }
149 
150   PetscFunctionReturn(0);
151 }
152 
153 EXTERN_C_BEGIN
154 #undef __FUNCT__
155 #define __FUNCT__ "MatConvert_MUMPS_Base"
156 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MUMPS_Base(Mat A,MatType type,MatReuse reuse,Mat *newmat)
157 {
158   PetscErrorCode ierr;
159   Mat            B=*newmat;
160   Mat_MUMPS      *mumps=(Mat_MUMPS*)A->spptr;
161   void           (*f)(void);
162 
163   PetscFunctionBegin;
164   if (reuse == MAT_INITIAL_MATRIX) {
165     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
166   }
167   B->ops->duplicate              = mumps->MatDuplicate;
168   B->ops->view                   = mumps->MatView;
169   B->ops->assemblyend            = mumps->MatAssemblyEnd;
170   B->ops->lufactorsymbolic       = mumps->MatLUFactorSymbolic;
171   B->ops->choleskyfactorsymbolic = mumps->MatCholeskyFactorSymbolic;
172   B->ops->destroy                = mumps->MatDestroy;
173 
174   /* put back original composed preallocation function */
175   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(PetscVoidStarFunction)&f);CHKERRQ(ierr);
176   if (f) {
177     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C","",(PetscVoidFunction)mumps->MatPreallocate);CHKERRQ(ierr);
178   }
179 
180   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_aijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
181   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
182   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_aijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
183   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr);
184   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
185   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
186   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
187   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
188 
189   ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
190   *newmat = B;
191   PetscFunctionReturn(0);
192 }
193 EXTERN_C_END
194 
195 #undef __FUNCT__
196 #define __FUNCT__ "MatDestroy_MUMPS"
197 PetscErrorCode MatDestroy_MUMPS(Mat A)
198 {
199   Mat_MUMPS      *lu=(Mat_MUMPS*)A->spptr;
200   PetscErrorCode ierr;
201   PetscMPIInt    size=lu->size;
202   PetscErrorCode (*specialdestroy)(Mat);
203   PetscFunctionBegin;
204   if (lu->CleanUpMUMPS) {
205     /* Terminate the MUMPS instance and deallocate its memory */
206     if (size > 1){
207       ierr = PetscFree(lu->id.sol_loc);CHKERRQ(ierr);
208       ierr = VecScatterDestroy(lu->scat_rhs);CHKERRQ(ierr);
209       ierr = VecDestroy(lu->b_seq);CHKERRQ(ierr);
210       ierr = VecScatterDestroy(lu->scat_sol);CHKERRQ(ierr);
211       ierr = VecDestroy(lu->x_seq);CHKERRQ(ierr);
212       ierr = PetscFree(lu->val);CHKERRQ(ierr);
213     }
214     lu->id.job=JOB_END;
215 #if defined(PETSC_USE_COMPLEX)
216     zmumps_c(&lu->id);
217 #else
218     dmumps_c(&lu->id);
219 #endif
220     ierr = PetscFree(lu->irn);CHKERRQ(ierr);
221     ierr = PetscFree(lu->jcn);CHKERRQ(ierr);
222     ierr = MPI_Comm_free(&(lu->comm_mumps));CHKERRQ(ierr);
223   }
224   specialdestroy = lu->specialdestroy;
225   ierr = (*specialdestroy)(A);CHKERRQ(ierr);
226   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
227   PetscFunctionReturn(0);
228 }
229 
230 #undef __FUNCT__
231 #define __FUNCT__ "MatDestroy_AIJMUMPS"
232 PetscErrorCode MatDestroy_AIJMUMPS(Mat A)
233 {
234   PetscErrorCode ierr;
235   PetscMPIInt    size;
236 
237   PetscFunctionBegin;
238   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
239   if (size==1) {
240     ierr = MatConvert_MUMPS_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
241   } else {
242     ierr = MatConvert_MUMPS_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
243   }
244   PetscFunctionReturn(0);
245 }
246 
247 #undef __FUNCT__
248 #define __FUNCT__ "MatDestroy_SBAIJMUMPS"
249 PetscErrorCode MatDestroy_SBAIJMUMPS(Mat A)
250 {
251   PetscErrorCode ierr;
252   PetscMPIInt    size;
253 
254   PetscFunctionBegin;
255   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
256   if (size==1) {
257     ierr = MatConvert_MUMPS_Base(A,MATSEQSBAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
258   } else {
259     ierr = MatConvert_MUMPS_Base(A,MATMPISBAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
260   }
261   PetscFunctionReturn(0);
262 }
263 
264 #undef __FUNCT__
265 #define __FUNCT__ "MatSolve_MUMPS"
266 PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x) {
267   Mat_MUMPS      *lu=(Mat_MUMPS*)A->spptr;
268   PetscScalar    *array;
269   Vec            x_seq;
270   IS             is_iden,is_petsc;
271   VecScatter     scat_rhs=lu->scat_rhs,scat_sol=lu->scat_sol;
272   PetscErrorCode ierr;
273   PetscInt       i;
274 
275   PetscFunctionBegin;
276   lu->id.nrhs = 1;
277   x_seq = lu->b_seq;
278   if (lu->size > 1){
279     /* MUMPS only supports a centralized rhs. Scatter b into a sequential rhs vector */
280     ierr = VecScatterBegin(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat_rhs);CHKERRQ(ierr);
281     ierr = VecScatterEnd(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat_rhs);CHKERRQ(ierr);
282     if (!lu->myid) {ierr = VecGetArray(x_seq,&array);CHKERRQ(ierr);}
283   } else {  /* size == 1 */
284     ierr = VecCopy(b,x);CHKERRQ(ierr);
285     ierr = VecGetArray(x,&array);CHKERRQ(ierr);
286   }
287   if (!lu->myid) { /* define rhs on the host */
288 #if defined(PETSC_USE_COMPLEX)
289     lu->id.rhs = (mumps_double_complex*)array;
290 #else
291     lu->id.rhs = array;
292 #endif
293   }
294   if (lu->size == 1){
295     ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);
296   } else if (!lu->myid){
297     ierr = VecRestoreArray(x_seq,&array);CHKERRQ(ierr);
298   }
299 
300   if (lu->size > 1){
301     /* distributed solution */
302     lu->id.ICNTL(21) = 1;
303     if (!lu->nSolve){
304       /* Create x_seq=sol_loc for repeated use */
305       PetscInt    lsol_loc;
306       PetscScalar *sol_loc;
307       lsol_loc = lu->id.INFO(23); /* length of sol_loc */
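      /* a single allocation holds both arrays MUMPS needs: lsol_loc scalars (sol_loc) followed by lsol_loc integers (isol_loc) */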
308       ierr = PetscMalloc((1+lsol_loc)*(sizeof(PetscScalar)+sizeof(PetscInt)),&sol_loc);CHKERRQ(ierr);
309       lu->id.isol_loc = (PetscInt *)(sol_loc + lsol_loc);
310       lu->id.lsol_loc = lsol_loc;
311       lu->id.sol_loc  = (F_DOUBLE *)sol_loc;
312       ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,lsol_loc,sol_loc,&lu->x_seq);CHKERRQ(ierr);
313     }
314   }
315 
316   /* solve phase */
317   /*-------------*/
318   lu->id.job = 3;
319 #if defined(PETSC_USE_COMPLEX)
320   zmumps_c(&lu->id);
321 #else
322   dmumps_c(&lu->id);
323 #endif
324   if (lu->id.INFOG(1) < 0) {
325     SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",lu->id.INFOG(1));
326   }
327 
328   if (lu->size > 1) { /* convert mumps distributed solution to petsc mpi x */
329     if (!lu->nSolve){ /* create scatter scat_sol */
330       ierr = ISCreateStride(PETSC_COMM_SELF,lu->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */
331       for (i=0; i<lu->id.lsol_loc; i++){
332         lu->id.isol_loc[i] -= 1; /* change Fortran style to C style */
333       }
334       ierr = ISCreateGeneral(PETSC_COMM_SELF,lu->id.lsol_loc,lu->id.isol_loc,&is_petsc);CHKERRQ(ierr);  /* to */
335       ierr = VecScatterCreate(lu->x_seq,is_iden,x,is_petsc,&lu->scat_sol);CHKERRQ(ierr);
336       ierr = ISDestroy(is_iden);CHKERRQ(ierr);
337       ierr = ISDestroy(is_petsc);CHKERRQ(ierr);
338     }
339     ierr = VecScatterBegin(lu->x_seq,x,INSERT_VALUES,SCATTER_FORWARD,lu->scat_sol);CHKERRQ(ierr);
340     ierr = VecScatterEnd(lu->x_seq,x,INSERT_VALUES,SCATTER_FORWARD,lu->scat_sol);CHKERRQ(ierr);
341   }
342   lu->nSolve++;
343   PetscFunctionReturn(0);
344 }
345 
346 /*
347   input:
348    F:        numeric factor
349   output:
350    nneg:     total number of negative pivots
351    nzero:    0
352    npos:     (global dimension of F) - nneg
353 */
354 
355 #undef __FUNCT__
356 #define __FUNCT__ "MatGetInertia_SBAIJMUMPS"
357 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos)
358 {
359   Mat_MUMPS      *lu =(Mat_MUMPS*)F->spptr;
360   PetscErrorCode ierr;
361   PetscMPIInt    size;
362 
363   PetscFunctionBegin;
364   ierr = MPI_Comm_size(F->comm,&size);CHKERRQ(ierr);
365   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
366   if (size > 1 && lu->id.ICNTL(13) != 1){
367     SETERRQ1(PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",lu->id.ICNTL(13));
368   }
369   if (nneg){
370     if (!lu->myid){
371       *nneg = lu->id.INFOG(12);
372     }
373     ierr = MPI_Bcast(nneg,1,MPI_INT,0,lu->comm_mumps);CHKERRQ(ierr);
374   }
375   if (nzero) *nzero = 0;
376   if (npos)  *npos  = F->rmap.N - (*nneg);
377   PetscFunctionReturn(0);
378 }
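/*
   A minimal sketch of how the inertia can be obtained through this interface (not a verbatim
   PETSc example; the PC object pc is assumed to have been created elsewhere and attached to a
   MATSBAIJMUMPS matrix, and PCGetFactoredMatrix() is used to retrieve the factor F):

     PetscInt nneg,nzero,npos;
     Mat      F;
     ierr = PCSetType(pc,PCCHOLESKY);CHKERRQ(ierr);
     ierr = PCSetUp(pc);CHKERRQ(ierr);
     ierr = PCGetFactoredMatrix(pc,&F);CHKERRQ(ierr);
     ierr = MatGetInertia(F,&nneg,&nzero,&npos);CHKERRQ(ierr);

   On more than one process the option -mat_mumps_icntl_13 1 must also be given (see the check above).
*/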
379 
380 #undef __FUNCT__
381 #define __FUNCT__ "MatFactorNumeric_MUMPS"
382 PetscErrorCode MatFactorNumeric_MUMPS(Mat A,MatFactorInfo *info,Mat *F)
383 {
384   Mat_MUMPS      *lu =(Mat_MUMPS*)(*F)->spptr;
385   Mat_MUMPS      *lua=(Mat_MUMPS*)(A)->spptr;
386   PetscErrorCode ierr;
387   PetscInt       rnz,nnz,nz=0,i,M=A->rmap.N,*ai,*aj,icntl;
388   PetscTruth     valOnly,flg;
389   Mat            F_diag;
390 
391   PetscFunctionBegin;
392   if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
393     (*F)->ops->solve    = MatSolve_MUMPS;
394 
395     /* Initialize a MUMPS instance */
396     ierr = MPI_Comm_rank(A->comm, &lu->myid);CHKERRQ(ierr);
397     ierr = MPI_Comm_size(A->comm,&lu->size);CHKERRQ(ierr);
398     lua->myid = lu->myid; lua->size = lu->size;
399     lu->id.job = JOB_INIT;
400     ierr = MPI_Comm_dup(A->comm,&(lu->comm_mumps));CHKERRQ(ierr);
401     ierr = MPICCommToFortranComm(lu->comm_mumps,&(lu->id.comm_fortran));CHKERRQ(ierr);
402 
403     /* Set mumps options */
404     ierr = PetscOptionsBegin(A->comm,A->prefix,"MUMPS Options","Mat");CHKERRQ(ierr);
405     lu->id.par=1;  /* host participates in factorization and solve */
406     lu->id.sym=lu->sym;
407     if (lu->sym == 2){
408       ierr = PetscOptionsInt("-mat_mumps_sym","SYM: (1,2)","None",lu->id.sym,&icntl,&flg);CHKERRQ(ierr);
409       if (flg && icntl == 1) lu->id.sym=icntl;  /* matrix is spd */
410     }
411 #if defined(PETSC_USE_COMPLEX)
412     zmumps_c(&lu->id);
413 #else
414     dmumps_c(&lu->id);
415 #endif
416 
417     if (lu->size == 1){
418       lu->id.ICNTL(18) = 0;   /* centralized assembled matrix input */
419     } else {
420       lu->id.ICNTL(18) = 3;   /* distributed assembled matrix input */
421     }
422 
423     icntl=-1;
424     lu->id.ICNTL(4) = 0;  /* level of printing; override the MUMPS default ICNTL(4)=2 */
425     ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",lu->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr);
426     if ((flg && icntl > 0) || PetscLogPrintInfo) {
427       lu->id.ICNTL(4)=icntl; /* and use the MUMPS defaults for ICNTL(i), i=1,2,3 */
428     } else { /* no output */
429       lu->id.ICNTL(1) = 0;  /* error message, default= 6 */
430       lu->id.ICNTL(2) = -1; /* output stream for diagnostic printing, statistics, and warning. default=0 */
431       lu->id.ICNTL(3) = -1; /* output stream for global information, default=6 */
432     }
433     ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): matrix prescaling (0 to 7)","None",lu->id.ICNTL(6),&lu->id.ICNTL(6),PETSC_NULL);CHKERRQ(ierr);
434     icntl=-1;
435     ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): matrix ordering (0 to 7)","None",lu->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr);
436     if (flg) {
437       if (icntl== 1){
438         SETERRQ(PETSC_ERR_SUP,"ICNTL(7)=1 (pivot order set by the user in PERM_IN) is not supported by the PETSc/MUMPS interface\n");
439       } else {
440         lu->id.ICNTL(7) = icntl;
441       }
442     }
443     ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): A or A^T x=b to be solved. 1: A; otherwise: A^T","None",lu->id.ICNTL(9),&lu->id.ICNTL(9),PETSC_NULL);CHKERRQ(ierr);
444     ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",lu->id.ICNTL(10),&lu->id.ICNTL(10),PETSC_NULL);CHKERRQ(ierr);
445     ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): error analysis, a positive value returns statistics (by -ksp_view)","None",lu->id.ICNTL(11),&lu->id.ICNTL(11),PETSC_NULL);CHKERRQ(ierr);
446     ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): efficiency control","None",lu->id.ICNTL(12),&lu->id.ICNTL(12),PETSC_NULL);CHKERRQ(ierr);
447     ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): efficiency control","None",lu->id.ICNTL(13),&lu->id.ICNTL(13),PETSC_NULL);CHKERRQ(ierr);
448     ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage of estimated workspace increase","None",lu->id.ICNTL(14),&lu->id.ICNTL(14),PETSC_NULL);CHKERRQ(ierr);
449     ierr = PetscOptionsInt("-mat_mumps_icntl_15","ICNTL(15): efficiency control","None",lu->id.ICNTL(15),&lu->id.ICNTL(15),PETSC_NULL);CHKERRQ(ierr);
450 
451     ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",lu->id.CNTL(1),&lu->id.CNTL(1),PETSC_NULL);CHKERRQ(ierr);
452     ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",lu->id.CNTL(2),&lu->id.CNTL(2),PETSC_NULL);CHKERRQ(ierr);
453     ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",lu->id.CNTL(3),&lu->id.CNTL(3),PETSC_NULL);CHKERRQ(ierr);
454     ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",lu->id.CNTL(4),&lu->id.CNTL(4),PETSC_NULL);CHKERRQ(ierr);
455     PetscOptionsEnd();
456   }
457 
458   /* define matrix A */
459   switch (lu->id.ICNTL(18)){
460   case 0:  /* centralized assembled matrix input (size=1) */
461     if (!lu->myid) {
462       if (lua->isAIJ){
463         Mat_SeqAIJ   *aa = (Mat_SeqAIJ*)A->data;
464         nz               = aa->nz;
465         ai = aa->i; aj = aa->j; lu->val = aa->a;
466       } else {
467         Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)A->data;
468         nz                  =  aa->nz;
469         ai = aa->i; aj = aa->j; lu->val = aa->a;
470       }
471       if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization, get irn and jcn */
472         ierr = PetscMalloc(nz*sizeof(PetscInt),&lu->irn);CHKERRQ(ierr);
473         ierr = PetscMalloc(nz*sizeof(PetscInt),&lu->jcn);CHKERRQ(ierr);
474         nz = 0;
475         for (i=0; i<M; i++){
476           rnz = ai[i+1] - ai[i];
477           while (rnz--) {  /* Fortran row/col index! */
478             lu->irn[nz] = i+1; lu->jcn[nz] = (*aj)+1; aj++; nz++;
479           }
480         }
481       }
482     }
483     break;
484   case 3:  /* distributed assembled matrix input (size>1) */
485     if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
486       valOnly = PETSC_FALSE;
487     } else {
488       valOnly = PETSC_TRUE; /* only update mat values, not row and col index */
489     }
490     ierr = MatConvertToTriples(A,1,valOnly, &nnz, &lu->irn, &lu->jcn, &lu->val);CHKERRQ(ierr);
491     break;
492   default: SETERRQ(PETSC_ERR_SUP,"Matrix input format is not supported by MUMPS.");
493   }
494 
495   /* analysis phase */
496   /*----------------*/
497   if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
498     lu->id.job = 1;
499 
500     lu->id.n = M;
501     switch (lu->id.ICNTL(18)){
502     case 0:  /* centralized assembled matrix input */
503       if (!lu->myid) {
504         lu->id.nz =nz; lu->id.irn=lu->irn; lu->id.jcn=lu->jcn;
505         if (lu->id.ICNTL(6)>1){
506 #if defined(PETSC_USE_COMPLEX)
507           lu->id.a = (mumps_double_complex*)lu->val;
508 #else
509           lu->id.a = lu->val;
510 #endif
511         }
512       }
513       break;
514     case 3:  /* distributed assembled matrix input (size>1) */
515       lu->id.nz_loc = nnz;
516       lu->id.irn_loc=lu->irn; lu->id.jcn_loc=lu->jcn;
517       if (lu->id.ICNTL(6)>1) {
518 #if defined(PETSC_USE_COMPLEX)
519         lu->id.a_loc = (mumps_double_complex*)lu->val;
520 #else
521         lu->id.a_loc = lu->val;
522 #endif
523       }
524       /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
525       IS  is_iden;
526       Vec b;
527       if (!lu->myid){
528         ierr = VecCreateSeq(PETSC_COMM_SELF,A->cmap.N,&lu->b_seq);CHKERRQ(ierr);
529         ierr = ISCreateStride(PETSC_COMM_SELF,A->cmap.N,0,1,&is_iden);CHKERRQ(ierr);
530       } else {
531         ierr = VecCreateSeq(PETSC_COMM_SELF,0,&lu->b_seq);CHKERRQ(ierr);
532         ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_iden);CHKERRQ(ierr);
533       }
534       ierr = VecCreate(A->comm,&b);CHKERRQ(ierr);
535       ierr = VecSetSizes(b,A->rmap.n,PETSC_DECIDE);CHKERRQ(ierr);
536       ierr = VecSetFromOptions(b);CHKERRQ(ierr);
537 
538       ierr = VecScatterCreate(b,is_iden,lu->b_seq,is_iden,&lu->scat_rhs);CHKERRQ(ierr);
539       ierr = ISDestroy(is_iden);CHKERRQ(ierr);
540       ierr = VecDestroy(b);CHKERRQ(ierr);
541       break;
542     }
543 #if defined(PETSC_USE_COMPLEX)
544     zmumps_c(&lu->id);
545 #else
546     dmumps_c(&lu->id);
547 #endif
548     if (lu->id.INFOG(1) < 0) {
549       SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",lu->id.INFOG(1));
550     }
551   }
552 
553   /* numerical factorization phase */
554   /*-------------------------------*/
555   lu->id.job = 2;
556   if(!lu->id.ICNTL(18)) {
557     if (!lu->myid) {
558 #if defined(PETSC_USE_COMPLEX)
559       lu->id.a = (mumps_double_complex*)lu->val;
560 #else
561       lu->id.a = lu->val;
562 #endif
563     }
564   } else {
565 #if defined(PETSC_USE_COMPLEX)
566     lu->id.a_loc = (mumps_double_complex*)lu->val;
567 #else
568     lu->id.a_loc = lu->val;
569 #endif
570   }
571 #if defined(PETSC_USE_COMPLEX)
572   zmumps_c(&lu->id);
573 #else
574   dmumps_c(&lu->id);
575 #endif
576   if (lu->id.INFOG(1) < 0) {
577     if (lu->id.INFO(1) == -13) {
578       SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: Cannot allocate required memory %d megabytes\n",lu->id.INFO(2));
579     } else {
580       SETERRQ2(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFO(1)=%d, INFO(2)=%d\n",lu->id.INFO(1),lu->id.INFO(2));
581     }
582   }
583 
584   if (!lu->myid && lu->id.ICNTL(16) > 0){
585     SETERRQ1(PETSC_ERR_LIB,"  lu->id.ICNTL(16):=%d\n",lu->id.ICNTL(16));
586   }
587 
588   if (lu->size > 1){
589     if ((*F)->factor == FACTOR_LU){
590       F_diag = ((Mat_MPIAIJ *)(*F)->data)->A;
591     } else {
592       F_diag = ((Mat_MPISBAIJ *)(*F)->data)->A;
593     }
594     F_diag->assembled = PETSC_TRUE;
595     if (lu->nSolve){
596       ierr = VecScatterDestroy(lu->scat_sol);CHKERRQ(ierr);
597       ierr = PetscFree(lu->id.sol_loc);CHKERRQ(ierr);
598       ierr = VecDestroy(lu->x_seq);CHKERRQ(ierr);
599     }
600   }
601   (*F)->assembled   = PETSC_TRUE;
602   lu->matstruc      = SAME_NONZERO_PATTERN;
603   lu->CleanUpMUMPS  = PETSC_TRUE;
604   lu->nSolve        = 0;
605   PetscFunctionReturn(0);
606 }
607 
608 /* Note the PETSc r and c permutations are ignored */
609 #undef __FUNCT__
610 #define __FUNCT__ "MatLUFactorSymbolic_AIJMUMPS"
611 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) {
612   Mat            B;
613   Mat_MUMPS      *lu;
614   PetscErrorCode ierr;
615 
616   PetscFunctionBegin;
617   /* Create the factorization matrix */
618   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
619   ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
620   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
621   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
622   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
623 
624   B->ops->lufactornumeric = MatFactorNumeric_MUMPS;
625   B->factor               = FACTOR_LU;
626   lu                      = (Mat_MUMPS*)B->spptr;
627   lu->sym                 = 0;
628   lu->matstruc            = DIFFERENT_NONZERO_PATTERN;
629 
630   *F = B;
631   PetscFunctionReturn(0);
632 }
633 
634 /* Note the PETSc r permutation is ignored */
635 #undef __FUNCT__
636 #define __FUNCT__ "MatCholeskyFactorSymbolic_SBAIJMUMPS"
637 PetscErrorCode MatCholeskyFactorSymbolic_SBAIJMUMPS(Mat A,IS r,MatFactorInfo *info,Mat *F) {
638   Mat            B;
639   Mat_MUMPS      *lu;
640   PetscErrorCode ierr;
641 
642   PetscFunctionBegin;
643   /* Create the factorization matrix */
644   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
645   ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
646   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
647   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
648   ierr = MatMPISBAIJSetPreallocation(B,1,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
649 
650   B->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
651   B->ops->getinertia            = MatGetInertia_SBAIJMUMPS;
652   B->factor                     = FACTOR_CHOLESKY;
653   lu                            = (Mat_MUMPS*)B->spptr;
654   lu->sym                       = 2;
655   lu->matstruc                  = DIFFERENT_NONZERO_PATTERN;
656 
657   *F = B;
658   PetscFunctionReturn(0);
659 }
660 
661 #undef __FUNCT__
662 #define __FUNCT__ "MatFactorInfo_MUMPS"
663 PetscErrorCode MatFactorInfo_MUMPS(Mat A,PetscViewer viewer) {
664   Mat_MUMPS      *lu=(Mat_MUMPS*)A->spptr;
665   PetscErrorCode ierr;
666 
667   PetscFunctionBegin;
668   /* check if matrix is mumps type */
669   if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0);
670 
671   ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr);
672   ierr = PetscViewerASCIIPrintf(viewer,"  SYM (matrix type):                  %d \n",lu->id.sym);CHKERRQ(ierr);
673   ierr = PetscViewerASCIIPrintf(viewer,"  PAR (host participation):           %d \n",lu->id.par);CHKERRQ(ierr);
674   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(1) (output for error):        %d \n",lu->id.ICNTL(1));CHKERRQ(ierr);
675   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(2) (output of diagnostic msg):%d \n",lu->id.ICNTL(2));CHKERRQ(ierr);
676   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(3) (output for global info):  %d \n",lu->id.ICNTL(3));CHKERRQ(ierr);
677   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(4) (level of printing):       %d \n",lu->id.ICNTL(4));CHKERRQ(ierr);
678   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(5) (input mat struct):        %d \n",lu->id.ICNTL(5));CHKERRQ(ierr);
679   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(6) (matrix prescaling):       %d \n",lu->id.ICNTL(6));CHKERRQ(ierr);
680   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(7) (matrix ordering):         %d \n",lu->id.ICNTL(7));CHKERRQ(ierr);
681   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(8) (scaling strategy):        %d \n",lu->id.ICNTL(8));CHKERRQ(ierr);
682   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(9) (A/A^T x=b is solved):     %d \n",lu->id.ICNTL(9));CHKERRQ(ierr);
683   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(10) (max num of refinements): %d \n",lu->id.ICNTL(10));CHKERRQ(ierr);
684   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(11) (error analysis):         %d \n",lu->id.ICNTL(11));CHKERRQ(ierr);
685   if (!lu->myid && lu->id.ICNTL(11)>0) {
686     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(4) (inf norm of input mat):        %g\n",lu->id.RINFOG(4));CHKERRQ(ierr);
687     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(5) (inf norm of solution):         %g\n",lu->id.RINFOG(5));CHKERRQ(ierr);
688     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(6) (inf norm of residual):         %g\n",lu->id.RINFOG(6));CHKERRQ(ierr);
689     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",lu->id.RINFOG(7),lu->id.RINFOG(8));CHKERRQ(ierr);
690     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(9) (error estimate):               %g \n",lu->id.RINFOG(9));CHKERRQ(ierr);
691     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",lu->id.RINFOG(10),lu->id.RINFOG(11));CHKERRQ(ierr);
692 
693   }
694   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(12) (efficiency control):                         %d \n",lu->id.ICNTL(12));CHKERRQ(ierr);
695   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(13) (efficiency control):                         %d \n",lu->id.ICNTL(13));CHKERRQ(ierr);
696   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(14) (percentage of estimated workspace increase): %d \n",lu->id.ICNTL(14));CHKERRQ(ierr);
697   /* ICNTL(15-17) not used */
698   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(18) (input mat struct):                           %d \n",lu->id.ICNTL(18));CHKERRQ(ierr);
699   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(19) (Schur complement info):                      %d \n",lu->id.ICNTL(19));CHKERRQ(ierr);
700   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(20) (rhs sparse pattern):                         %d \n",lu->id.ICNTL(20));CHKERRQ(ierr);
701   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(21) (solution struct):                            %d \n",lu->id.ICNTL(21));CHKERRQ(ierr);
702 
703   ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(1) (relative pivoting threshold):      %g \n",lu->id.CNTL(1));CHKERRQ(ierr);
704   ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(2) (stopping criterion of refinement): %g \n",lu->id.CNTL(2));CHKERRQ(ierr);
705   ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(3) (absolute pivoting threshold):      %g \n",lu->id.CNTL(3));CHKERRQ(ierr);
706   ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(4) (value of static pivoting):         %g \n",lu->id.CNTL(4));CHKERRQ(ierr);
707 
708   /* information local to each processor */
709   if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr);}
710   ierr = PetscSynchronizedPrintf(A->comm,"             [%d] %g \n",lu->myid,lu->id.RINFO(1));CHKERRQ(ierr);
711   ierr = PetscSynchronizedFlush(A->comm);CHKERRQ(ierr);
712   if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr);}
713   ierr = PetscSynchronizedPrintf(A->comm,"             [%d]  %g \n",lu->myid,lu->id.RINFO(2));CHKERRQ(ierr);
714   ierr = PetscSynchronizedFlush(A->comm);CHKERRQ(ierr);
715   if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr);}
716   ierr = PetscSynchronizedPrintf(A->comm,"             [%d]  %g \n",lu->myid,lu->id.RINFO(3));CHKERRQ(ierr);
717   ierr = PetscSynchronizedFlush(A->comm);CHKERRQ(ierr);
718   /*
719   if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      INFO(2) (info about error or warning ): \n");CHKERRQ(ierr);}
720   ierr = PetscSynchronizedPrintf(A->comm,"             [%d] %d \n",lu->myid,lu->id.INFO(2));CHKERRQ(ierr);
721   ierr = PetscSynchronizedFlush(A->comm);
722   */
723 
724   if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      INFO(15) (estimated size (in MB) of MUMPS internal data needed for running the numerical factorization): \n");CHKERRQ(ierr);}
725   ierr = PetscSynchronizedPrintf(A->comm,"             [%d] %d \n",lu->myid,lu->id.INFO(15));CHKERRQ(ierr);
726   ierr = PetscSynchronizedFlush(A->comm);CHKERRQ(ierr);
727 
728   if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      INFO(16) (size (in MB) of MUMPS internal data used during the numerical factorization): \n");CHKERRQ(ierr);}
729   ierr = PetscSynchronizedPrintf(A->comm,"             [%d] %d \n",lu->myid,lu->id.INFO(16));CHKERRQ(ierr);
730   ierr = PetscSynchronizedFlush(A->comm);CHKERRQ(ierr);
731 
732   if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr);}
733   ierr = PetscSynchronizedPrintf(A->comm,"             [%d] %d \n",lu->myid,lu->id.INFO(23));CHKERRQ(ierr);
734   ierr = PetscSynchronizedFlush(A->comm);CHKERRQ(ierr);
735 
736   if (!lu->myid){ /* information from the host */
737     ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",lu->id.RINFOG(1));CHKERRQ(ierr);
738     ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",lu->id.RINFOG(2));CHKERRQ(ierr);
739     ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",lu->id.RINFOG(3));CHKERRQ(ierr);
740 
741     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(3));CHKERRQ(ierr);
742     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(4));CHKERRQ(ierr);
743     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(5) (estimated maximum front size in the complete tree): %d \n",lu->id.INFOG(5));CHKERRQ(ierr);
744     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(6) (number of nodes in the complete tree): %d \n",lu->id.INFOG(6));CHKERRQ(ierr);
745     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(7) (ordering option effectively used after analysis): %d \n",lu->id.INFOG(7));CHKERRQ(ierr);
746     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",lu->id.INFOG(8));CHKERRQ(ierr);
747     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",lu->id.INFOG(9));CHKERRQ(ierr);
748     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(10) (total integer space to store the matrix factors after factorization): %d \n",lu->id.INFOG(10));CHKERRQ(ierr);
749     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(11) (order of largest frontal matrix after factorization): %d \n",lu->id.INFOG(11));CHKERRQ(ierr);
750     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(12) (number of off-diagonal pivots): %d \n",lu->id.INFOG(12));CHKERRQ(ierr);
751     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(13) (number of delayed pivots after factorization): %d \n",lu->id.INFOG(13));CHKERRQ(ierr);
752     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(14) (number of memory compresses after factorization): %d \n",lu->id.INFOG(14));CHKERRQ(ierr);
753     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(15) (number of steps of iterative refinement after solution): %d \n",lu->id.INFOG(15));CHKERRQ(ierr);
754     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",lu->id.INFOG(16));CHKERRQ(ierr);
755     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",lu->id.INFOG(17));CHKERRQ(ierr);
756     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",lu->id.INFOG(18));CHKERRQ(ierr);
757     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",lu->id.INFOG(19));CHKERRQ(ierr);
758     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(20) (estimated number of entries in the factors): %d \n",lu->id.INFOG(20));CHKERRQ(ierr);
759     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",lu->id.INFOG(21));CHKERRQ(ierr);
760     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",lu->id.INFOG(22));CHKERRQ(ierr);
761     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",lu->id.INFOG(23));CHKERRQ(ierr);
762     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",lu->id.INFOG(24));CHKERRQ(ierr);
763     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",lu->id.INFOG(25));CHKERRQ(ierr);
764   }
765 
766   PetscFunctionReturn(0);
767 }
768 
769 #undef __FUNCT__
770 #define __FUNCT__ "MatView_MUMPS"
771 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) {
772   PetscErrorCode    ierr;
773   PetscTruth        iascii;
774   PetscViewerFormat format;
775   Mat_MUMPS         *mumps=(Mat_MUMPS*)(A->spptr);
776 
777   PetscFunctionBegin;
778   ierr = (*mumps->MatView)(A,viewer);CHKERRQ(ierr);
779 
780   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
781   if (iascii) {
782     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
783     if (format == PETSC_VIEWER_ASCII_INFO){
784       ierr = MatFactorInfo_MUMPS(A,viewer);CHKERRQ(ierr);
785     }
786   }
787   PetscFunctionReturn(0);
788 }
789 
790 #undef __FUNCT__
791 #define __FUNCT__ "MatAssemblyEnd_AIJMUMPS"
792 PetscErrorCode MatAssemblyEnd_AIJMUMPS(Mat A,MatAssemblyType mode) {
793   PetscErrorCode ierr;
794   Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr;
795 
796   PetscFunctionBegin;
797   ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
798 
799   mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
800   mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
801   A->ops->lufactorsymbolic         = MatLUFactorSymbolic_AIJMUMPS;
802   PetscFunctionReturn(0);
803 }
804 
805 EXTERN_C_BEGIN
806 #undef __FUNCT__
807 #define __FUNCT__ "MatConvert_AIJ_AIJMUMPS"
808 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_AIJ_AIJMUMPS(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
809 {
810   PetscErrorCode ierr;
811   PetscMPIInt    size;
812   MPI_Comm       comm;
813   Mat            B=*newmat;
814   Mat_MUMPS      *mumps;
815 
816   PetscFunctionBegin;
817   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
818   ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr);
819 
820   if (reuse == MAT_INITIAL_MATRIX) {
821     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
822     /* A may have a special container that is not duplicated,
823        e.g., A is obtained from MatMatMult(,&A). Save B->ops instead */
824     mumps->MatDuplicate              = B->ops->duplicate;
825     mumps->MatView                   = B->ops->view;
826     mumps->MatAssemblyEnd            = B->ops->assemblyend;
827     mumps->MatLUFactorSymbolic       = B->ops->lufactorsymbolic;
828     mumps->MatCholeskyFactorSymbolic = B->ops->choleskyfactorsymbolic;
829     mumps->MatDestroy                = B->ops->destroy;
830   } else {
831     mumps->MatDuplicate              = A->ops->duplicate;
832     mumps->MatView                   = A->ops->view;
833     mumps->MatAssemblyEnd            = A->ops->assemblyend;
834     mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
835     mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
836     mumps->MatDestroy                = A->ops->destroy;
837   }
838   mumps->specialdestroy            = MatDestroy_AIJMUMPS;
839   mumps->CleanUpMUMPS              = PETSC_FALSE;
840   mumps->isAIJ                     = PETSC_TRUE;
841 
842   B->spptr                         = (void*)mumps;
843   B->ops->duplicate                = MatDuplicate_MUMPS;
844   B->ops->view                     = MatView_MUMPS;
845   B->ops->assemblyend              = MatAssemblyEnd_AIJMUMPS;
846   B->ops->lufactorsymbolic         = MatLUFactorSymbolic_AIJMUMPS;
847   B->ops->destroy                  = MatDestroy_MUMPS;
848 
849   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
850   if (size == 1) {
851     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_aijmumps_C",
852                                              "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr);
853     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_seqaij_C",
854                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
855   } else {
856     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_aijmumps_C",
857                                              "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr);
858     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_mpiaij_C",
859                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
860   }
861 
862   ierr = PetscInfo(0,"Using MUMPS for LU factorization and solves.\n");CHKERRQ(ierr);
863   ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr);
864   *newmat = B;
865   PetscFunctionReturn(0);
866 }
867 EXTERN_C_END
868 
869 /*MC
870   MATAIJMUMPS - MATAIJMUMPS = "aijmumps" - A matrix type providing direct solvers (LU) for distributed
871   and sequential matrices via the external package MUMPS.
872 
873   If MUMPS is installed (see the manual for instructions
874   on how to declare the existence of external packages),
875   a matrix type can be constructed which invokes MUMPS solvers.
876   After calling MatCreate(...,A), simply call MatSetType(A,MATAIJMUMPS).
877 
878   If created with a single process communicator, this matrix type inherits from MATSEQAIJ.
879   Otherwise, this matrix type inherits from MATMPIAIJ.  Hence for single process communicators,
880   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
881   for communicators controlling multiple processes.  It is recommended that you call both of
882   the above preallocation routines for simplicity.  One can also call MatConvert for an in-place
883   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
884   without data copy.
885 
886   Options Database Keys:
887 + -mat_type aijmumps - sets the matrix type to "aijmumps" during a call to MatSetFromOptions()
888 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric
889 . -mat_mumps_icntl_4 <0,1,2,3,4> - print level
890 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide)
891 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide)
892 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T
893 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements
894 . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view
895 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide)
896 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide)
897 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide)
898 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide)
899 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold
900 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement
901 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold
902 
903   Level: beginner
904 
905 .seealso: MATSBAIJMUMPS
906 M*/
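/*
   A minimal usage sketch (not a verbatim PETSc example; n, the nonzero counts used for
   preallocation, and the assembly of A and b are placeholders):

     #include "petscksp.h"
     Mat  A;
     Vec  b,x;
     KSP  ksp;
     PC   pc;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJMUMPS);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,5,PETSC_NULL);CHKERRQ(ierr);
     ... fill A with MatSetValues(), assemble A and the right-hand side b ...
     ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
     ierr = KSPSetOperators(ksp,A,A,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
     ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCLU);CHKERRQ(ierr);
     ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);

   With KSPPREONLY and PCLU the whole solve is the MUMPS LU factorization followed by a
   forward/backward substitution.
*/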
907 
908 EXTERN_C_BEGIN
909 #undef __FUNCT__
910 #define __FUNCT__ "MatCreate_AIJMUMPS"
911 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_AIJMUMPS(Mat A)
912 {
913   PetscErrorCode ierr;
914   PetscMPIInt    size;
915 
916   PetscFunctionBegin;
917   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
918   if (size == 1) {
919     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
920   } else {
921     ierr   = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
922     /*
923     Mat A_diag = ((Mat_MPIAIJ *)A->data)->A;
924     ierr   = MatConvert_AIJ_AIJMUMPS(A_diag,MATAIJMUMPS,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr);
925     */
926   }
927   ierr = MatConvert_AIJ_AIJMUMPS(A,MATAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
928   PetscFunctionReturn(0);
929 }
930 EXTERN_C_END
931 
932 #undef __FUNCT__
933 #define __FUNCT__ "MatAssemblyEnd_SBAIJMUMPS"
934 PetscErrorCode MatAssemblyEnd_SBAIJMUMPS(Mat A,MatAssemblyType mode)
935 {
936   PetscErrorCode ierr;
937   Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr;
938 
939   PetscFunctionBegin;
940   ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
941   mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
942   A->ops->choleskyfactorsymbolic   = MatCholeskyFactorSymbolic_SBAIJMUMPS;
943   PetscFunctionReturn(0);
944 }
945 
946 EXTERN_C_BEGIN
947 #undef __FUNCT__
948 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS"
949 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJMUMPS(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
950 {
951   Mat       A;
952   Mat_MUMPS *mumps=(Mat_MUMPS*)B->spptr;
953   PetscErrorCode ierr;
954 
955   PetscFunctionBegin;
956   /*
957     After performing the MPISBAIJ Preallocation, we need to convert the local diagonal block matrix
958     into MUMPS type so that the block jacobi preconditioner (for example) can use MUMPS.  I would
959     like this to be done in the MatCreate routine, but the creation of this inner matrix requires
960     block size info so that PETSc can determine the local size properly.  The block size info is set
961     in the preallocation routine.
962   */
963   ierr = (*mumps->MatPreallocate)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
964   A    = ((Mat_MPISBAIJ *)B->data)->A;
965   ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
966   PetscFunctionReturn(0);
967 }
968 EXTERN_C_END
969 
970 EXTERN_C_BEGIN
971 #undef __FUNCT__
972 #define __FUNCT__ "MatConvert_SBAIJ_SBAIJMUMPS"
973 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SBAIJ_SBAIJMUMPS(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
974 {
975   PetscErrorCode ierr;
976   PetscMPIInt    size;
977   MPI_Comm       comm;
978   Mat            B=*newmat;
979   Mat_MUMPS      *mumps;
980   void           (*f)(void);
981 
982   PetscFunctionBegin;
983   if (reuse == MAT_INITIAL_MATRIX) {
984     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
985   }
986 
987   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
988   ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr);
989 
990   mumps->MatDuplicate              = A->ops->duplicate;
991   mumps->MatView                   = A->ops->view;
992   mumps->MatAssemblyEnd            = A->ops->assemblyend;
993   mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
994   mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
995   mumps->MatDestroy                = A->ops->destroy;
996   mumps->specialdestroy            = MatDestroy_SBAIJMUMPS;
997   mumps->CleanUpMUMPS              = PETSC_FALSE;
998   mumps->isAIJ                     = PETSC_FALSE;
999 
1000   B->spptr                         = (void*)mumps;
1001   B->ops->duplicate                = MatDuplicate_MUMPS;
1002   B->ops->view                     = MatView_MUMPS;
1003   B->ops->assemblyend              = MatAssemblyEnd_SBAIJMUMPS;
1004   B->ops->choleskyfactorsymbolic   = MatCholeskyFactorSymbolic_SBAIJMUMPS;
1005   B->ops->destroy                  = MatDestroy_MUMPS;
1006 
1007   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1008   if (size == 1) {
1009     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C",
1010                                              "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr);
1011     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C",
1012                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
1013   } else {
1014   /* I really don't like needing to know the tag: MatMPISBAIJSetPreallocation_C */
1015     ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(PetscVoidStarFunction)&f);CHKERRQ(ierr);
1016     if (f) { /* This case should always be true when this routine is called */
1017       mumps->MatPreallocate = (PetscErrorCode (*)(Mat,int,int,int*,int,int*))f;
1018       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1019                                                "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS",
1020                                                MatMPISBAIJSetPreallocation_MPISBAIJMUMPS);CHKERRQ(ierr);
1021     }
1022     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C",
1023                                              "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr);
1024     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C",
1025                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
1026   }
1027 
1028   ierr = PetscInfo(0,"Using MUMPS for Cholesky factorization and solves.\n");CHKERRQ(ierr);
1029   ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr);
1030   *newmat = B;
1031   PetscFunctionReturn(0);
1032 }
1033 EXTERN_C_END
1034 
1035 #undef __FUNCT__
1036 #define __FUNCT__ "MatDuplicate_MUMPS"
1037 PetscErrorCode MatDuplicate_MUMPS(Mat A, MatDuplicateOption op, Mat *M) {
1038   PetscErrorCode ierr;
1039   Mat_MUMPS   *lu=(Mat_MUMPS *)A->spptr;
1040 
1041   PetscFunctionBegin;
1042   ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
1043   ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_MUMPS));CHKERRQ(ierr);
1044   PetscFunctionReturn(0);
1045 }
1046 
1047 /*MC
1048   MATSBAIJMUMPS - MATSBAIJMUMPS = "sbaijmumps" - A symmetric matrix type providing direct solvers (Cholesky) for
1049   distributed and sequential matrices via the external package MUMPS.
1050 
1051   If MUMPS is installed (see the manual for instructions
1052   on how to declare the existence of external packages),
1053   a matrix type can be constructed which invokes MUMPS solvers.
1054   After calling MatCreate(...,A), simply call MatSetType(A,MATSBAIJMUMPS).
1055 
1056   If created with a single process communicator, this matrix type inherits from MATSEQSBAIJ.
1057   Otherwise, this matrix type inherits from MATMPISBAIJ.  Hence for single process communicators,
1058   MatSeqSBAIJSetPreallocation is supported, and similarly MatMPISBAIJSetPreallocation is supported
1059   for communicators controlling multiple processes.  It is recommended that you call both of
1060   the above preallocation routines for simplicity.  One can also call MatConvert for an in-place
1061   conversion to or from the MATSEQSBAIJ or MATMPISBAIJ type (depending on the communicator size)
1062   without data copy.
1063 
1064   Options Database Keys:
1065 + -mat_type sbaijmumps - sets the matrix type to "sbaijmumps" during a call to MatSetFromOptions()
1066 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric
1067 . -mat_mumps_icntl_4 <0,...,4> - print level
1068 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide)
1069 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide)
1070 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T
1071 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements
1072 . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view
1073 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide)
1074 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide)
1075 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide)
1076 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide)
1077 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold
1078 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement
1079 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold
1080 
1081   Level: beginner
1082 
1083 .seealso: MATAIJMUMPS
1084 M*/
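/*
   Typical runtime usage (a sketch; "myapp" is a hypothetical application that calls
   MatSetFromOptions() and KSPSetFromOptions()):

     mpirun -np 4 ./myapp -mat_type sbaijmumps -ksp_type preonly -pc_type cholesky \
            -mat_mumps_icntl_4 2 -mat_mumps_cntl_1 0.01

   -ksp_type preonly together with -pc_type cholesky makes the KSP apply a single Cholesky
   solve, which is performed by MUMPS because the matrix type is sbaijmumps.
*/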
1085 
1086 EXTERN_C_BEGIN
1087 #undef __FUNCT__
1088 #define __FUNCT__ "MatCreate_SBAIJMUMPS"
1089 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJMUMPS(Mat A)
1090 {
1091   PetscErrorCode ierr;
1092   PetscMPIInt    size;
1093 
1094   PetscFunctionBegin;
1095   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1096   if (size == 1) {
1097     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1098   } else {
1099     ierr   = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1100   }
1101   ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
1102   PetscFunctionReturn(0);
1103 }
1104 EXTERN_C_END
1105