xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision 6849ba73f22fecb8f92ef896a42e4e8bd4cd6965)
1 
2 /*
3     Provides an interface to the MUMPS_4.3.1 sparse solver
4 */
5 #include "src/mat/impls/aij/seq/aij.h"
6 #include "src/mat/impls/aij/mpi/mpiaij.h"
7 #include "src/mat/impls/sbaij/seq/sbaij.h"
8 #include "src/mat/impls/sbaij/mpi/mpisbaij.h"
9 
10 EXTERN_C_BEGIN
11 #if defined(PETSC_USE_COMPLEX)
12 #include "zmumps_c.h"
13 #else
14 #include "dmumps_c.h"
15 #endif
16 EXTERN_C_END
#define JOB_INIT -1   /* MUMPS job code: initialize a solver instance */
#define JOB_END -2    /* MUMPS job code: terminate instance, free its internal storage */
/* macros s.t. indices match MUMPS documentation (MUMPS arrays are 1-based Fortran style) */
#define ICNTL(I) icntl[(I)-1]
#define CNTL(I) cntl[(I)-1]
#define INFOG(I) infog[(I)-1]
#define INFO(I) info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I) rinfo[(I)-1]
26 
/* Private context attached to a matrix converted to an *MUMPS type: holds the
   MUMPS instance, the matrix-triple storage handed to MUMPS, and the ops
   saved from the base (AIJ/SBAIJ) type so they can be restored on convert-back. */
typedef struct {
#if defined(PETSC_USE_COMPLEX)
  ZMUMPS_STRUC_C id;                 /* MUMPS solver instance (complex scalars) */
#else
  DMUMPS_STRUC_C id;                 /* MUMPS solver instance (real scalars) */
#endif
  MatStructure   matstruc;           /* DIFFERENT_NONZERO_PATTERN triggers init+analysis in numeric factorization */
  int            myid,size,*irn,*jcn,sym;  /* rank, comm size, row/col triple indices (1-based), MUMPS SYM value */
  PetscScalar    *val;               /* values matching irn/jcn (owned here only in the distributed case) */
  MPI_Comm       comm_mumps;         /* duplicated communicator given to MUMPS */

  PetscTruth     isAIJ,CleanUpMUMPS; /* input-format flag; whether JOB_END/frees are needed at destroy time */
  /* base-type methods saved at convert time, restored by MatConvert_MUMPS_Base */
  PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
  PetscErrorCode (*MatView)(Mat,PetscViewer);
  PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
  PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
  PetscErrorCode (*MatCholeskyFactorSymbolic)(Mat,IS,MatFactorInfo*,Mat*);
  PetscErrorCode (*MatDestroy)(Mat);
  PetscErrorCode (*specialdestroy)(Mat);  /* format-specific convert-back routine run first in MatDestroy_MUMPS */
  PetscErrorCode (*MatPreallocate)(Mat,int,int,int*,int,int*);
} Mat_MUMPS;
48 
/* forward declarations used before their definitions below */
EXTERN PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*);
EXTERN_C_BEGIN
PetscErrorCode MatConvert_SBAIJ_SBAIJMUMPS(Mat,const MatType,Mat*);
EXTERN_C_END
53 /* convert Petsc mpiaij matrix to triples: row[nz], col[nz], val[nz] */
54 /*
55   input:
56     A       - matrix in mpiaij or mpisbaij (bs=1) format
57     shift   - 0: C style output triple; 1: Fortran style output triple.
58     valOnly - FALSE: spaces are allocated and values are set for the triple
59               TRUE:  only the values in v array are updated
60   output:
61     nnz     - dim of r, c, and v (number of local nonzero entries of A)
62     r, c, v - row and col index, matrix values (matrix triples)
63  */
64 PetscErrorCode MatConvertToTriples(Mat A,int shift,PetscTruth valOnly,int *nnz,int **r, int **c, PetscScalar **v) {
65   int         *ai, *aj, *bi, *bj, rstart,nz, *garray;
66   PetscErrorCode ierr;
67   int         i,j,jj,jB,irow,m=A->m,*ajj,*bjj,countA,countB,colA_start,jcol;
68   int         *row,*col;
69   PetscScalar *av, *bv,*val;
70   Mat_MUMPS   *mumps=(Mat_MUMPS*)A->spptr;
71 
72   PetscFunctionBegin;
73   if (mumps->isAIJ){
74     Mat_MPIAIJ    *mat =  (Mat_MPIAIJ*)A->data;
75     Mat_SeqAIJ    *aa=(Mat_SeqAIJ*)(mat->A)->data;
76     Mat_SeqAIJ    *bb=(Mat_SeqAIJ*)(mat->B)->data;
77     nz = aa->nz + bb->nz;
78     ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= mat->rstart;
79     garray = mat->garray;
80     av=aa->a; bv=bb->a;
81 
82   } else {
83     Mat_MPISBAIJ  *mat =  (Mat_MPISBAIJ*)A->data;
84     Mat_SeqSBAIJ  *aa=(Mat_SeqSBAIJ*)(mat->A)->data;
85     Mat_SeqBAIJ    *bb=(Mat_SeqBAIJ*)(mat->B)->data;
86     if (mat->bs > 1) SETERRQ1(PETSC_ERR_SUP," bs=%d is not supported yet\n", mat->bs);
87     nz = aa->nz + bb->nz;
88     ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= mat->rstart;
89     garray = mat->garray;
90     av=aa->a; bv=bb->a;
91   }
92 
93   if (!valOnly){
94     ierr = PetscMalloc(nz*sizeof(int),&row);CHKERRQ(ierr);
95     ierr = PetscMalloc(nz*sizeof(int),&col);CHKERRQ(ierr);
96     ierr = PetscMalloc(nz*sizeof(PetscScalar),&val);CHKERRQ(ierr);
97     *r = row; *c = col; *v = val;
98   } else {
99     row = *r; col = *c; val = *v;
100   }
101   *nnz = nz;
102 
103   jj = 0; irow = rstart;
104   for ( i=0; i<m; i++ ) {
105     ajj = aj + ai[i];                 /* ptr to the beginning of this row */
106     countA = ai[i+1] - ai[i];
107     countB = bi[i+1] - bi[i];
108     bjj = bj + bi[i];
109 
110     /* get jB, the starting local col index for the 2nd B-part */
111     colA_start = rstart + ajj[0]; /* the smallest col index for A */
112     j=-1;
113     do {
114       j++;
115       if (j == countB) break;
116       jcol = garray[bjj[j]];
117     } while (jcol < colA_start);
118     jB = j;
119 
120     /* B-part, smaller col index */
121     colA_start = rstart + ajj[0]; /* the smallest col index for A */
122     for (j=0; j<jB; j++){
123       jcol = garray[bjj[j]];
124       if (!valOnly){
125         row[jj] = irow + shift; col[jj] = jcol + shift;
126 
127       }
128       val[jj++] = *bv++;
129     }
130     /* A-part */
131     for (j=0; j<countA; j++){
132       if (!valOnly){
133         row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
134       }
135       val[jj++] = *av++;
136     }
137     /* B-part, larger col index */
138     for (j=jB; j<countB; j++){
139       if (!valOnly){
140         row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
141       }
142       val[jj++] = *bv++;
143     }
144     irow++;
145   }
146 
147   PetscFunctionReturn(0);
148 }
149 
150 EXTERN_C_BEGIN
151 #undef __FUNCT__
152 #define __FUNCT__ "MatConvert_MUMPS_Base"
153 PetscErrorCode MatConvert_MUMPS_Base(Mat A,const MatType type,Mat *newmat) {
154   PetscErrorCode ierr;
155   Mat       B=*newmat;
156   Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr;
157 
158   PetscFunctionBegin;
159   if (B != A) {
160     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
161   }
162   B->ops->duplicate              = mumps->MatDuplicate;
163   B->ops->view                   = mumps->MatView;
164   B->ops->assemblyend            = mumps->MatAssemblyEnd;
165   B->ops->lufactorsymbolic       = mumps->MatLUFactorSymbolic;
166   B->ops->choleskyfactorsymbolic = mumps->MatCholeskyFactorSymbolic;
167   B->ops->destroy                = mumps->MatDestroy;
168   ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
169   ierr = PetscFree(mumps);CHKERRQ(ierr);
170   *newmat = B;
171   PetscFunctionReturn(0);
172 }
173 EXTERN_C_END
174 
175 #undef __FUNCT__
176 #define __FUNCT__ "MatDestroy_MUMPS"
177 PetscErrorCode MatDestroy_MUMPS(Mat A)
178 {
179   Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr;
180   PetscErrorCode ierr;
181   int       size=lu->size;
182   PetscErrorCode (*specialdestroy)(Mat);
183   PetscFunctionBegin;
184   if (lu->CleanUpMUMPS) {
185     /* Terminate instance, deallocate memories */
186     lu->id.job=JOB_END;
187 #if defined(PETSC_USE_COMPLEX)
188     zmumps_c(&lu->id);
189 #else
190     dmumps_c(&lu->id);
191 #endif
192     if (lu->irn) {
193       ierr = PetscFree(lu->irn);CHKERRQ(ierr);
194     }
195     if (lu->jcn) {
196       ierr = PetscFree(lu->jcn);CHKERRQ(ierr);
197     }
198     if (size>1 && lu->val) {
199       ierr = PetscFree(lu->val);CHKERRQ(ierr);
200     }
201     ierr = MPI_Comm_free(&(lu->comm_mumps));CHKERRQ(ierr);
202   }
203   specialdestroy = lu->specialdestroy;
204   ierr = (*specialdestroy)(A);CHKERRQ(ierr);
205   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
206   PetscFunctionReturn(0);
207 }
208 
209 #undef __FUNCT__
210 #define __FUNCT__ "MatDestroy_AIJMUMPS"
211 PetscErrorCode MatDestroy_AIJMUMPS(Mat A)
212 {
213   PetscErrorCode ierr;
214   int  size;
215 
216   PetscFunctionBegin;
217   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
218   if (size==1) {
219     ierr = MatConvert_MUMPS_Base(A,MATSEQAIJ,&A);CHKERRQ(ierr);
220   } else {
221     ierr = MatConvert_MUMPS_Base(A,MATMPIAIJ,&A);CHKERRQ(ierr);
222   }
223   PetscFunctionReturn(0);
224 }
225 
226 #undef __FUNCT__
227 #define __FUNCT__ "MatDestroy_SBAIJMUMPS"
228 PetscErrorCode MatDestroy_SBAIJMUMPS(Mat A)
229 {
230   PetscErrorCode ierr;
231   int  size;
232 
233   PetscFunctionBegin;
234   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
235   if (size==1) {
236     ierr = MatConvert_MUMPS_Base(A,MATSEQSBAIJ,&A);CHKERRQ(ierr);
237   } else {
238     ierr = MatConvert_MUMPS_Base(A,MATMPISBAIJ,&A);CHKERRQ(ierr);
239   }
240   PetscFunctionReturn(0);
241 }
242 
243 #undef __FUNCT__
244 #define __FUNCT__ "MatFactorInfo_MUMPS"
245 PetscErrorCode MatFactorInfo_MUMPS(Mat A,PetscViewer viewer) {
246   Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr;
247   PetscErrorCode ierr;
248 
249   PetscFunctionBegin;
250   ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr);
251   ierr = PetscViewerASCIIPrintf(viewer,"  SYM (matrix type):                  %d \n",lu->id.sym);CHKERRQ(ierr);
252   ierr = PetscViewerASCIIPrintf(viewer,"  PAR (host participation):           %d \n",lu->id.par);CHKERRQ(ierr);
253   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(4) (level of printing):       %d \n",lu->id.ICNTL(4));CHKERRQ(ierr);
254   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(5) (input mat struct):        %d \n",lu->id.ICNTL(5));CHKERRQ(ierr);
255   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(6) (matrix prescaling):       %d \n",lu->id.ICNTL(6));CHKERRQ(ierr);
256   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(7) (matrix ordering):         %d \n",lu->id.ICNTL(7));CHKERRQ(ierr);
257   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(9) (A/A^T x=b is solved):     %d \n",lu->id.ICNTL(9));CHKERRQ(ierr);
258   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(10) (max num of refinements): %d \n",lu->id.ICNTL(10));CHKERRQ(ierr);
259   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(11) (error analysis):         %d \n",lu->id.ICNTL(11));CHKERRQ(ierr);
260   if (!lu->myid && lu->id.ICNTL(11)>0) {
261     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(4) (inf norm of input mat):        %g\n",lu->id.RINFOG(4));CHKERRQ(ierr);
262     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(5) (inf norm of solution):         %g\n",lu->id.RINFOG(5));CHKERRQ(ierr);
263     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(6) (inf norm of residual):         %g\n",lu->id.RINFOG(6));CHKERRQ(ierr);
264     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",lu->id.RINFOG(7),lu->id.RINFOG(8));CHKERRQ(ierr);
265     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(9) (error estimate):               %g \n",lu->id.RINFOG(9));CHKERRQ(ierr);
266     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",lu->id.RINFOG(10),lu->id.RINFOG(11));CHKERRQ(ierr);
267 
268   }
269   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(12) (efficiency control):                         %d \n",lu->id.ICNTL(12));CHKERRQ(ierr);
270   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(13) (efficiency control):                         %d \n",lu->id.ICNTL(13));CHKERRQ(ierr);
271   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(14) (percentage of estimated workspace increase): %d \n",lu->id.ICNTL(14));CHKERRQ(ierr);
272   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(15) (efficiency control):                         %d \n",lu->id.ICNTL(15));CHKERRQ(ierr);
273   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(18) (input mat struct):                           %d \n",lu->id.ICNTL(18));CHKERRQ(ierr);
274 
275   ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(1) (relative pivoting threshold):      %g \n",lu->id.CNTL(1));CHKERRQ(ierr);
276   ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(2) (stopping criterion of refinement): %g \n",lu->id.CNTL(2));CHKERRQ(ierr);
277   ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(3) (absolute pivoting threshold):      %g \n",lu->id.CNTL(3));CHKERRQ(ierr);
278 
279   /* infomation local to each processor */
280   if (!lu->myid) ierr = PetscPrintf(PETSC_COMM_SELF, "      RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr);
281   ierr = PetscSynchronizedPrintf(A->comm,"             [%d] %g \n",lu->myid,lu->id.RINFO(1));CHKERRQ(ierr);
282   ierr = PetscSynchronizedFlush(A->comm);
283   if (!lu->myid) ierr = PetscPrintf(PETSC_COMM_SELF, "      RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr);
284   ierr = PetscSynchronizedPrintf(A->comm,"             [%d]  %g \n",lu->myid,lu->id.RINFO(2));CHKERRQ(ierr);
285   ierr = PetscSynchronizedFlush(A->comm);
286   if (!lu->myid) ierr = PetscPrintf(PETSC_COMM_SELF, "      RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr);
287   ierr = PetscSynchronizedPrintf(A->comm,"             [%d]  %g \n",lu->myid,lu->id.RINFO(3));CHKERRQ(ierr);
288   ierr = PetscSynchronizedFlush(A->comm);
289 
290   if (!lu->myid){ /* information from the host */
291     ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",lu->id.RINFOG(1));CHKERRQ(ierr);
292     ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",lu->id.RINFOG(2));CHKERRQ(ierr);
293     ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",lu->id.RINFOG(3));CHKERRQ(ierr);
294 
295     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(3));CHKERRQ(ierr);
296     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(4));CHKERRQ(ierr);
297     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(5) (estimated maximum front size in the complete tree): %d \n",lu->id.INFOG(5));CHKERRQ(ierr);
298     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(6) (number of nodes in the complete tree): %d \n",lu->id.INFOG(6));CHKERRQ(ierr);
299     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(7) (ordering option effectively uese after analysis): %d \n",lu->id.INFOG(7));CHKERRQ(ierr);
300     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",lu->id.INFOG(8));CHKERRQ(ierr);
301     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(9) (total real space store the matrix factors after analysis): %d \n",lu->id.INFOG(9));CHKERRQ(ierr);
302     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(10) (total integer space store the matrix factors after analysis): %d \n",lu->id.INFOG(10));CHKERRQ(ierr);
303     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(11) (order of largest frontal matrix): %d \n",lu->id.INFOG(11));CHKERRQ(ierr);
304     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(12) (number of off-diagonal pivots): %d \n",lu->id.INFOG(12));CHKERRQ(ierr);
305     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(13) (number of delayed pivots after factorization): %d \n",lu->id.INFOG(13));CHKERRQ(ierr);
306     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(14) (number of memory compress after factorization): %d \n",lu->id.INFOG(14));CHKERRQ(ierr);
307     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(15) (number of steps of iterative refinement after solution): %d \n",lu->id.INFOG(15));CHKERRQ(ierr);
308     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(16) (estimated size (in million of bytes) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",lu->id.INFOG(16));CHKERRQ(ierr);
309     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",lu->id.INFOG(17));CHKERRQ(ierr);
310     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",lu->id.INFOG(18));CHKERRQ(ierr);
311     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",lu->id.INFOG(19));CHKERRQ(ierr);
312      ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(20) (estimated number of entries in the factors): %d \n",lu->id.INFOG(20));CHKERRQ(ierr);
313   }
314 
315   PetscFunctionReturn(0);
316 }
317 
318 #undef __FUNCT__
319 #define __FUNCT__ "MatView_AIJMUMPS"
320 PetscErrorCode MatView_AIJMUMPS(Mat A,PetscViewer viewer) {
321   PetscErrorCode ierr;
322   PetscTruth        iascii;
323   PetscViewerFormat format;
324   Mat_MUMPS         *mumps=(Mat_MUMPS*)(A->spptr);
325 
326   PetscFunctionBegin;
327   ierr = (*mumps->MatView)(A,viewer);CHKERRQ(ierr);
328 
329   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
330   if (iascii) {
331     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
332     if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
333       ierr = MatFactorInfo_MUMPS(A,viewer);CHKERRQ(ierr);
334     }
335   }
336   PetscFunctionReturn(0);
337 }
338 
#undef __FUNCT__
#define __FUNCT__ "MatSolve_AIJMUMPS"
/* Solve A x = b using the MUMPS factors (MUMPS job 3).  MUMPS takes a
   centralized right-hand side on the host, so in parallel the distributed b
   is gathered into a sequential vector on rank 0 first, and the solution is
   scattered back into x afterwards.  In serial, b is copied into x and the
   solve happens in place on x's array. */
PetscErrorCode MatSolve_AIJMUMPS(Mat A,Vec b,Vec x) {
  Mat_MUMPS   *lu=(Mat_MUMPS*)A->spptr;
  PetscScalar *array;
  Vec         x_seq;     /* host-side gathered rhs/solution (used only when size>1) */
  IS          iden;
  VecScatter  scat;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (lu->size > 1){
    /* gather b onto the host: full-length seq vector on rank 0, empty elsewhere */
    if (!lu->myid){
      ierr = VecCreateSeq(PETSC_COMM_SELF,A->N,&x_seq);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,A->N,0,1,&iden);CHKERRQ(ierr);
    } else {
      ierr = VecCreateSeq(PETSC_COMM_SELF,0,&x_seq);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&iden);CHKERRQ(ierr);
    }
    ierr = VecScatterCreate(b,iden,x_seq,iden,&scat);CHKERRQ(ierr);
    ierr = ISDestroy(iden);CHKERRQ(ierr);

    ierr = VecScatterBegin(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
    ierr = VecScatterEnd(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
    if (!lu->myid) {ierr = VecGetArray(x_seq,&array);CHKERRQ(ierr);}
  } else {  /* size == 1 */
    /* the rhs array is overwritten with the solution, so work on x directly */
    ierr = VecCopy(b,x);CHKERRQ(ierr);
    ierr = VecGetArray(x,&array);CHKERRQ(ierr);
  }
  if (!lu->myid) { /* define rhs on the host */
#if defined(PETSC_USE_COMPLEX)
    lu->id.rhs = (mumps_double_complex*)array;
#else
    lu->id.rhs = array;
#endif
  }

  /* solve phase (MUMPS job 3) */
  lu->id.job=3;
#if defined(PETSC_USE_COMPLEX)
  zmumps_c(&lu->id);
#else
  dmumps_c(&lu->id);
#endif
  if (lu->id.INFOG(1) < 0) {
    SETERRQ1(1,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",lu->id.INFOG(1));
  }

  /* convert mumps solution x_seq to petsc mpi x */
  if (lu->size > 1) {
    if (!lu->myid){
      ierr = VecRestoreArray(x_seq,&array);CHKERRQ(ierr);
    }
    ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
    ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
    ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
    ierr = VecDestroy(x_seq);CHKERRQ(ierr);
  } else {
    ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
402 
403 /*
404   input:
405    F:        numeric factor
406   output:
407    nneg:     total number of negative pivots
408    nzero:    0
409    npos:     (global dimension of F) - nneg
410 */
411 
412 #undef __FUNCT__
413 #define __FUNCT__ "MatGetInertia_SBAIJMUMPS"
414 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos)
415 {
416   Mat_MUMPS  *lu =(Mat_MUMPS*)F->spptr;
417   PetscErrorCode ierr;
418   int        size;
419 
420   PetscFunctionBegin;
421   ierr = MPI_Comm_size(F->comm,&size);CHKERRQ(ierr);
422   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
423   if (size > 1 && lu->id.ICNTL(13) != 1){
424     SETERRQ1(1,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",lu->id.INFOG(13));
425   }
426   if (nneg){
427     if (!lu->myid){
428       *nneg = lu->id.INFOG(12);
429     }
430     ierr = MPI_Bcast(nneg,1,MPI_INT,0,lu->comm_mumps);CHKERRQ(ierr);
431   }
432   if (nzero) *nzero = 0;
433   if (npos)  *npos  = F->M - (*nneg);
434   PetscFunctionReturn(0);
435 }
436 
437 #undef __FUNCT__
438 #define __FUNCT__ "MatFactorNumeric_MPIAIJMUMPS"
439 PetscErrorCode MatFactorNumeric_AIJMUMPS(Mat A,Mat *F) {
440   Mat_MUMPS  *lu =(Mat_MUMPS*)(*F)->spptr;
441   Mat_MUMPS  *lua=(Mat_MUMPS*)(A)->spptr;
442   PetscErrorCode ierr;
443   int        rnz,nnz,nz,i,M=A->M,*ai,*aj,icntl;
444   PetscTruth valOnly,flg;
445 
446   PetscFunctionBegin;
447   if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
448     (*F)->ops->solve    = MatSolve_AIJMUMPS;
449 
450     /* Initialize a MUMPS instance */
451     ierr = MPI_Comm_rank(A->comm, &lu->myid);
452     ierr = MPI_Comm_size(A->comm,&lu->size);CHKERRQ(ierr);
453     lua->myid = lu->myid; lua->size = lu->size;
454     lu->id.job = JOB_INIT;
455     ierr = MPI_Comm_dup(A->comm,&(lu->comm_mumps));CHKERRQ(ierr);
456     lu->id.comm_fortran = lu->comm_mumps;
457 
458     /* Set mumps options */
459     ierr = PetscOptionsBegin(A->comm,A->prefix,"MUMPS Options","Mat");CHKERRQ(ierr);
460     lu->id.par=1;  /* host participates factorizaton and solve */
461     lu->id.sym=lu->sym;
462     if (lu->sym == 2){
463       ierr = PetscOptionsInt("-mat_mumps_sym","SYM: (1,2)","None",lu->id.sym,&icntl,&flg);CHKERRQ(ierr);
464       if (flg && icntl == 1) lu->id.sym=icntl;  /* matrix is spd */
465     }
466 #if defined(PETSC_USE_COMPLEX)
467   zmumps_c(&lu->id);
468 #else
469   dmumps_c(&lu->id);
470 #endif
471 
472     if (lu->size == 1){
473       lu->id.ICNTL(18) = 0;   /* centralized assembled matrix input */
474     } else {
475       lu->id.ICNTL(18) = 3;   /* distributed assembled matrix input */
476     }
477 
478     icntl=-1;
479     ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",lu->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr);
480     if (flg && icntl > 0) {
481       lu->id.ICNTL(4)=icntl; /* and use mumps default icntl(i), i=1,2,3 */
482     } else { /* no output */
483       lu->id.ICNTL(1) = 0;  /* error message, default= 6 */
484       lu->id.ICNTL(2) = -1; /* output stream for diagnostic printing, statistics, and warning. default=0 */
485       lu->id.ICNTL(3) = -1; /* output stream for global information, default=6 */
486       lu->id.ICNTL(4) = 0;  /* level of printing, 0,1,2,3,4, default=2 */
487     }
488     ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): matrix prescaling (0 to 7)","None",lu->id.ICNTL(6),&lu->id.ICNTL(6),PETSC_NULL);CHKERRQ(ierr);
489     icntl=-1;
490     ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): matrix ordering (0 to 7)","None",lu->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr);
491     if (flg) {
492       if (icntl== 1){
493         SETERRQ(PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n");
494       } else {
495         lu->id.ICNTL(7) = icntl;
496       }
497     }
498     ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): A or A^T x=b to be solved. 1: A; otherwise: A^T","None",lu->id.ICNTL(9),&lu->id.ICNTL(9),PETSC_NULL);CHKERRQ(ierr);
499     ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",lu->id.ICNTL(10),&lu->id.ICNTL(10),PETSC_NULL);CHKERRQ(ierr);
500     ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): error analysis, a positive value returns statistics (by -ksp_view)","None",lu->id.ICNTL(11),&lu->id.ICNTL(11),PETSC_NULL);CHKERRQ(ierr);
501     ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): efficiency control","None",lu->id.ICNTL(12),&lu->id.ICNTL(12),PETSC_NULL);CHKERRQ(ierr);
502     ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): efficiency control","None",lu->id.ICNTL(13),&lu->id.ICNTL(13),PETSC_NULL);CHKERRQ(ierr);
503     ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage of estimated workspace increase","None",lu->id.ICNTL(14),&lu->id.ICNTL(14),PETSC_NULL);CHKERRQ(ierr);
504     ierr = PetscOptionsInt("-mat_mumps_icntl_15","ICNTL(15): efficiency control","None",lu->id.ICNTL(15),&lu->id.ICNTL(15),PETSC_NULL);CHKERRQ(ierr);
505 
506     /*
507     ierr = PetscOptionsInt("-mat_mumps_icntl_16","ICNTL(16): 1: rank detection; 2: rank detection and nullspace","None",lu->id.ICNTL(16),&icntl,&flg);CHKERRQ(ierr);
508     if (flg){
509       if (icntl >-1 && icntl <3 ){
510         if (lu->myid==0) lu->id.ICNTL(16) = icntl;
511       } else {
512         SETERRQ1(PETSC_ERR_SUP,"ICNTL(16)=%d -- not supported\n",icntl);
513       }
514     }
515     */
516 
517     ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",lu->id.CNTL(1),&lu->id.CNTL(1),PETSC_NULL);CHKERRQ(ierr);
518     ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",lu->id.CNTL(2),&lu->id.CNTL(2),PETSC_NULL);CHKERRQ(ierr);
519     ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",lu->id.CNTL(3),&lu->id.CNTL(3),PETSC_NULL);CHKERRQ(ierr);
520     PetscOptionsEnd();
521   }
522 
523   /* define matrix A */
524   switch (lu->id.ICNTL(18)){
525   case 0:  /* centralized assembled matrix input (size=1) */
526     if (!lu->myid) {
527       if (lua->isAIJ){
528         Mat_SeqAIJ   *aa = (Mat_SeqAIJ*)A->data;
529         nz               = aa->nz;
530         ai = aa->i; aj = aa->j; lu->val = aa->a;
531       } else {
532         Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)A->data;
533         nz                  =  aa->nz;
534         ai = aa->i; aj = aa->j; lu->val = aa->a;
535       }
536       if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization, get irn and jcn */
537         ierr = PetscMalloc(nz*sizeof(int),&lu->irn);CHKERRQ(ierr);
538         ierr = PetscMalloc(nz*sizeof(int),&lu->jcn);CHKERRQ(ierr);
539         nz = 0;
540         for (i=0; i<M; i++){
541           rnz = ai[i+1] - ai[i];
542           while (rnz--) {  /* Fortran row/col index! */
543             lu->irn[nz] = i+1; lu->jcn[nz] = (*aj)+1; aj++; nz++;
544           }
545         }
546       }
547     }
548     break;
549   case 3:  /* distributed assembled matrix input (size>1) */
550     if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
551       valOnly = PETSC_FALSE;
552     } else {
553       valOnly = PETSC_TRUE; /* only update mat values, not row and col index */
554     }
555     ierr = MatConvertToTriples(A,1,valOnly, &nnz, &lu->irn, &lu->jcn, &lu->val);CHKERRQ(ierr);
556     break;
557   default: SETERRQ(PETSC_ERR_SUP,"Matrix input format is not supported by MUMPS.");
558   }
559 
560   /* analysis phase */
561   if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
562      lu->id.n = M;
563     switch (lu->id.ICNTL(18)){
564     case 0:  /* centralized assembled matrix input */
565       if (!lu->myid) {
566         lu->id.nz =nz; lu->id.irn=lu->irn; lu->id.jcn=lu->jcn;
567         if (lu->id.ICNTL(6)>1){
568 #if defined(PETSC_USE_COMPLEX)
569           lu->id.a = (mumps_double_complex*)lu->val;
570 #else
571           lu->id.a = lu->val;
572 #endif
573         }
574       }
575       break;
576     case 3:  /* distributed assembled matrix input (size>1) */
577       lu->id.nz_loc = nnz;
578       lu->id.irn_loc=lu->irn; lu->id.jcn_loc=lu->jcn;
579       if (lu->id.ICNTL(6)>1) {
580 #if defined(PETSC_USE_COMPLEX)
581         lu->id.a_loc = (mumps_double_complex*)lu->val;
582 #else
583         lu->id.a_loc = lu->val;
584 #endif
585       }
586       break;
587     }
588     lu->id.job=1;
589 #if defined(PETSC_USE_COMPLEX)
590   zmumps_c(&lu->id);
591 #else
592   dmumps_c(&lu->id);
593 #endif
594     if (lu->id.INFOG(1) < 0) {
595       SETERRQ1(1,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",lu->id.INFOG(1));
596     }
597   }
598 
599   /* numerical factorization phase */
600   if(!lu->id.ICNTL(18)) {
601     if (!lu->myid) {
602 #if defined(PETSC_USE_COMPLEX)
603       lu->id.a = (mumps_double_complex*)lu->val;
604 #else
605       lu->id.a = lu->val;
606 #endif
607     }
608   } else {
609 #if defined(PETSC_USE_COMPLEX)
610     lu->id.a_loc = (mumps_double_complex*)lu->val;
611 #else
612     lu->id.a_loc = lu->val;
613 #endif
614   }
615   lu->id.job=2;
616 #if defined(PETSC_USE_COMPLEX)
617   zmumps_c(&lu->id);
618 #else
619   dmumps_c(&lu->id);
620 #endif
621   if (lu->id.INFOG(1) < 0) {
622     SETERRQ2(1,"Error reported by MUMPS in numerical factorization phase: INFO(1)=%d, INFO(2)=%d\n",lu->id.INFO(1),lu->id.INFO(2));
623   }
624 
625   if (lu->myid==0 && lu->id.ICNTL(16) > 0){
626     SETERRQ1(1,"  lu->id.ICNTL(16):=%d\n",lu->id.INFOG(16));
627   }
628 
629   (*F)->assembled  = PETSC_TRUE;
630   lu->matstruc     = SAME_NONZERO_PATTERN;
631   lu->CleanUpMUMPS = PETSC_TRUE;
632   PetscFunctionReturn(0);
633 }
634 
635 /* Note the Petsc r and c permutations are ignored */
636 #undef __FUNCT__
637 #define __FUNCT__ "MatLUFactorSymbolic_AIJMUMPS"
638 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) {
639   Mat       B;
640   Mat_MUMPS *lu;
641   PetscErrorCode ierr;
642 
643   PetscFunctionBegin;
644 
645   /* Create the factorization matrix */
646   ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&B);CHKERRQ(ierr);
647   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
648   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
649   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
650 
651   B->ops->lufactornumeric = MatFactorNumeric_AIJMUMPS;
652   B->factor               = FACTOR_LU;
653   lu                      = (Mat_MUMPS*)B->spptr;
654   lu->sym                 = 0;
655   lu->matstruc            = DIFFERENT_NONZERO_PATTERN;
656 
657   *F = B;
658   PetscFunctionReturn(0);
659 }
660 
661 /* Note the Petsc r permutation is ignored */
662 #undef __FUNCT__
663 #define __FUNCT__ "MatCholeskyFactorSymbolic_SBAIJMUMPS"
664 PetscErrorCode MatCholeskyFactorSymbolic_SBAIJMUMPS(Mat A,IS r,MatFactorInfo *info,Mat *F) {
665   Mat       B;
666   Mat_MUMPS *lu;
667   PetscErrorCode ierr;
668 
669   PetscFunctionBegin;
670 
671   /* Create the factorization matrix */
672   ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&B);CHKERRQ(ierr);
673   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
674   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
675   ierr = MatMPISBAIJSetPreallocation(B,1,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
676 
677   B->ops->choleskyfactornumeric = MatFactorNumeric_AIJMUMPS;
678   B->ops->getinertia            = MatGetInertia_SBAIJMUMPS;
679   B->factor                     = FACTOR_CHOLESKY;
680   lu                            = (Mat_MUMPS*)B->spptr;
681   lu->sym                       = 2;
682   lu->matstruc                  = DIFFERENT_NONZERO_PATTERN;
683 
684   *F = B;
685   PetscFunctionReturn(0);
686 }
687 
688 #undef __FUNCT__
689 #define __FUNCT__ "MatAssemblyEnd_AIJMUMPS"
690 PetscErrorCode MatAssemblyEnd_AIJMUMPS(Mat A,MatAssemblyType mode) {
691   PetscErrorCode ierr;
692   Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr;
693 
694   PetscFunctionBegin;
695   ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
696 
697   mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
698   mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
699   A->ops->lufactorsymbolic         = MatLUFactorSymbolic_AIJMUMPS;
700   PetscFunctionReturn(0);
701 }
702 
703 EXTERN_C_BEGIN
704 #undef __FUNCT__
705 #define __FUNCT__ "MatConvert_AIJ_AIJMUMPS"
706 PetscErrorCode MatConvert_AIJ_AIJMUMPS(Mat A,const MatType newtype,Mat *newmat)\
707 {
708   PetscErrorCode ierr;
709   int       size;
710   MPI_Comm  comm;
711   Mat       B=*newmat;
712   Mat_MUMPS *mumps;
713 
714   PetscFunctionBegin;
715   if (B != A) {
716     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
717   }
718 
719   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
720   ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr);
721 
722   mumps->MatDuplicate              = A->ops->duplicate;
723   mumps->MatView                   = A->ops->view;
724   mumps->MatAssemblyEnd            = A->ops->assemblyend;
725   mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
726   mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
727   mumps->MatDestroy                = A->ops->destroy;
728   mumps->specialdestroy            = MatDestroy_AIJMUMPS;
729   mumps->CleanUpMUMPS              = PETSC_FALSE;
730   mumps->isAIJ                     = PETSC_TRUE;
731 
732   B->spptr                         = (void*)mumps;
733   B->ops->duplicate                = MatDuplicate_MUMPS;
734   B->ops->view                     = MatView_AIJMUMPS;
735   B->ops->assemblyend              = MatAssemblyEnd_AIJMUMPS;
736   B->ops->lufactorsymbolic         = MatLUFactorSymbolic_AIJMUMPS;
737   B->ops->destroy                  = MatDestroy_MUMPS;
738 
739   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
740   if (size == 1) {
741     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_aijmumps_C",
742                                              "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr);
743     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_seqaij_C",
744                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
745   } else {
746     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_aijmumps_C",
747                                              "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr);
748     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_mpiaij_C",
749                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
750   }
751 
752   PetscLogInfo(0,"Using MUMPS for LU factorization and solves.");
753   ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr);
754   *newmat = B;
755   PetscFunctionReturn(0);
756 }
757 EXTERN_C_END
758 
759 /*MC
760   MATAIJMUMPS - MATAIJMUMPS = "aijmumps" - A matrix type providing direct solvers (LU) for distributed
761   and sequential matrices via the external package MUMPS.
762 
763   If MUMPS is installed (see the manual for instructions
764   on how to declare the existence of external packages),
765   a matrix type can be constructed which invokes MUMPS solvers.
766   After calling MatCreate(...,A), simply call MatSetType(A,MATAIJMUMPS).
767   This matrix type is only supported for double precision real.
768 
769   If created with a single process communicator, this matrix type inherits from MATSEQAIJ.
770   Otherwise, this matrix type inherits from MATMPIAIJ.  Hence for single process communicators,
771   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
772   for communicators controlling multiple processes.  It is recommended that you call both of
773   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
774   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
775   without data copy.
776 
777   Options Database Keys:
778 + -mat_type aijmumps - sets the matrix type to "aijmumps" during a call to MatSetFromOptions()
779 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric
. -mat_mumps_icntl_4 <0,...,4> - print level
781 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide)
782 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide)
783 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T
784 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements
785 . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view
786 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide)
787 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide)
788 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide)
789 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide)
790 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold
791 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement
792 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold
793 
794   Level: beginner
795 
796 .seealso: MATSBAIJMUMPS
797 M*/
798 
799 EXTERN_C_BEGIN
800 #undef __FUNCT__
801 #define __FUNCT__ "MatCreate_AIJMUMPS"
802 PetscErrorCode MatCreate_AIJMUMPS(Mat A)
803 {
804   PetscErrorCode ierr;
805   int      size;
806   Mat      A_diag;
807   MPI_Comm comm;
808 
809   PetscFunctionBegin;
810   /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */
811   /*   and AIJMUMPS types */
812   ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJMUMPS);CHKERRQ(ierr);
813   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
814   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
815   if (size == 1) {
816     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
817   } else {
818     ierr   = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
819     A_diag = ((Mat_MPIAIJ *)A->data)->A;
820     ierr   = MatConvert_AIJ_AIJMUMPS(A_diag,MATAIJMUMPS,&A_diag);CHKERRQ(ierr);
821   }
822   ierr = MatConvert_AIJ_AIJMUMPS(A,MATAIJMUMPS,&A);CHKERRQ(ierr);
823   PetscFunctionReturn(0);
824 }
825 EXTERN_C_END
826 
827 #undef __FUNCT__
828 #define __FUNCT__ "MatAssemblyEnd_SBAIJMUMPS"
829 PetscErrorCode MatAssemblyEnd_SBAIJMUMPS(Mat A,MatAssemblyType mode)
830 {
831   PetscErrorCode ierr;
832   Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr;
833 
834   PetscFunctionBegin;
835   ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
836   mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
837   mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
838   A->ops->choleskyfactorsymbolic   = MatCholeskyFactorSymbolic_SBAIJMUMPS;
839   PetscFunctionReturn(0);
840 }
841 
842 EXTERN_C_BEGIN
843 #undef __FUNCT__
844 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS"
845 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJMUMPS(Mat  B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
846 {
847   Mat       A;
848   Mat_MUMPS *mumps=(Mat_MUMPS*)B->spptr;
849   PetscErrorCode ierr;
850 
851   PetscFunctionBegin;
852   /*
853     After performing the MPISBAIJ Preallocation, we need to convert the local diagonal block matrix
854     into MUMPS type so that the block jacobi preconditioner (for example) can use MUMPS.  I would
855     like this to be done in the MatCreate routine, but the creation of this inner matrix requires
856     block size info so that PETSc can determine the local size properly.  The block size info is set
857     in the preallocation routine.
858   */
859   ierr = (*mumps->MatPreallocate)(B,bs,d_nz,d_nnz,o_nz,o_nnz);
860   A    = ((Mat_MPISBAIJ *)B->data)->A;
861   ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,&A);CHKERRQ(ierr);
862   PetscFunctionReturn(0);
863 }
864 EXTERN_C_END
865 
866 EXTERN_C_BEGIN
867 #undef __FUNCT__
868 #define __FUNCT__ "MatConvert_SBAIJ_SBAIJMUMPS"
869 PetscErrorCode MatConvert_SBAIJ_SBAIJMUMPS(Mat A,const MatType newtype,Mat *newmat)
870 {
871   PetscErrorCode ierr;
872   int       size;
873   MPI_Comm  comm;
874   Mat       B=*newmat;
875   Mat_MUMPS *mumps;
876   void      (*f)(void);
877 
878   PetscFunctionBegin;
879   if (B != A) {
880     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
881   }
882 
883   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
884   ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr);
885 
886   mumps->MatDuplicate              = A->ops->duplicate;
887   mumps->MatView                   = A->ops->view;
888   mumps->MatAssemblyEnd            = A->ops->assemblyend;
889   mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
890   mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
891   mumps->MatDestroy                = A->ops->destroy;
892   mumps->specialdestroy            = MatDestroy_SBAIJMUMPS;
893   mumps->CleanUpMUMPS              = PETSC_FALSE;
894   mumps->isAIJ                     = PETSC_FALSE;
895 
896   B->spptr                         = (void*)mumps;
897   B->ops->duplicate                = MatDuplicate_MUMPS;
898   B->ops->view                     = MatView_AIJMUMPS;
899   B->ops->assemblyend              = MatAssemblyEnd_SBAIJMUMPS;
900   B->ops->choleskyfactorsymbolic   = MatCholeskyFactorSymbolic_SBAIJMUMPS;
901   B->ops->destroy                  = MatDestroy_MUMPS;
902 
903   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
904   if (size == 1) {
905     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C",
906                                              "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr);
907     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C",
908                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
909   } else {
910   /* I really don't like needing to know the tag: MatMPISBAIJSetPreallocation_C */
911     ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",&f);CHKERRQ(ierr);
912     if (f) {
913       mumps->MatPreallocate = (PetscErrorCode (*)(Mat,int,int,int*,int,int*))f;
914       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
915                                                "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS",
916                                                MatMPISBAIJSetPreallocation_MPISBAIJMUMPS);CHKERRQ(ierr);
917     }
918     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C",
919                                              "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr);
920     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C",
921                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
922   }
923 
924   PetscLogInfo(0,"Using MUMPS for Cholesky factorization and solves.");
925   ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr);
926   *newmat = B;
927   PetscFunctionReturn(0);
928 }
929 EXTERN_C_END
930 
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MUMPS"
/*
   Duplicate for the MUMPS wrapper types: delegates to the saved base-type
   duplicate, then copies this matrix's Mat_MUMPS control structure over the
   duplicate's spptr.
   NOTE(review): this assumes (*M)->spptr already points at a valid
   Mat_MUMPS-sized allocation after the base duplicate runs -- confirm.
   The memcpy is a shallow copy: irn/jcn/val pointers, comm_mumps, and the
   MUMPS id struct are shared with the original rather than re-created.
*/
PetscErrorCode MatDuplicate_MUMPS(Mat A, MatDuplicateOption op, Mat *M) {
  PetscErrorCode ierr;
  Mat_MUMPS   *lu=(Mat_MUMPS *)A->spptr;

  PetscFunctionBegin;
  ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
  /* shallow-copy the MUMPS state into the new matrix */
  ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_MUMPS));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
942 
943 /*MC
944   MATSBAIJMUMPS - MATSBAIJMUMPS = "sbaijmumps" - A symmetric matrix type providing direct solvers (Cholesky) for
945   distributed and sequential matrices via the external package MUMPS.
946 
947   If MUMPS is installed (see the manual for instructions
948   on how to declare the existence of external packages),
949   a matrix type can be constructed which invokes MUMPS solvers.
950   After calling MatCreate(...,A), simply call MatSetType(A,MATSBAIJMUMPS).
951   This matrix type is only supported for double precision real.
952 
953   If created with a single process communicator, this matrix type inherits from MATSEQSBAIJ.
954   Otherwise, this matrix type inherits from MATMPISBAIJ.  Hence for single process communicators,
955   MatSeqSBAIJSetPreallocation is supported, and similarly MatMPISBAIJSetPreallocation is supported
956   for communicators controlling multiple processes.  It is recommended that you call both of
957   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
958   conversion to or from the MATSEQSBAIJ or MATMPISBAIJ type (depending on the communicator size)
959   without data copy.
960 
961   Options Database Keys:
962 + -mat_type sbaijmumps - sets the matrix type to "sbaijmumps" during a call to MatSetFromOptions()
963 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric
964 . -mat_mumps_icntl_4 <0,...,4> - print level
965 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide)
966 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide)
967 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T
968 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements
969 . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view
970 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide)
971 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide)
972 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide)
973 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide)
974 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold
975 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement
976 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold
977 
978   Level: beginner
979 
980 .seealso: MATAIJMUMPS
981 M*/
982 
983 EXTERN_C_BEGIN
984 #undef __FUNCT__
985 #define __FUNCT__ "MatCreate_SBAIJMUMPS"
986 PetscErrorCode MatCreate_SBAIJMUMPS(Mat A)
987 {
988   PetscErrorCode ierr;
989   int size;
990 
991   PetscFunctionBegin;
992   /* Change type name before calling MatSetType to force proper construction of SeqSBAIJ or MPISBAIJ */
993   /*   and SBAIJMUMPS types */
994   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJMUMPS);CHKERRQ(ierr);
995   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
996   if (size == 1) {
997     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
998   } else {
999     ierr   = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1000   }
1001   ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,&A);CHKERRQ(ierr);
1002   PetscFunctionReturn(0);
1003 }
1004 EXTERN_C_END
1005