xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision e2df7a95c5ea77c899beea10ff9effd6061e7c8f)
1 #define PETSCMAT_DLL
2 
3 /*
4     Provides an interface to the MUMPS_4.3.1 sparse solver
5 */
6 #include "src/mat/impls/aij/seq/aij.h"
7 #include "src/mat/impls/aij/mpi/mpiaij.h"
8 #include "src/mat/impls/sbaij/seq/sbaij.h"
9 #include "src/mat/impls/sbaij/mpi/mpisbaij.h"
10 
11 EXTERN_C_BEGIN
12 #if defined(PETSC_USE_COMPLEX)
13 #include "zmumps_c.h"
14 #else
15 #include "dmumps_c.h"
16 #endif
17 EXTERN_C_END
18 #define JOB_INIT -1
19 #define JOB_END -2
20 /* macros s.t. indices match MUMPS documentation */
21 #define ICNTL(I) icntl[(I)-1]
22 #define CNTL(I) cntl[(I)-1]
23 #define INFOG(I) infog[(I)-1]
24 #define INFO(I) info[(I)-1]
25 #define RINFOG(I) rinfog[(I)-1]
26 #define RINFO(I) rinfo[(I)-1]
27 
/* Solver context hung off A->spptr for matrices converted to a MUMPS type. */
typedef struct {
#if defined(PETSC_USE_COMPLEX)
  ZMUMPS_STRUC_C id;    /* MUMPS control/data structure (complex build) */
#else
  DMUMPS_STRUC_C id;    /* MUMPS control/data structure (real build) */
#endif
  MatStructure   matstruc;                  /* DIFFERENT_NONZERO_PATTERN triggers (re-)analysis in numeric factorization */
  int            myid,size,*irn,*jcn,sym;   /* comm rank/size; row/col triple indices; MUMPS SYM parameter (0 unsym, 2 sym) */
  PetscScalar    *val;                      /* triple values (owned by this struct only when size>1; see MatDestroy_MUMPS) */
  MPI_Comm       comm_mumps;                /* communicator dup'ed for MUMPS (freed in MatDestroy_MUMPS) */

  PetscTruth     isAIJ,CleanUpMUMPS;        /* AIJ vs SBAIJ flavor; whether a MUMPS instance exists and must be JOB_END'ed */
  /* base-type methods saved at convert time and restored by MatConvert_MUMPS_Base */
  PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
  PetscErrorCode (*MatView)(Mat,PetscViewer);
  PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
  PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
  PetscErrorCode (*MatCholeskyFactorSymbolic)(Mat,IS,MatFactorInfo*,Mat*);
  PetscErrorCode (*MatDestroy)(Mat);
  PetscErrorCode (*specialdestroy)(Mat);    /* type-specific destroy (AIJ or SBAIJ variant) called from MatDestroy_MUMPS */
  PetscErrorCode (*MatPreallocate)(Mat,int,int,int*,int,int*);
} Mat_MUMPS;
49 
50 EXTERN PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*);
51 EXTERN_C_BEGIN
52 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SBAIJ_SBAIJMUMPS(Mat,MatType,MatReuse,Mat*);
53 EXTERN_C_END
54 /* convert Petsc mpiaij matrix to triples: row[nz], col[nz], val[nz] */
55 /*
56   input:
57     A       - matrix in mpiaij or mpisbaij (bs=1) format
58     shift   - 0: C style output triple; 1: Fortran style output triple.
59     valOnly - FALSE: spaces are allocated and values are set for the triple
60               TRUE:  only the values in v array are updated
61   output:
62     nnz     - dim of r, c, and v (number of local nonzero entries of A)
63     r, c, v - row and col index, matrix values (matrix triples)
64  */
65 PetscErrorCode MatConvertToTriples(Mat A,int shift,PetscTruth valOnly,int *nnz,int **r, int **c, PetscScalar **v) {
66   int         *ai, *aj, *bi, *bj, rstart,nz, *garray;
67   PetscErrorCode ierr;
68   int         i,j,jj,jB,irow,m=A->m,*ajj,*bjj,countA,countB,colA_start,jcol;
69   int         *row,*col;
70   PetscScalar *av, *bv,*val;
71   Mat_MUMPS   *mumps=(Mat_MUMPS*)A->spptr;
72 
73   PetscFunctionBegin;
74   if (mumps->isAIJ){
75     Mat_MPIAIJ    *mat =  (Mat_MPIAIJ*)A->data;
76     Mat_SeqAIJ    *aa=(Mat_SeqAIJ*)(mat->A)->data;
77     Mat_SeqAIJ    *bb=(Mat_SeqAIJ*)(mat->B)->data;
78     nz = aa->nz + bb->nz;
79     ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= mat->rstart;
80     garray = mat->garray;
81     av=aa->a; bv=bb->a;
82 
83   } else {
84     Mat_MPISBAIJ  *mat =  (Mat_MPISBAIJ*)A->data;
85     Mat_SeqSBAIJ  *aa=(Mat_SeqSBAIJ*)(mat->A)->data;
86     Mat_SeqBAIJ    *bb=(Mat_SeqBAIJ*)(mat->B)->data;
87     if (A->bs > 1) SETERRQ1(PETSC_ERR_SUP," bs=%d is not supported yet\n", A->bs);
88     nz = aa->nz + bb->nz;
89     ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= mat->rstart;
90     garray = mat->garray;
91     av=aa->a; bv=bb->a;
92   }
93 
94   if (!valOnly){
95     ierr = PetscMalloc(nz*sizeof(PetscInt) ,&row);CHKERRQ(ierr);
96     ierr = PetscMalloc(nz*sizeof(PetscInt),&col);CHKERRQ(ierr);
97     ierr = PetscMalloc(nz*sizeof(PetscScalar),&val);CHKERRQ(ierr);
98     *r = row; *c = col; *v = val;
99   } else {
100     row = *r; col = *c; val = *v;
101   }
102   *nnz = nz;
103 
104   jj = 0; irow = rstart;
105   for ( i=0; i<m; i++ ) {
106     ajj = aj + ai[i];                 /* ptr to the beginning of this row */
107     countA = ai[i+1] - ai[i];
108     countB = bi[i+1] - bi[i];
109     bjj = bj + bi[i];
110 
111     /* get jB, the starting local col index for the 2nd B-part */
112     colA_start = rstart + ajj[0]; /* the smallest col index for A */
113     j=-1;
114     do {
115       j++;
116       if (j == countB) break;
117       jcol = garray[bjj[j]];
118     } while (jcol < colA_start);
119     jB = j;
120 
121     /* B-part, smaller col index */
122     colA_start = rstart + ajj[0]; /* the smallest col index for A */
123     for (j=0; j<jB; j++){
124       jcol = garray[bjj[j]];
125       if (!valOnly){
126         row[jj] = irow + shift; col[jj] = jcol + shift;
127 
128       }
129       val[jj++] = *bv++;
130     }
131     /* A-part */
132     for (j=0; j<countA; j++){
133       if (!valOnly){
134         row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
135       }
136       val[jj++] = *av++;
137     }
138     /* B-part, larger col index */
139     for (j=jB; j<countB; j++){
140       if (!valOnly){
141         row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
142       }
143       val[jj++] = *bv++;
144     }
145     irow++;
146   }
147 
148   PetscFunctionReturn(0);
149 }
150 
151 EXTERN_C_BEGIN
152 #undef __FUNCT__
153 #define __FUNCT__ "MatConvert_MUMPS_Base"
154 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MUMPS_Base(Mat A,MatType type,MatReuse reuse,Mat *newmat) \
155 {
156   PetscErrorCode ierr;
157   Mat            B=*newmat;
158   Mat_MUMPS      *mumps=(Mat_MUMPS*)A->spptr;
159   void           (*f)(void);
160 
161   PetscFunctionBegin;
162   if (reuse == MAT_INITIAL_MATRIX) {
163     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
164   }
165   B->ops->duplicate              = mumps->MatDuplicate;
166   B->ops->view                   = mumps->MatView;
167   B->ops->assemblyend            = mumps->MatAssemblyEnd;
168   B->ops->lufactorsymbolic       = mumps->MatLUFactorSymbolic;
169   B->ops->choleskyfactorsymbolic = mumps->MatCholeskyFactorSymbolic;
170   B->ops->destroy                = mumps->MatDestroy;
171 
172   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",&f);CHKERRQ(ierr);
173   if (f) {
174     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C","",(FCNVOID)mumps->MatPreallocate);CHKERRQ(ierr);
175   }
176   ierr = PetscFree(mumps);CHKERRQ(ierr);
177 
178   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_aijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
179   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
180   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_aijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
181   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr);
182   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
183   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
184   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
185   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
186 
187   ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
188   *newmat = B;
189   PetscFunctionReturn(0);
190 }
191 EXTERN_C_END
192 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MUMPS"
/*
   Destroy a MUMPS-typed matrix: shut down the MUMPS instance (if one was
   created), release the triple storage, convert the matrix back to its base
   type, and finally run the base type's destroy.
*/
PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr;
  PetscErrorCode ierr;
  int       size=lu->size;
  PetscErrorCode (*specialdestroy)(Mat);
  PetscFunctionBegin;
  if (lu->CleanUpMUMPS) {
    /* Terminate instance, deallocate memories */
    lu->id.job=JOB_END;
#if defined(PETSC_USE_COMPLEX)
    zmumps_c(&lu->id);
#else
    dmumps_c(&lu->id);
#endif
    /* irn/jcn are allocated in MatFactorNumeric_AIJMUMPS (size==1) or
       MatConvertToTriples (size>1) */
    if (lu->irn) {
      ierr = PetscFree(lu->irn);CHKERRQ(ierr);
    }
    if (lu->jcn) {
      ierr = PetscFree(lu->jcn);CHKERRQ(ierr);
    }
    /* val is owned here only in the distributed case; with size==1 it aliases
       the base matrix's value array and must NOT be freed */
    if (size>1 && lu->val) {
      ierr = PetscFree(lu->val);CHKERRQ(ierr);
    }
    ierr = MPI_Comm_free(&(lu->comm_mumps));CHKERRQ(ierr);
  }
  /* copy the function pointer first: specialdestroy converts A back to its
     base type, which frees lu (see MatConvert_MUMPS_Base) */
  specialdestroy = lu->specialdestroy;
  ierr = (*specialdestroy)(A);CHKERRQ(ierr);
  /* A->ops->destroy is now the restored base-type destroy */
  ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
226 
227 #undef __FUNCT__
228 #define __FUNCT__ "MatDestroy_AIJMUMPS"
229 PetscErrorCode MatDestroy_AIJMUMPS(Mat A)
230 {
231   PetscErrorCode ierr;
232   int  size;
233 
234   PetscFunctionBegin;
235   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
236   if (size==1) {
237     ierr = MatConvert_MUMPS_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
238   } else {
239     ierr = MatConvert_MUMPS_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
240   }
241   PetscFunctionReturn(0);
242 }
243 
244 #undef __FUNCT__
245 #define __FUNCT__ "MatDestroy_SBAIJMUMPS"
246 PetscErrorCode MatDestroy_SBAIJMUMPS(Mat A)
247 {
248   PetscErrorCode ierr;
249   int  size;
250 
251   PetscFunctionBegin;
252   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
253   if (size==1) {
254     ierr = MatConvert_MUMPS_Base(A,MATSEQSBAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
255   } else {
256     ierr = MatConvert_MUMPS_Base(A,MATMPISBAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
257   }
258   PetscFunctionReturn(0);
259 }
260 
261 #undef __FUNCT__
262 #define __FUNCT__ "MatFactorInfo_MUMPS"
263 PetscErrorCode MatFactorInfo_MUMPS(Mat A,PetscViewer viewer) {
264   Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr;
265   PetscErrorCode ierr;
266 
267   PetscFunctionBegin;
268   ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr);
269   ierr = PetscViewerASCIIPrintf(viewer,"  SYM (matrix type):                  %d \n",lu->id.sym);CHKERRQ(ierr);
270   ierr = PetscViewerASCIIPrintf(viewer,"  PAR (host participation):           %d \n",lu->id.par);CHKERRQ(ierr);
271   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(4) (level of printing):       %d \n",lu->id.ICNTL(4));CHKERRQ(ierr);
272   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(5) (input mat struct):        %d \n",lu->id.ICNTL(5));CHKERRQ(ierr);
273   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(6) (matrix prescaling):       %d \n",lu->id.ICNTL(6));CHKERRQ(ierr);
274   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(7) (matrix ordering):         %d \n",lu->id.ICNTL(7));CHKERRQ(ierr);
275   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(9) (A/A^T x=b is solved):     %d \n",lu->id.ICNTL(9));CHKERRQ(ierr);
276   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(10) (max num of refinements): %d \n",lu->id.ICNTL(10));CHKERRQ(ierr);
277   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(11) (error analysis):         %d \n",lu->id.ICNTL(11));CHKERRQ(ierr);
278   if (!lu->myid && lu->id.ICNTL(11)>0) {
279     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(4) (inf norm of input mat):        %g\n",lu->id.RINFOG(4));CHKERRQ(ierr);
280     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(5) (inf norm of solution):         %g\n",lu->id.RINFOG(5));CHKERRQ(ierr);
281     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(6) (inf norm of residual):         %g\n",lu->id.RINFOG(6));CHKERRQ(ierr);
282     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",lu->id.RINFOG(7),lu->id.RINFOG(8));CHKERRQ(ierr);
283     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(9) (error estimate):               %g \n",lu->id.RINFOG(9));CHKERRQ(ierr);
284     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",lu->id.RINFOG(10),lu->id.RINFOG(11));CHKERRQ(ierr);
285 
286   }
287   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(12) (efficiency control):                         %d \n",lu->id.ICNTL(12));CHKERRQ(ierr);
288   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(13) (efficiency control):                         %d \n",lu->id.ICNTL(13));CHKERRQ(ierr);
289   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(14) (percentage of estimated workspace increase): %d \n",lu->id.ICNTL(14));CHKERRQ(ierr);
290   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(15) (efficiency control):                         %d \n",lu->id.ICNTL(15));CHKERRQ(ierr);
291   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(18) (input mat struct):                           %d \n",lu->id.ICNTL(18));CHKERRQ(ierr);
292 
293   ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(1) (relative pivoting threshold):      %g \n",lu->id.CNTL(1));CHKERRQ(ierr);
294   ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(2) (stopping criterion of refinement): %g \n",lu->id.CNTL(2));CHKERRQ(ierr);
295   ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(3) (absolute pivoting threshold):      %g \n",lu->id.CNTL(3));CHKERRQ(ierr);
296 
297   /* infomation local to each processor */
298   if (!lu->myid) ierr = PetscPrintf(PETSC_COMM_SELF, "      RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr);
299   ierr = PetscSynchronizedPrintf(A->comm,"             [%d] %g \n",lu->myid,lu->id.RINFO(1));CHKERRQ(ierr);
300   ierr = PetscSynchronizedFlush(A->comm);
301   if (!lu->myid) ierr = PetscPrintf(PETSC_COMM_SELF, "      RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr);
302   ierr = PetscSynchronizedPrintf(A->comm,"             [%d]  %g \n",lu->myid,lu->id.RINFO(2));CHKERRQ(ierr);
303   ierr = PetscSynchronizedFlush(A->comm);
304   if (!lu->myid) ierr = PetscPrintf(PETSC_COMM_SELF, "      RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr);
305   ierr = PetscSynchronizedPrintf(A->comm,"             [%d]  %g \n",lu->myid,lu->id.RINFO(3));CHKERRQ(ierr);
306   ierr = PetscSynchronizedFlush(A->comm);
307 
308   if (!lu->myid){ /* information from the host */
309     ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",lu->id.RINFOG(1));CHKERRQ(ierr);
310     ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",lu->id.RINFOG(2));CHKERRQ(ierr);
311     ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",lu->id.RINFOG(3));CHKERRQ(ierr);
312 
313     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(3));CHKERRQ(ierr);
314     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(4));CHKERRQ(ierr);
315     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(5) (estimated maximum front size in the complete tree): %d \n",lu->id.INFOG(5));CHKERRQ(ierr);
316     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(6) (number of nodes in the complete tree): %d \n",lu->id.INFOG(6));CHKERRQ(ierr);
317     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(7) (ordering option effectively uese after analysis): %d \n",lu->id.INFOG(7));CHKERRQ(ierr);
318     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",lu->id.INFOG(8));CHKERRQ(ierr);
319     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(9) (total real space store the matrix factors after analysis): %d \n",lu->id.INFOG(9));CHKERRQ(ierr);
320     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(10) (total integer space store the matrix factors after analysis): %d \n",lu->id.INFOG(10));CHKERRQ(ierr);
321     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(11) (order of largest frontal matrix): %d \n",lu->id.INFOG(11));CHKERRQ(ierr);
322     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(12) (number of off-diagonal pivots): %d \n",lu->id.INFOG(12));CHKERRQ(ierr);
323     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(13) (number of delayed pivots after factorization): %d \n",lu->id.INFOG(13));CHKERRQ(ierr);
324     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(14) (number of memory compress after factorization): %d \n",lu->id.INFOG(14));CHKERRQ(ierr);
325     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(15) (number of steps of iterative refinement after solution): %d \n",lu->id.INFOG(15));CHKERRQ(ierr);
326     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(16) (estimated size (in million of bytes) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",lu->id.INFOG(16));CHKERRQ(ierr);
327     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",lu->id.INFOG(17));CHKERRQ(ierr);
328     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",lu->id.INFOG(18));CHKERRQ(ierr);
329     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",lu->id.INFOG(19));CHKERRQ(ierr);
330      ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(20) (estimated number of entries in the factors): %d \n",lu->id.INFOG(20));CHKERRQ(ierr);
331   }
332 
333   PetscFunctionReturn(0);
334 }
335 
336 #undef __FUNCT__
337 #define __FUNCT__ "MatView_AIJMUMPS"
338 PetscErrorCode MatView_AIJMUMPS(Mat A,PetscViewer viewer) {
339   PetscErrorCode ierr;
340   PetscTruth        iascii;
341   PetscViewerFormat format;
342   Mat_MUMPS         *mumps=(Mat_MUMPS*)(A->spptr);
343 
344   PetscFunctionBegin;
345   ierr = (*mumps->MatView)(A,viewer);CHKERRQ(ierr);
346 
347   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
348   if (iascii) {
349     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
350     if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
351       ierr = MatFactorInfo_MUMPS(A,viewer);CHKERRQ(ierr);
352     }
353   }
354   PetscFunctionReturn(0);
355 }
356 
#undef __FUNCT__
#define __FUNCT__ "MatSolve_AIJMUMPS"
/*
   Solve A x = b with the MUMPS factors (MUMPS job=3).  MUMPS expects the
   dense right-hand side centralized on the host (rank 0), so in parallel the
   distributed b is gathered into a host-only sequential vector first, and the
   solution (which MUMPS overwrites in place) is scattered back into x.
*/
PetscErrorCode MatSolve_AIJMUMPS(Mat A,Vec b,Vec x) {
  Mat_MUMPS   *lu=(Mat_MUMPS*)A->spptr;
  PetscScalar *array;     /* host's view of the rhs/solution; set only where used */
  Vec         x_seq;      /* sequential gather target (length A->N on rank 0, 0 elsewhere) */
  IS          iden;
  VecScatter  scat;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (lu->size > 1){
    /* gather the distributed rhs onto the host: rank 0 receives all A->N
       entries, every other rank contributes through a zero-length vector */
    if (!lu->myid){
      ierr = VecCreateSeq(PETSC_COMM_SELF,A->N,&x_seq);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,A->N,0,1,&iden);CHKERRQ(ierr);
    } else {
      ierr = VecCreateSeq(PETSC_COMM_SELF,0,&x_seq);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&iden);CHKERRQ(ierr);
    }
    ierr = VecScatterCreate(b,iden,x_seq,iden,&scat);CHKERRQ(ierr);
    ierr = ISDestroy(iden);CHKERRQ(ierr);

    ierr = VecScatterBegin(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
    ierr = VecScatterEnd(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
    if (!lu->myid) {ierr = VecGetArray(x_seq,&array);CHKERRQ(ierr);}
  } else {  /* size == 1 */
    /* uniprocessor: MUMPS solves in place, so copy b into x and hand MUMPS
       the array of x directly */
    ierr = VecCopy(b,x);CHKERRQ(ierr);
    ierr = VecGetArray(x,&array);CHKERRQ(ierr);
  }
  if (!lu->myid) { /* define rhs on the host */
#if defined(PETSC_USE_COMPLEX)
    lu->id.rhs = (mumps_double_complex*)array;
#else
    lu->id.rhs = array;
#endif
  }

  /* solve phase */
  lu->id.job=3;
#if defined(PETSC_USE_COMPLEX)
  zmumps_c(&lu->id);
#else
  dmumps_c(&lu->id);
#endif
  if (lu->id.INFOG(1) < 0) {
    SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",lu->id.INFOG(1));
  }

  /* convert mumps solution x_seq to petsc mpi x */
  if (lu->size > 1) {
    if (!lu->myid){
      ierr = VecRestoreArray(x_seq,&array);CHKERRQ(ierr);
    }
    /* reverse scatter distributes the host solution back into the mpi vector */
    ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
    ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
    ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
    ierr = VecDestroy(x_seq);CHKERRQ(ierr);
  } else {
    ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
420 
421 /*
422   input:
423    F:        numeric factor
424   output:
425    nneg:     total number of negative pivots
426    nzero:    0
427    npos:     (global dimension of F) - nneg
428 */
429 
430 #undef __FUNCT__
431 #define __FUNCT__ "MatGetInertia_SBAIJMUMPS"
432 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos)
433 {
434   Mat_MUMPS  *lu =(Mat_MUMPS*)F->spptr;
435   PetscErrorCode ierr;
436   int        size;
437 
438   PetscFunctionBegin;
439   ierr = MPI_Comm_size(F->comm,&size);CHKERRQ(ierr);
440   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
441   if (size > 1 && lu->id.ICNTL(13) != 1){
442     SETERRQ1(PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",lu->id.INFOG(13));
443   }
444   if (nneg){
445     if (!lu->myid){
446       *nneg = lu->id.INFOG(12);
447     }
448     ierr = MPI_Bcast(nneg,1,MPI_INT,0,lu->comm_mumps);CHKERRQ(ierr);
449   }
450   if (nzero) *nzero = 0;
451   if (npos)  *npos  = F->M - (*nneg);
452   PetscFunctionReturn(0);
453 }
454 
455 #undef __FUNCT__
456 #define __FUNCT__ "MatFactorNumeric_AIJMUMPS"
457 PetscErrorCode MatFactorNumeric_AIJMUMPS(Mat A,MatFactorInfo *info,Mat *F)
458 {
459   Mat_MUMPS      *lu =(Mat_MUMPS*)(*F)->spptr;
460   Mat_MUMPS      *lua=(Mat_MUMPS*)(A)->spptr;
461   PetscErrorCode ierr;
462   PetscInt       rnz,nnz,nz,i,M=A->M,*ai,*aj,icntl;
463   PetscTruth     valOnly,flg;
464 
465   PetscFunctionBegin;
466   if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
467     (*F)->ops->solve    = MatSolve_AIJMUMPS;
468 
469     /* Initialize a MUMPS instance */
470     ierr = MPI_Comm_rank(A->comm, &lu->myid);
471     ierr = MPI_Comm_size(A->comm,&lu->size);CHKERRQ(ierr);
472     lua->myid = lu->myid; lua->size = lu->size;
473     lu->id.job = JOB_INIT;
474     ierr = MPI_Comm_dup(A->comm,&(lu->comm_mumps));CHKERRQ(ierr);
475     lu->id.comm_fortran = lu->comm_mumps;
476 
477     /* Set mumps options */
478     ierr = PetscOptionsBegin(A->comm,A->prefix,"MUMPS Options","Mat");CHKERRQ(ierr);
479     lu->id.par=1;  /* host participates factorizaton and solve */
480     lu->id.sym=lu->sym;
481     if (lu->sym == 2){
482       ierr = PetscOptionsInt("-mat_mumps_sym","SYM: (1,2)","None",lu->id.sym,&icntl,&flg);CHKERRQ(ierr);
483       if (flg && icntl == 1) lu->id.sym=icntl;  /* matrix is spd */
484     }
485 #if defined(PETSC_USE_COMPLEX)
486     zmumps_c(&lu->id);
487 #else
488     dmumps_c(&lu->id);
489 #endif
490 
491     if (lu->size == 1){
492       lu->id.ICNTL(18) = 0;   /* centralized assembled matrix input */
493     } else {
494       lu->id.ICNTL(18) = 3;   /* distributed assembled matrix input */
495     }
496 
497     icntl=-1;
498     ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",lu->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr);
499     if ((flg && icntl > 0) || PetscLogPrintInfo) {
500       lu->id.ICNTL(4)=icntl; /* and use mumps default icntl(i), i=1,2,3 */
501     } else { /* no output */
502       lu->id.ICNTL(1) = 0;  /* error message, default= 6 */
503       lu->id.ICNTL(2) = -1; /* output stream for diagnostic printing, statistics, and warning. default=0 */
504       lu->id.ICNTL(3) = -1; /* output stream for global information, default=6 */
505       lu->id.ICNTL(4) = 0;  /* level of printing, 0,1,2,3,4, default=2 */
506     }
507     ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): matrix prescaling (0 to 7)","None",lu->id.ICNTL(6),&lu->id.ICNTL(6),PETSC_NULL);CHKERRQ(ierr);
508     icntl=-1;
509     ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): matrix ordering (0 to 7)","None",lu->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr);
510     if (flg) {
511       if (icntl== 1){
512         SETERRQ(PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n");
513       } else {
514         lu->id.ICNTL(7) = icntl;
515       }
516     }
517     ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): A or A^T x=b to be solved. 1: A; otherwise: A^T","None",lu->id.ICNTL(9),&lu->id.ICNTL(9),PETSC_NULL);CHKERRQ(ierr);
518     ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",lu->id.ICNTL(10),&lu->id.ICNTL(10),PETSC_NULL);CHKERRQ(ierr);
519     ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): error analysis, a positive value returns statistics (by -ksp_view)","None",lu->id.ICNTL(11),&lu->id.ICNTL(11),PETSC_NULL);CHKERRQ(ierr);
520     ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): efficiency control","None",lu->id.ICNTL(12),&lu->id.ICNTL(12),PETSC_NULL);CHKERRQ(ierr);
521     ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): efficiency control","None",lu->id.ICNTL(13),&lu->id.ICNTL(13),PETSC_NULL);CHKERRQ(ierr);
522     ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage of estimated workspace increase","None",lu->id.ICNTL(14),&lu->id.ICNTL(14),PETSC_NULL);CHKERRQ(ierr);
523     ierr = PetscOptionsInt("-mat_mumps_icntl_15","ICNTL(15): efficiency control","None",lu->id.ICNTL(15),&lu->id.ICNTL(15),PETSC_NULL);CHKERRQ(ierr);
524 
525     /*
526     ierr = PetscOptionsInt("-mat_mumps_icntl_16","ICNTL(16): 1: rank detection; 2: rank detection and nullspace","None",lu->id.ICNTL(16),&icntl,&flg);CHKERRQ(ierr);
527     if (flg){
528       if (icntl >-1 && icntl <3 ){
529         if (lu->myid==0) lu->id.ICNTL(16) = icntl;
530       } else {
531         SETERRQ1(PETSC_ERR_SUP,"ICNTL(16)=%d -- not supported\n",icntl);
532       }
533     }
534     */
535 
536     ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",lu->id.CNTL(1),&lu->id.CNTL(1),PETSC_NULL);CHKERRQ(ierr);
537     ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",lu->id.CNTL(2),&lu->id.CNTL(2),PETSC_NULL);CHKERRQ(ierr);
538     ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",lu->id.CNTL(3),&lu->id.CNTL(3),PETSC_NULL);CHKERRQ(ierr);
539     PetscOptionsEnd();
540   }
541 
542   /* define matrix A */
543   switch (lu->id.ICNTL(18)){
544   case 0:  /* centralized assembled matrix input (size=1) */
545     if (!lu->myid) {
546       if (lua->isAIJ){
547         Mat_SeqAIJ   *aa = (Mat_SeqAIJ*)A->data;
548         nz               = aa->nz;
549         ai = aa->i; aj = aa->j; lu->val = aa->a;
550       } else {
551         Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)A->data;
552         nz                  =  aa->nz;
553         ai = aa->i; aj = aa->j; lu->val = aa->a;
554       }
555       if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization, get irn and jcn */
556         ierr = PetscMalloc(nz*sizeof(PetscInt),&lu->irn);CHKERRQ(ierr);
557         ierr = PetscMalloc(nz*sizeof(PetscInt),&lu->jcn);CHKERRQ(ierr);
558         nz = 0;
559         for (i=0; i<M; i++){
560           rnz = ai[i+1] - ai[i];
561           while (rnz--) {  /* Fortran row/col index! */
562             lu->irn[nz] = i+1; lu->jcn[nz] = (*aj)+1; aj++; nz++;
563           }
564         }
565       }
566     }
567     break;
568   case 3:  /* distributed assembled matrix input (size>1) */
569     if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
570       valOnly = PETSC_FALSE;
571     } else {
572       valOnly = PETSC_TRUE; /* only update mat values, not row and col index */
573     }
574     ierr = MatConvertToTriples(A,1,valOnly, &nnz, &lu->irn, &lu->jcn, &lu->val);CHKERRQ(ierr);
575     break;
576   default: SETERRQ(PETSC_ERR_SUP,"Matrix input format is not supported by MUMPS.");
577   }
578 
579   /* analysis phase */
580   if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
581      lu->id.n = M;
582     switch (lu->id.ICNTL(18)){
583     case 0:  /* centralized assembled matrix input */
584       if (!lu->myid) {
585         lu->id.nz =nz; lu->id.irn=lu->irn; lu->id.jcn=lu->jcn;
586         if (lu->id.ICNTL(6)>1){
587 #if defined(PETSC_USE_COMPLEX)
588           lu->id.a = (mumps_double_complex*)lu->val;
589 #else
590           lu->id.a = lu->val;
591 #endif
592         }
593       }
594       break;
595     case 3:  /* distributed assembled matrix input (size>1) */
596       lu->id.nz_loc = nnz;
597       lu->id.irn_loc=lu->irn; lu->id.jcn_loc=lu->jcn;
598       if (lu->id.ICNTL(6)>1) {
599 #if defined(PETSC_USE_COMPLEX)
600         lu->id.a_loc = (mumps_double_complex*)lu->val;
601 #else
602         lu->id.a_loc = lu->val;
603 #endif
604       }
605       break;
606     }
607     lu->id.job=1;
608 #if defined(PETSC_USE_COMPLEX)
609     zmumps_c(&lu->id);
610 #else
611     dmumps_c(&lu->id);
612 #endif
613     if (lu->id.INFOG(1) < 0) {
614       SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",lu->id.INFOG(1));
615     }
616   }
617 
618   /* numerical factorization phase */
619   if(!lu->id.ICNTL(18)) {
620     if (!lu->myid) {
621 #if defined(PETSC_USE_COMPLEX)
622       lu->id.a = (mumps_double_complex*)lu->val;
623 #else
624       lu->id.a = lu->val;
625 #endif
626     }
627   } else {
628 #if defined(PETSC_USE_COMPLEX)
629     lu->id.a_loc = (mumps_double_complex*)lu->val;
630 #else
631     lu->id.a_loc = lu->val;
632 #endif
633   }
634   lu->id.job=2;
635 #if defined(PETSC_USE_COMPLEX)
636   zmumps_c(&lu->id);
637 #else
638   dmumps_c(&lu->id);
639 #endif
640   if (lu->id.INFOG(1) < 0) {
641     if (lu->id.INFO(1) == -13) {
642       SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: Cannot allocate required memory %d megabytes\n",lu->id.INFO(2));
643     } else {
644       SETERRQ2(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFO(1)=%d, INFO(2)=%d\n",lu->id.INFO(1),lu->id.INFO(2));
645     }
646   }
647 
648   if (!lu->myid && lu->id.ICNTL(16) > 0){
649     SETERRQ1(PETSC_ERR_LIB,"  lu->id.ICNTL(16):=%d\n",lu->id.INFOG(16));
650   }
651 
652   (*F)->assembled  = PETSC_TRUE;
653   lu->matstruc     = SAME_NONZERO_PATTERN;
654   lu->CleanUpMUMPS = PETSC_TRUE;
655   PetscFunctionReturn(0);
656 }
657 
658 /* Note the Petsc r and c permutations are ignored */
659 #undef __FUNCT__
660 #define __FUNCT__ "MatLUFactorSymbolic_AIJMUMPS"
661 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) {
662   Mat            B;
663   Mat_MUMPS      *lu;
664   PetscErrorCode ierr;
665 
666   PetscFunctionBegin;
667 
668   /* Create the factorization matrix */
669   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
670   ierr = MatSetSizes(B,A->m,A->n,A->M,A->N);CHKERRQ(ierr);
671   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
672   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
673   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
674 
675   B->ops->lufactornumeric = MatFactorNumeric_AIJMUMPS;
676   B->factor               = FACTOR_LU;
677   lu                      = (Mat_MUMPS*)B->spptr;
678   lu->sym                 = 0;
679   lu->matstruc            = DIFFERENT_NONZERO_PATTERN;
680 
681   *F = B;
682   PetscFunctionReturn(0);
683 }
684 
685 /* Note the Petsc r permutation is ignored */
686 #undef __FUNCT__
687 #define __FUNCT__ "MatCholeskyFactorSymbolic_SBAIJMUMPS"
688 PetscErrorCode MatCholeskyFactorSymbolic_SBAIJMUMPS(Mat A,IS r,MatFactorInfo *info,Mat *F) {
689   Mat       B;
690   Mat_MUMPS *lu;
691   PetscErrorCode ierr;
692 
693   PetscFunctionBegin;
694   /* Create the factorization matrix */
695   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
696   ierr = MatSetSizes(B,A->m,A->n,A->M,A->N);CHKERRQ(ierr);
697   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
698   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
699   ierr = MatMPISBAIJSetPreallocation(B,1,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
700 
701   B->ops->choleskyfactornumeric = MatFactorNumeric_AIJMUMPS;
702   B->ops->getinertia            = MatGetInertia_SBAIJMUMPS;
703   B->factor                     = FACTOR_CHOLESKY;
704   lu                            = (Mat_MUMPS*)B->spptr;
705   lu->sym                       = 2;
706   lu->matstruc                  = DIFFERENT_NONZERO_PATTERN;
707 
708   *F = B;
709   PetscFunctionReturn(0);
710 }
711 
712 #undef __FUNCT__
713 #define __FUNCT__ "MatAssemblyEnd_AIJMUMPS"
714 PetscErrorCode MatAssemblyEnd_AIJMUMPS(Mat A,MatAssemblyType mode) {
715   PetscErrorCode ierr;
716   Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr;
717 
718   PetscFunctionBegin;
719   ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
720 
721   mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
722   mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
723   A->ops->lufactorsymbolic         = MatLUFactorSymbolic_AIJMUMPS;
724   PetscFunctionReturn(0);
725 }
726 
727 EXTERN_C_BEGIN
728 #undef __FUNCT__
729 #define __FUNCT__ "MatConvert_AIJ_AIJMUMPS"
730 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_AIJ_AIJMUMPS(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
731 {
732   PetscErrorCode ierr;
733   PetscMPIInt    size;
734   MPI_Comm       comm;
735   Mat            B=*newmat;
736   Mat_MUMPS      *mumps;
737 
738   PetscFunctionBegin;
739   if (reuse == MAT_INITIAL_MATRIX) {
740     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
741   }
742 
743   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
744   ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr);
745 
746   mumps->MatDuplicate              = A->ops->duplicate;
747   mumps->MatView                   = A->ops->view;
748   mumps->MatAssemblyEnd            = A->ops->assemblyend;
749   mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
750   mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
751   mumps->MatDestroy                = A->ops->destroy;
752   mumps->specialdestroy            = MatDestroy_AIJMUMPS;
753   mumps->CleanUpMUMPS              = PETSC_FALSE;
754   mumps->isAIJ                     = PETSC_TRUE;
755 
756   B->spptr                         = (void*)mumps;
757   B->ops->duplicate                = MatDuplicate_MUMPS;
758   B->ops->view                     = MatView_AIJMUMPS;
759   B->ops->assemblyend              = MatAssemblyEnd_AIJMUMPS;
760   B->ops->lufactorsymbolic         = MatLUFactorSymbolic_AIJMUMPS;
761   B->ops->destroy                  = MatDestroy_MUMPS;
762 
763   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
764   if (size == 1) {
765     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_aijmumps_C",
766                                              "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr);
767     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_seqaij_C",
768                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
769   } else {
770     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_aijmumps_C",
771                                              "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr);
772     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_mpiaij_C",
773                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
774   }
775 
776   ierr = PetscLogInfo((0,"MatConvert_AIJ_AIJMUMPS:Using MUMPS for LU factorization and solves.\n"));CHKERRQ(ierr);
777   ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr);
778   *newmat = B;
779   PetscFunctionReturn(0);
780 }
781 EXTERN_C_END
782 
783 /*MC
784   MATAIJMUMPS - MATAIJMUMPS = "aijmumps" - A matrix type providing direct solvers (LU) for distributed
785   and sequential matrices via the external package MUMPS.
786 
787   If MUMPS is installed (see the manual for instructions
788   on how to declare the existence of external packages),
789   a matrix type can be constructed which invokes MUMPS solvers.
790   After calling MatCreate(...,A), simply call MatSetType(A,MATAIJMUMPS).
791 
792   If created with a single process communicator, this matrix type inherits from MATSEQAIJ.
793   Otherwise, this matrix type inherits from MATMPIAIJ.  Hence for single process communicators,
794   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
795   for communicators controlling multiple processes.  It is recommended that you call both of
796   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
797   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
798   without data copy.
799 
800   Options Database Keys:
801 + -mat_type aijmumps - sets the matrix type to "aijmumps" during a call to MatSetFromOptions()
802 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric
803 . -mat_mumps_icntl_4 <0,1,2,3,4> - print level
804 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide)
805 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide)
806 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T
807 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements
808 . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view
809 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide)
810 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide)
811 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide)
812 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide)
813 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold
814 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement
815 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold
816 
817   Level: beginner
818 
819 .seealso: MATSBAIJMUMPS
820 M*/
821 
822 EXTERN_C_BEGIN
823 #undef __FUNCT__
824 #define __FUNCT__ "MatCreate_AIJMUMPS"
825 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_AIJMUMPS(Mat A)
826 {
827   PetscErrorCode ierr;
828   int      size;
829   Mat      A_diag;
830   MPI_Comm comm;
831 
832   PetscFunctionBegin;
833   /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */
834   /*   and AIJMUMPS types */
835   ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJMUMPS);CHKERRQ(ierr);
836   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
837   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
838   if (size == 1) {
839     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
840   } else {
841     ierr   = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
842     A_diag = ((Mat_MPIAIJ *)A->data)->A;
843     ierr   = MatConvert_AIJ_AIJMUMPS(A_diag,MATAIJMUMPS,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr);
844   }
845   ierr = MatConvert_AIJ_AIJMUMPS(A,MATAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
846   PetscFunctionReturn(0);
847 }
848 EXTERN_C_END
849 
850 #undef __FUNCT__
851 #define __FUNCT__ "MatAssemblyEnd_SBAIJMUMPS"
852 PetscErrorCode MatAssemblyEnd_SBAIJMUMPS(Mat A,MatAssemblyType mode)
853 {
854   PetscErrorCode ierr;
855   Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr;
856 
857   PetscFunctionBegin;
858   ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
859   mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
860   mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
861   A->ops->choleskyfactorsymbolic   = MatCholeskyFactorSymbolic_SBAIJMUMPS;
862   PetscFunctionReturn(0);
863 }
864 
865 EXTERN_C_BEGIN
866 #undef __FUNCT__
867 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS"
868 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJMUMPS(Mat  B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
869 {
870   Mat       A;
871   Mat_MUMPS *mumps=(Mat_MUMPS*)B->spptr;
872   PetscErrorCode ierr;
873 
874   PetscFunctionBegin;
875   /*
876     After performing the MPISBAIJ Preallocation, we need to convert the local diagonal block matrix
877     into MUMPS type so that the block jacobi preconditioner (for example) can use MUMPS.  I would
878     like this to be done in the MatCreate routine, but the creation of this inner matrix requires
879     block size info so that PETSc can determine the local size properly.  The block size info is set
880     in the preallocation routine.
881   */
882   ierr = (*mumps->MatPreallocate)(B,bs,d_nz,d_nnz,o_nz,o_nnz);
883   A    = ((Mat_MPISBAIJ *)B->data)->A;
884   ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
885   PetscFunctionReturn(0);
886 }
887 EXTERN_C_END
888 
889 EXTERN_C_BEGIN
890 #undef __FUNCT__
891 #define __FUNCT__ "MatConvert_SBAIJ_SBAIJMUMPS"
892 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SBAIJ_SBAIJMUMPS(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
893 {
894   PetscErrorCode ierr;
895   PetscMPIInt    size;
896   MPI_Comm       comm;
897   Mat            B=*newmat;
898   Mat_MUMPS      *mumps;
899   void           (*f)(void);
900 
901   PetscFunctionBegin;
902   if (reuse == MAT_INITIAL_MATRIX) {
903     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
904   }
905 
906   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
907   ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr);
908 
909   mumps->MatDuplicate              = A->ops->duplicate;
910   mumps->MatView                   = A->ops->view;
911   mumps->MatAssemblyEnd            = A->ops->assemblyend;
912   mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
913   mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
914   mumps->MatDestroy                = A->ops->destroy;
915   mumps->specialdestroy            = MatDestroy_SBAIJMUMPS;
916   mumps->CleanUpMUMPS              = PETSC_FALSE;
917   mumps->isAIJ                     = PETSC_FALSE;
918 
919   B->spptr                         = (void*)mumps;
920   B->ops->duplicate                = MatDuplicate_MUMPS;
921   B->ops->view                     = MatView_AIJMUMPS;
922   B->ops->assemblyend              = MatAssemblyEnd_SBAIJMUMPS;
923   B->ops->choleskyfactorsymbolic   = MatCholeskyFactorSymbolic_SBAIJMUMPS;
924   B->ops->destroy                  = MatDestroy_MUMPS;
925 
926   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
927   if (size == 1) {
928     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C",
929                                              "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr);
930     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C",
931                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
932   } else {
933   /* I really don't like needing to know the tag: MatMPISBAIJSetPreallocation_C */
934     ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",&f);CHKERRQ(ierr);
935     if (f) { /* This case should always be true when this routine is called */
936       mumps->MatPreallocate = (PetscErrorCode (*)(Mat,int,int,int*,int,int*))f;
937       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
938                                                "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS",
939                                                MatMPISBAIJSetPreallocation_MPISBAIJMUMPS);CHKERRQ(ierr);
940     }
941     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C",
942                                              "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr);
943     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C",
944                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
945   }
946 
947   ierr = PetscLogInfo((0,"MatConvert_AIJ_AIJMUMPS:Using MUMPS for Cholesky factorization and solves.\n"));CHKERRQ(ierr);
948   ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr);
949   *newmat = B;
950   PetscFunctionReturn(0);
951 }
952 EXTERN_C_END
953 
954 #undef __FUNCT__
955 #define __FUNCT__ "MatDuplicate_MUMPS"
956 PetscErrorCode MatDuplicate_MUMPS(Mat A, MatDuplicateOption op, Mat *M) {
957   PetscErrorCode ierr;
958   Mat_MUMPS   *lu=(Mat_MUMPS *)A->spptr;
959 
960   PetscFunctionBegin;
961   ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
962   ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_MUMPS));CHKERRQ(ierr);
963   PetscFunctionReturn(0);
964 }
965 
966 /*MC
967   MATSBAIJMUMPS - MATSBAIJMUMPS = "sbaijmumps" - A symmetric matrix type providing direct solvers (Cholesky) for
968   distributed and sequential matrices via the external package MUMPS.
969 
970   If MUMPS is installed (see the manual for instructions
971   on how to declare the existence of external packages),
972   a matrix type can be constructed which invokes MUMPS solvers.
973   After calling MatCreate(...,A), simply call MatSetType(A,MATSBAIJMUMPS).
974 
975   If created with a single process communicator, this matrix type inherits from MATSEQSBAIJ.
976   Otherwise, this matrix type inherits from MATMPISBAIJ.  Hence for single process communicators,
977   MatSeqSBAIJSetPreallocation is supported, and similarly MatMPISBAIJSetPreallocation is supported
978   for communicators controlling multiple processes.  It is recommended that you call both of
979   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
980   conversion to or from the MATSEQSBAIJ or MATMPISBAIJ type (depending on the communicator size)
981   without data copy.
982 
983   Options Database Keys:
984 + -mat_type sbaijmumps - sets the matrix type to "sbaijmumps" during a call to MatSetFromOptions()
985 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric
986 . -mat_mumps_icntl_4 <0,...,4> - print level
987 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide)
988 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide)
989 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T
990 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements
991 . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view
992 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide)
993 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide)
994 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide)
995 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide)
996 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold
997 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement
998 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold
999 
1000   Level: beginner
1001 
1002 .seealso: MATAIJMUMPS
1003 M*/
1004 
1005 EXTERN_C_BEGIN
1006 #undef __FUNCT__
1007 #define __FUNCT__ "MatCreate_SBAIJMUMPS"
1008 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJMUMPS(Mat A)
1009 {
1010   PetscErrorCode ierr;
1011   int size;
1012 
1013   PetscFunctionBegin;
1014   /* Change type name before calling MatSetType to force proper construction of SeqSBAIJ or MPISBAIJ */
1015   /*   and SBAIJMUMPS types */
1016   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJMUMPS);CHKERRQ(ierr);
1017   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
1018   if (size == 1) {
1019     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1020   } else {
1021     ierr   = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1022   }
1023   ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
1024   PetscFunctionReturn(0);
1025 }
1026 EXTERN_C_END
1027