xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision 608e5a7a8a35b40c4e75a0aae313cbdeb3df9fee)
1 
2 /*
3     Provides an interface to the MUMPS sparse solver
4 */
5 #include <petscpkg_version.h>
6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
8 #include <../src/mat/impls/sell/mpi/mpisell.h>
9 
10 EXTERN_C_BEGIN
11 #if defined(PETSC_USE_COMPLEX)
12 #if defined(PETSC_USE_REAL_SINGLE)
13 #include <cmumps_c.h>
14 #else
15 #include <zmumps_c.h>
16 #endif
17 #else
18 #if defined(PETSC_USE_REAL_SINGLE)
19 #include <smumps_c.h>
20 #else
21 #include <dmumps_c.h>
22 #endif
23 #endif
24 EXTERN_C_END
25 #define JOB_INIT -1
26 #define JOB_FACTSYMBOLIC 1
27 #define JOB_FACTNUMERIC 2
28 #define JOB_SOLVE 3
29 #define JOB_END -2
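/* A typical MUMPS driver sequence, which this interface follows, is: JOB_INIT once, then
   JOB_FACTSYMBOLIC and JOB_FACTNUMERIC, then one or more JOB_SOLVE calls, and finally
   JOB_END to free MUMPS' internal data. */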
30 
31 /* calls to MUMPS */
32 #if defined(PETSC_USE_COMPLEX)
33 #if defined(PETSC_USE_REAL_SINGLE)
34 #define MUMPS_c cmumps_c
35 #else
36 #define MUMPS_c zmumps_c
37 #endif
38 #else
39 #if defined(PETSC_USE_REAL_SINGLE)
40 #define MUMPS_c smumps_c
41 #else
42 #define MUMPS_c dmumps_c
43 #endif
44 #endif
45 
46 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc, and uses int64_t for
47    numbers of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
48    naming convention of PetscMPIInt, PetscBLASInt, etc.
49 */
50 typedef MUMPS_INT PetscMUMPSInt;
51 
52 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
53   #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is defined in the MUMPS headers when MUMPS is built in full 64-bit mode, therefore checking the macro is more reliable */
54     #error "PETSc has not been tested with full 64-bit MUMPS, so we choose to error out"
55   #endif
56 #else
57   #if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used to build MUMPS in full 64-bit mode */
58     #error "PETSc has not been tested with full 64-bit MUMPS, so we choose to error out"
59   #endif
60 #endif
61 
62 #define MPIU_MUMPSINT             MPI_INT
63 #define PETSC_MUMPS_INT_MAX       2147483647
64 #define PETSC_MUMPS_INT_MIN       -2147483648
65 
66 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> holds row/col indices or other small integers */
67 PETSC_STATIC_INLINE PetscErrorCode PetscMUMPSIntCast(PetscInt a,PetscMUMPSInt *b)
68 {
69   PetscFunctionBegin;
70   if (PetscDefined(USE_64BIT_INDICES) && PetscUnlikelyDebug(a > PETSC_MUMPS_INT_MAX || a < PETSC_MUMPS_INT_MIN)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
71   *b = (PetscMUMPSInt)(a);
72   PetscFunctionReturn(0);
73 }
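/* For example, PetscMUMPSIntCast(42,&b) sets b to 42, while on a 64-bit-index build a value
   such as 3e9 that does not fit in a 32-bit int triggers the range error above (the check is
   only active in debug mode, via PetscUnlikelyDebug). */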
74 
75 /* Put these utility routines here since they are only used in this file */
76 PETSC_STATIC_INLINE PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt *value,PetscBool *set,PetscMUMPSInt lb,PetscMUMPSInt ub)
77 {
78   PetscErrorCode ierr;
79   PetscInt       myval;
80   PetscBool      myset;
81   PetscFunctionBegin;
82   /* PetscInt's size should always be >= PetscMUMPSInt's, so it is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
83   ierr = PetscOptionsInt_Private(PetscOptionsObject,opt,text,man,(PetscInt)currentvalue,&myval,&myset,lb,ub);CHKERRQ(ierr);
84   if (myset) {ierr = PetscMUMPSIntCast(myval,value);CHKERRQ(ierr);}
85   if (set) *set = myset;
86   PetscFunctionReturn(0);
87 }
88 #define PetscOptionsMUMPSInt(a,b,c,d,e,f) PetscOptionsMUMPSInt_Private(PetscOptionsObject,a,b,c,d,e,f,PETSC_MUMPS_INT_MIN,PETSC_MUMPS_INT_MAX)
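/* Illustrative usage sketch (hypothetical description strings), inside a PetscOptionsBegin/End
   block that provides PetscOptionsObject, with PetscMUMPSInt icntl and PetscBool flg declared:

     ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): matrix ordering","None",
                                 mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr);
     if (flg) mumps->id.ICNTL(7) = icntl;

   The bounds PETSC_MUMPS_INT_MIN/PETSC_MUMPS_INT_MAX are supplied by the macro itself. */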
89 
90 /* If using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore the set of CPUs the master ranks may run on */
91 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
92 #define PetscMUMPS_c(mumps) \
93   do { \
94     if (mumps->use_petsc_omp_support) { \
95       if (mumps->is_omp_master) { \
96         ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \
97         MUMPS_c(&mumps->id); \
98         ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \
99       } \
100       ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \
101       /* Global info is the same on all processes, so we Bcast it within omp_comm. Local info is        \
102          process-specific, so we only Bcast info[1], an error code, and leave the others (since they do  \
103          not have an easy translation between omp_comm and petsc_comm). See the MUMPS 5.1.2 manual p82.  \
104          omp_comm is a small shared-memory communicator, hence doing multiple Bcasts as shown below is OK. \
105       */ \
106       ierr = MPI_Bcast(mumps->id.infog, 40,MPIU_MUMPSINT, 0,mumps->omp_comm);CHKERRMPI(ierr);\
107       ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,     0,mumps->omp_comm);CHKERRMPI(ierr);\
108       ierr = MPI_Bcast(mumps->id.info,  1, MPIU_MUMPSINT, 0,mumps->omp_comm);CHKERRMPI(ierr);\
109     } else { \
110       MUMPS_c(&mumps->id); \
111     } \
112   } while (0)
113 #else
114 #define PetscMUMPS_c(mumps) \
115   do { MUMPS_c(&mumps->id); } while (0)
116 #endif
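/* Every call into MUMPS below goes through PetscMUMPS_c() and then checks INFOG(1), e.g.:

     mumps->id.job = JOB_SOLVE;
     PetscMUMPS_c(mumps);
     if (mumps->id.INFOG(1) < 0) SETERRQ1(...);
*/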
117 
118 /* declare MumpsScalar */
119 #if defined(PETSC_USE_COMPLEX)
120 #if defined(PETSC_USE_REAL_SINGLE)
121 #define MumpsScalar mumps_complex
122 #else
123 #define MumpsScalar mumps_double_complex
124 #endif
125 #else
126 #define MumpsScalar PetscScalar
127 #endif
128 
129 /* macros so that indices match the MUMPS documentation */
130 #define ICNTL(I) icntl[(I)-1]
131 #define CNTL(I) cntl[(I)-1]
132 #define INFOG(I) infog[(I)-1]
133 #define INFO(I) info[(I)-1]
134 #define RINFOG(I) rinfog[(I)-1]
135 #define RINFO(I) rinfo[(I)-1]
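/* e.g., mumps->id.ICNTL(7) expands to mumps->id.icntl[6], so the code can use the same
   1-based numbering as the MUMPS documentation */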
136 
137 typedef struct Mat_MUMPS Mat_MUMPS;
138 struct Mat_MUMPS {
139 #if defined(PETSC_USE_COMPLEX)
140 #if defined(PETSC_USE_REAL_SINGLE)
141   CMUMPS_STRUC_C id;
142 #else
143   ZMUMPS_STRUC_C id;
144 #endif
145 #else
146 #if defined(PETSC_USE_REAL_SINGLE)
147   SMUMPS_STRUC_C id;
148 #else
149   DMUMPS_STRUC_C id;
150 #endif
151 #endif
152 
153   MatStructure   matstruc;
154   PetscMPIInt    myid,petsc_size;
155   PetscMUMPSInt  *irn,*jcn;             /* the (i,j,v) triplets passed to MUMPS */
156   PetscScalar    *val,*val_alloc;       /* For some matrices, we can directly access their data array without a buffer; for others we need a buffer, hence val_alloc */
157   PetscInt64     nnz;                   /* number of nonzeros. The type is called selective 64-bit in MUMPS */
158   PetscMUMPSInt  sym;
159   MPI_Comm       mumps_comm;
160   PetscMUMPSInt  ICNTL9_pre;            /* check if ICNTL(9) is changed from previous MatSolve */
161   VecScatter     scat_rhs, scat_sol;    /* used by MatSolve() */
162   PetscMUMPSInt  ICNTL20;               /* use centralized (0) or distributed (10) dense RHS */
163   PetscMUMPSInt  lrhs_loc,nloc_rhs,*irhs_loc;
164 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
165   PetscInt       *rhs_nrow,max_nrhs;
166   PetscMPIInt    *rhs_recvcounts,*rhs_disps;
167   PetscScalar    *rhs_loc,*rhs_recvbuf;
168 #endif
169   Vec            b_seq,x_seq;
170   PetscInt       ninfo,*info;           /* which INFO to display */
171   PetscInt       sizeredrhs;
172   PetscScalar    *schur_sol;
173   PetscInt       schur_sizesol;
174   PetscMUMPSInt  *ia_alloc,*ja_alloc;   /* work arrays used for the CSR struct for sparse rhs */
175   PetscInt64     cur_ilen,cur_jlen;     /* current len of ia_alloc[], ja_alloc[] */
176   PetscErrorCode (*ConvertToTriples)(Mat,PetscInt,MatReuse,Mat_MUMPS*);
177 
178   /* stuff used by the PETSc/MUMPS OpenMP support */
179   PetscBool      use_petsc_omp_support;
180   PetscOmpCtrl   omp_ctrl;              /* an OpenMP controller with which blocked processes release their CPU (MPI_Barrier does not have this guarantee) */
181   MPI_Comm       petsc_comm,omp_comm;   /* petsc_comm is the PETSc matrix's communicator */
182   PetscInt64     *recvcount;            /* a collection of nnz on omp_master */
183   PetscMPIInt    tag,omp_comm_size;
184   PetscBool      is_omp_master;         /* is this rank the master of omp_comm */
185   MPI_Request    *reqs;
186 };
187 
188 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
189    Here, nrow is the number of rows, ia[] is the row-pointer array and ja[] holds the column indices.
190  */
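/* For example, the 2x2 identity stored as a 1-based CSR has nrow=2, ia={1,2,3}, ja={1,2},
   giving nnz = ia[nrow]-1 = 2. */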
191 static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps,PetscInt nrow,PetscInt *ia,PetscInt *ja,PetscMUMPSInt **ia_mumps,PetscMUMPSInt **ja_mumps,PetscMUMPSInt *nnz_mumps)
192 {
193   PetscErrorCode ierr;
194   PetscInt       nnz=ia[nrow]-1; /* MUMPS uses 1-based indices. Use PetscInt instead of PetscInt64 since MUMPS only uses PetscMUMPSInt for the RHS */
195 
196   PetscFunctionBegin;
197 #if defined(PETSC_USE_64BIT_INDICES)
198   {
199     PetscInt i;
200     if (nrow+1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
201       ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr);
202       ierr = PetscMalloc1(nrow+1,&mumps->ia_alloc);CHKERRQ(ierr);
203       mumps->cur_ilen = nrow+1;
204     }
205     if (nnz > mumps->cur_jlen) {
206       ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr);
207       ierr = PetscMalloc1(nnz,&mumps->ja_alloc);CHKERRQ(ierr);
208       mumps->cur_jlen = nnz;
209     }
210     for (i=0; i<nrow+1; i++) {ierr = PetscMUMPSIntCast(ia[i],&(mumps->ia_alloc[i]));CHKERRQ(ierr);}
211     for (i=0; i<nnz; i++)    {ierr = PetscMUMPSIntCast(ja[i],&(mumps->ja_alloc[i]));CHKERRQ(ierr);}
212     *ia_mumps = mumps->ia_alloc;
213     *ja_mumps = mumps->ja_alloc;
214   }
215 #else
216   *ia_mumps = ia;
217   *ja_mumps = ja;
218 #endif
219   ierr = PetscMUMPSIntCast(nnz,nnz_mumps);CHKERRQ(ierr);
220   PetscFunctionReturn(0);
221 }
222 
223 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps)
224 {
225   PetscErrorCode ierr;
226 
227   PetscFunctionBegin;
228   ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr);
229   ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
230   ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
231   mumps->id.size_schur = 0;
232   mumps->id.schur_lld  = 0;
233   mumps->id.ICNTL(19)  = 0;
234   PetscFunctionReturn(0);
235 }
236 
237 /* solve with the RHS in mumps->id.redrhs and return the solution in the same location */
238 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
239 {
240   Mat_MUMPS            *mumps=(Mat_MUMPS*)F->data;
241   Mat                  S,B,X;
242   MatFactorSchurStatus schurstatus;
243   PetscInt             sizesol;
244   PetscErrorCode       ierr;
245 
246   PetscFunctionBegin;
247   ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr);
248   ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr);
249   ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr);
250   ierr = MatSetType(B,((PetscObject)S)->type_name);CHKERRQ(ierr);
251 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
252   ierr = MatBindToCPU(B,S->boundtocpu);CHKERRQ(ierr);
253 #endif
254   switch (schurstatus) {
255   case MAT_FACTOR_SCHUR_FACTORED:
256     ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr);
257     ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
258 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
259     ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr);
260 #endif
261     if (!mumps->id.ICNTL(9)) { /* transpose solve */
262       ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr);
263     } else {
264       ierr = MatMatSolve(S,B,X);CHKERRQ(ierr);
265     }
266     break;
267   case MAT_FACTOR_SCHUR_INVERTED:
268     sizesol = mumps->id.nrhs*mumps->id.size_schur;
269     if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
270       ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
271       ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr);
272       mumps->schur_sizesol = sizesol;
273     }
274     ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr);
275     ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
276 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
277     ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr);
278 #endif
279     ierr = MatProductCreateWithMat(S,B,NULL,X);CHKERRQ(ierr);
280     if (!mumps->id.ICNTL(9)) { /* transpose solve */
281       ierr = MatProductSetType(X,MATPRODUCT_AtB);CHKERRQ(ierr);
282     } else {
283       ierr = MatProductSetType(X,MATPRODUCT_AB);CHKERRQ(ierr);
284     }
285     ierr = MatProductSetFromOptions(X);CHKERRQ(ierr);
286     ierr = MatProductSymbolic(X);CHKERRQ(ierr);
287     ierr = MatProductNumeric(X);CHKERRQ(ierr);
288 
289     ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
290     break;
291   default:
292     SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status);
293   }
294   ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr);
295   ierr = MatDestroy(&B);CHKERRQ(ierr);
296   ierr = MatDestroy(&X);CHKERRQ(ierr);
297   PetscFunctionReturn(0);
298 }
299 
300 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
301 {
302   Mat_MUMPS     *mumps=(Mat_MUMPS*)F->data;
303   PetscErrorCode ierr;
304 
305   PetscFunctionBegin;
306   if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
307     PetscFunctionReturn(0);
308   }
309   if (!expansion) { /* prepare for the condensation step */
310     PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur;
311     /* allocate MUMPS internal array to store reduced right-hand sides */
312     if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
313       ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
314       mumps->id.lredrhs = mumps->id.size_schur;
315       ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr);
316       mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs;
317     }
318     mumps->id.ICNTL(26) = 1; /* condensation phase */
319   } else { /* prepare for the expansion step */
320     /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
321     ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr);
322     mumps->id.ICNTL(26) = 2; /* expansion phase */
323     PetscMUMPS_c(mumps);
324     if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
325     /* restore defaults */
326     mumps->id.ICNTL(26) = -1;
327     /* free the MUMPS internal redrhs array if we have solved for multiple rhs, to save memory */
328     if (mumps->id.nrhs > 1) {
329       ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
330       mumps->id.lredrhs = 0;
331       mumps->sizeredrhs = 0;
332     }
333   }
334   PetscFunctionReturn(0);
335 }
336 
337 /*
338   MatConvertToTriples_A_B - convert a PETSc matrix to triples: row[nz], col[nz], val[nz]
339 
340   input:
341     A       - matrix in aij, baij or sbaij format
342     shift   - 0: C-style (0-based) output triples; 1: Fortran-style (1-based) output triples.
343     reuse   - MAT_INITIAL_MATRIX: space is allocated and values are set for the triples
344               MAT_REUSE_MATRIX:   only the values in the v array are updated
345   output:
346     nnz     - dim of r, c, and v (number of local nonzero entries of A)
347     r, c, v - row and col indices, matrix values (matrix triples)
348 
349   The r and c arrays are obtained in a single PetscMalloc2() and freed with PetscFree2(mumps->irn,mumps->jcn) in
350   MatDestroy_MUMPS(). The v array sometimes points directly at the matrix's own data and is sometimes a separate
351   allocation (mumps->val_alloc), which is why the three arrays cannot easily be merged into one PetscMalloc3().
352 
353  */
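/* For example, with shift=1 (Fortran-style indexing, as MUMPS expects), the 2x2 matrix
     [2 0]
     [3 4]
   stored in AIJ format yields nnz=3 and the triples
     r = {1,2,2}, c = {1,1,2}, v = {2.0,3.0,4.0}. */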
354 
355 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
356 {
357   const PetscScalar *av;
358   const PetscInt    *ai,*aj,*ajj,M=A->rmap->n;
359   PetscInt64        nz,rnz,i,j,k;
360   PetscErrorCode    ierr;
361   PetscMUMPSInt     *row,*col;
362   Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
363 
364   PetscFunctionBegin;
365   ierr       = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr);
366   mumps->val = (PetscScalar*)av;
367   if (reuse == MAT_INITIAL_MATRIX) {
368     nz   = aa->nz;
369     ai   = aa->i;
370     aj   = aa->j;
371     ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
372     for (i=k=0; i<M; i++) {
373       rnz = ai[i+1] - ai[i];
374       ajj = aj + ai[i];
375       for (j=0; j<rnz; j++) {
376         ierr = PetscMUMPSIntCast(i+shift,&row[k]);CHKERRQ(ierr);
377         ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[k]);CHKERRQ(ierr);
378         k++;
379       }
380     }
381     mumps->irn = row;
382     mumps->jcn = col;
383     mumps->nnz = nz;
384   }
385   ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr);
386   PetscFunctionReturn(0);
387 }
388 
389 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
390 {
391   PetscErrorCode ierr;
392   PetscInt64     nz,i,j,k,r;
393   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
394   PetscMUMPSInt  *row,*col;
395 
396   PetscFunctionBegin;
397   mumps->val = a->val;
398   if (reuse == MAT_INITIAL_MATRIX) {
399     nz   = a->sliidx[a->totalslices];
400     ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
401     for (i=k=0; i<a->totalslices; i++) {
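      /* slices have height 8: entry j of slice i lies in row 8*i + r, where r cycles through 0..7 */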
402       for (j=a->sliidx[i],r=0; j<a->sliidx[i+1]; j++,r=((r+1)&0x07)) {
403         ierr = PetscMUMPSIntCast(8*i+r+shift,&row[k++]);CHKERRQ(ierr);
404       }
405     }
406     for (i=0;i<nz;i++) {ierr = PetscMUMPSIntCast(a->colidx[i]+shift,&col[i]);CHKERRQ(ierr);}
407     mumps->irn = row;
408     mumps->jcn = col;
409     mumps->nnz = nz;
410   }
411   PetscFunctionReturn(0);
412 }
413 
414 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
415 {
416   Mat_SeqBAIJ    *aa=(Mat_SeqBAIJ*)A->data;
417   const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2;
418   PetscInt64     M,nz,idx=0,rnz,i,j,k,m;
419   PetscInt       bs;
420   PetscErrorCode ierr;
421   PetscMUMPSInt  *row,*col;
422 
423   PetscFunctionBegin;
424   ierr       = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
425   M          = A->rmap->N/bs;
426   mumps->val = aa->a;
427   if (reuse == MAT_INITIAL_MATRIX) {
428     ai   = aa->i; aj = aa->j;
429     nz   = bs2*aa->nz;
430     ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
431     for (i=0; i<M; i++) {
432       ajj = aj + ai[i];
433       rnz = ai[i+1] - ai[i];
434       for (k=0; k<rnz; k++) {
435         for (j=0; j<bs; j++) {
436           for (m=0; m<bs; m++) {
437             ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[idx]);CHKERRQ(ierr);
438             ierr = PetscMUMPSIntCast(bs*ajj[k] + j + shift,&col[idx]);CHKERRQ(ierr);
439             idx++;
440           }
441         }
442       }
443     }
444     mumps->irn = row;
445     mumps->jcn = col;
446     mumps->nnz = nz;
447   }
448   PetscFunctionReturn(0);
449 }
450 
451 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
452 {
453   const PetscInt *ai, *aj,*ajj;
454   PetscInt        bs;
455   PetscInt64      nz,rnz,i,j,k,m;
456   PetscErrorCode  ierr;
457   PetscMUMPSInt   *row,*col;
458   PetscScalar     *val;
459   Mat_SeqSBAIJ    *aa=(Mat_SeqSBAIJ*)A->data;
460   const PetscInt  bs2=aa->bs2,mbs=aa->mbs;
461 #if defined(PETSC_USE_COMPLEX)
462   PetscBool       hermitian;
463 #endif
464 
465   PetscFunctionBegin;
466 #if defined(PETSC_USE_COMPLEX)
467   ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
468   if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
469 #endif
470   ai   = aa->i;
471   aj   = aa->j;
472   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
473   if (reuse == MAT_INITIAL_MATRIX) {
474     nz   = aa->nz;
475     ierr = PetscMalloc2(bs2*nz,&row,bs2*nz,&col);CHKERRQ(ierr);
476     if (bs>1) {
477       ierr       = PetscMalloc1(bs2*nz,&mumps->val_alloc);CHKERRQ(ierr);
478       mumps->val = mumps->val_alloc;
479     } else {
480       mumps->val = aa->a;
481     }
482     mumps->irn = row;
483     mumps->jcn = col;
484   } else {
485     if (bs == 1) mumps->val = aa->a;
486     row = mumps->irn;
487     col = mumps->jcn;
488   }
489   val = mumps->val;
490 
491   nz = 0;
492   if (bs>1) {
493     for (i=0; i<mbs; i++) {
494       rnz = ai[i+1] - ai[i];
495       ajj = aj + ai[i];
496       for (j=0; j<rnz; j++) {
497         for (k=0; k<bs; k++) {
498           for (m=0; m<bs; m++) {
499             if (ajj[j]>i || k>=m) {
500               if (reuse == MAT_INITIAL_MATRIX) {
501                 ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[nz]);CHKERRQ(ierr);
502                 ierr = PetscMUMPSIntCast(ajj[j]*bs + k + shift,&col[nz]);CHKERRQ(ierr);
503               }
504               val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs];
505             }
506           }
507         }
508       }
509     }
510   } else if (reuse == MAT_INITIAL_MATRIX) {
511     for (i=0; i<mbs; i++) {
512       rnz = ai[i+1] - ai[i];
513       ajj = aj + ai[i];
514       for (j=0; j<rnz; j++) {
515         ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
516         ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
517         nz++;
518       }
519     }
520     if (nz != aa->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %D != %D",nz,aa->nz);
521   }
522   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
523   PetscFunctionReturn(0);
524 }
525 
526 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
527 {
528   const PetscInt    *ai,*aj,*ajj,*adiag,M=A->rmap->n;
529   PetscInt64        nz,rnz,i,j;
530   const PetscScalar *av,*v1;
531   PetscScalar       *val;
532   PetscErrorCode    ierr;
533   PetscMUMPSInt     *row,*col;
534   Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
535   PetscBool         missing;
536 #if defined(PETSC_USE_COMPLEX)
537   PetscBool         hermitian;
538 #endif
539 
540   PetscFunctionBegin;
541 #if defined(PETSC_USE_COMPLEX)
542   ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
543   if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
544 #endif
545   ierr  = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr);
546   ai    = aa->i; aj = aa->j;
547   adiag = aa->diag;
548   ierr  = MatMissingDiagonal_SeqAIJ(A,&missing,NULL);CHKERRQ(ierr);
549   if (reuse == MAT_INITIAL_MATRIX) {
550     /* count nz in the upper triangular part of A */
551     nz = 0;
552     if (missing) {
553       for (i=0; i<M; i++) {
554         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
555           for (j=ai[i];j<ai[i+1];j++) {
556             if (aj[j] < i) continue;
557             nz++;
558           }
559         } else {
560           nz += ai[i+1] - adiag[i];
561         }
562       }
563     } else {
564       for (i=0; i<M; i++) nz += ai[i+1] - adiag[i];
565     }
566     ierr       = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
567     ierr       = PetscMalloc1(nz,&val);CHKERRQ(ierr);
568     mumps->nnz = nz;
569     mumps->irn = row;
570     mumps->jcn = col;
571     mumps->val = mumps->val_alloc = val;
572 
573     nz = 0;
574     if (missing) {
575       for (i=0; i<M; i++) {
576         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
577           for (j=ai[i];j<ai[i+1];j++) {
578             if (aj[j] < i) continue;
579             ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
580             ierr = PetscMUMPSIntCast(aj[j]+shift,&col[nz]);CHKERRQ(ierr);
581             val[nz] = av[j];
582             nz++;
583           }
584         } else {
585           rnz = ai[i+1] - adiag[i];
586           ajj = aj + adiag[i];
587           v1  = av + adiag[i];
588           for (j=0; j<rnz; j++) {
589             ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
590             ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
591             val[nz++] = v1[j];
592           }
593         }
594       }
595     } else {
596       for (i=0; i<M; i++) {
597         rnz = ai[i+1] - adiag[i];
598         ajj = aj + adiag[i];
599         v1  = av + adiag[i];
600         for (j=0; j<rnz; j++) {
601           ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
602           ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
603           val[nz++] = v1[j];
604         }
605       }
606     }
607   } else {
608     nz = 0;
609     val = mumps->val;
610     if (missing) {
611       for (i=0; i <M; i++) {
612         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
613           for (j=ai[i];j<ai[i+1];j++) {
614             if (aj[j] < i) continue;
615             val[nz++] = av[j];
616           }
617         } else {
618           rnz = ai[i+1] - adiag[i];
619           v1  = av + adiag[i];
620           for (j=0; j<rnz; j++) {
621             val[nz++] = v1[j];
622           }
623         }
624       }
625     } else {
626       for (i=0; i <M; i++) {
627         rnz = ai[i+1] - adiag[i];
628         v1  = av + adiag[i];
629         for (j=0; j<rnz; j++) {
630           val[nz++] = v1[j];
631         }
632       }
633     }
634   }
635   ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr);
636   PetscFunctionReturn(0);
637 }
638 
639 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
640 {
641   PetscErrorCode    ierr;
642   const PetscInt    *ai,*aj,*bi,*bj,*garray,*ajj,*bjj;
643   PetscInt          bs;
644   PetscInt64        rstart,nz,i,j,k,m,jj,irow,countA,countB;
645   PetscMUMPSInt     *row,*col;
646   const PetscScalar *av,*bv,*v1,*v2;
647   PetscScalar       *val;
648   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)A->data;
649   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ*)(mat->A)->data;
650   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
651   const PetscInt    bs2=aa->bs2,mbs=aa->mbs;
652 #if defined(PETSC_USE_COMPLEX)
653   PetscBool         hermitian;
654 #endif
655 
656   PetscFunctionBegin;
657 #if defined(PETSC_USE_COMPLEX)
658   ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
659   if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
660 #endif
661   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
662   rstart = A->rmap->rstart;
663   ai = aa->i;
664   aj = aa->j;
665   bi = bb->i;
666   bj = bb->j;
667   av = aa->a;
668   bv = bb->a;
669 
670   garray = mat->garray;
671 
672   if (reuse == MAT_INITIAL_MATRIX) {
673     nz   = (aa->nz+bb->nz)*bs2; /* just a conservative estimate */
674     ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
675     ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
676     /* cannot determine the exact mumps->nnz now because of the SBAIJ storage */
677     mumps->irn = row;
678     mumps->jcn = col;
679     mumps->val = mumps->val_alloc = val;
680   } else {
681     val = mumps->val;
682   }
683 
684   jj = 0; irow = rstart;
685   for (i=0; i<mbs; i++) {
686     ajj    = aj + ai[i];                 /* ptr to the beginning of this row */
687     countA = ai[i+1] - ai[i];
688     countB = bi[i+1] - bi[i];
689     bjj    = bj + bi[i];
690     v1     = av + ai[i]*bs2;
691     v2     = bv + bi[i]*bs2;
692 
693     if (bs>1) {
694       /* A-part */
695       for (j=0; j<countA; j++) {
696         for (k=0; k<bs; k++) {
697           for (m=0; m<bs; m++) {
698             if (rstart + ajj[j]*bs>irow || k>=m) {
699               if (reuse == MAT_INITIAL_MATRIX) {
700                 ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr);
701                 ierr = PetscMUMPSIntCast(rstart + ajj[j]*bs + k + shift,&col[jj]);CHKERRQ(ierr);
702               }
703               val[jj++] = v1[j*bs2 + m + k*bs];
704             }
705           }
706         }
707       }
708 
709       /* B-part */
710       for (j=0; j < countB; j++) {
711         for (k=0; k<bs; k++) {
712           for (m=0; m<bs; m++) {
713             if (reuse == MAT_INITIAL_MATRIX) {
714               ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr);
715               ierr = PetscMUMPSIntCast(garray[bjj[j]]*bs + k + shift,&col[jj]);CHKERRQ(ierr);
716             }
717             val[jj++] = v2[j*bs2 + m + k*bs];
718           }
719         }
720       }
721     } else {
722       /* A-part */
723       for (j=0; j<countA; j++) {
724         if (reuse == MAT_INITIAL_MATRIX) {
725           ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
726           ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr);
727         }
728         val[jj++] = v1[j];
729       }
730 
731       /* B-part */
732       for (j=0; j < countB; j++) {
733         if (reuse == MAT_INITIAL_MATRIX) {
734           ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
735           ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr);
736         }
737         val[jj++] = v2[j];
738       }
739     }
740     irow+=bs;
741   }
742   mumps->nnz = jj;
743   PetscFunctionReturn(0);
744 }
745 
746 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
747 {
748   const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
749   PetscErrorCode    ierr;
750   PetscInt64        rstart,nz,i,j,jj,irow,countA,countB;
751   PetscMUMPSInt     *row,*col;
752   const PetscScalar *av, *bv,*v1,*v2;
753   PetscScalar       *val;
754   Mat               Ad,Ao;
755   Mat_SeqAIJ        *aa;
756   Mat_SeqAIJ        *bb;
757 
758   PetscFunctionBegin;
759   ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
760   ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr);
761   ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr);
762 
763   aa = (Mat_SeqAIJ*)(Ad)->data;
764   bb = (Mat_SeqAIJ*)(Ao)->data;
765   ai = aa->i;
766   aj = aa->j;
767   bi = bb->i;
768   bj = bb->j;
769 
770   rstart = A->rmap->rstart;
771 
772   if (reuse == MAT_INITIAL_MATRIX) {
773     nz   = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
774     ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
775     ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
776     mumps->nnz = nz;
777     mumps->irn = row;
778     mumps->jcn = col;
779     mumps->val = mumps->val_alloc = val;
780   } else {
781     val = mumps->val;
782   }
783 
784   jj = 0; irow = rstart;
785   for (i=0; i<m; i++) {
786     ajj    = aj + ai[i];                 /* ptr to the beginning of this row */
787     countA = ai[i+1] - ai[i];
788     countB = bi[i+1] - bi[i];
789     bjj    = bj + bi[i];
790     v1     = av + ai[i];
791     v2     = bv + bi[i];
792 
793     /* A-part */
794     for (j=0; j<countA; j++) {
795       if (reuse == MAT_INITIAL_MATRIX) {
796         ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
797         ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr);
798       }
799       val[jj++] = v1[j];
800     }
801 
802     /* B-part */
803     for (j=0; j < countB; j++) {
804       if (reuse == MAT_INITIAL_MATRIX) {
805         ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
806         ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr);
807       }
808       val[jj++] = v2[j];
809     }
810     irow++;
811   }
812   ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr);
813   ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr);
814   PetscFunctionReturn(0);
815 }
816 
817 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
818 {
819   Mat_MPIBAIJ       *mat    = (Mat_MPIBAIJ*)A->data;
820   Mat_SeqBAIJ       *aa     = (Mat_SeqBAIJ*)(mat->A)->data;
821   Mat_SeqBAIJ       *bb     = (Mat_SeqBAIJ*)(mat->B)->data;
822   const PetscInt    *ai     = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj;
823   const PetscInt    *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart;
824   const PetscInt    bs2=mat->bs2;
825   PetscErrorCode    ierr;
826   PetscInt          bs;
827   PetscInt64        nz,i,j,k,n,jj,irow,countA,countB,idx;
828   PetscMUMPSInt     *row,*col;
829   const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2;
830   PetscScalar       *val;
831 
832   PetscFunctionBegin;
833   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
834   if (reuse == MAT_INITIAL_MATRIX) {
835     nz   = bs2*(aa->nz + bb->nz);
836     ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
837     ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
838     mumps->nnz = nz;
839     mumps->irn = row;
840     mumps->jcn = col;
841     mumps->val = mumps->val_alloc = val;
842   } else {
843     val = mumps->val;
844   }
845 
846   jj = 0; irow = rstart;
847   for (i=0; i<mbs; i++) {
848     countA = ai[i+1] - ai[i];
849     countB = bi[i+1] - bi[i];
850     ajj    = aj + ai[i];
851     bjj    = bj + bi[i];
852     v1     = av + bs2*ai[i];
853     v2     = bv + bs2*bi[i];
854 
855     idx = 0;
856     /* A-part */
857     for (k=0; k<countA; k++) {
858       for (j=0; j<bs; j++) {
859         for (n=0; n<bs; n++) {
860           if (reuse == MAT_INITIAL_MATRIX) {
861             ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr);
862             ierr = PetscMUMPSIntCast(rstart + bs*ajj[k] + j + shift,&col[jj]);CHKERRQ(ierr);
863           }
864           val[jj++] = v1[idx++];
865         }
866       }
867     }
868 
869     idx = 0;
870     /* B-part */
871     for (k=0; k<countB; k++) {
872       for (j=0; j<bs; j++) {
873         for (n=0; n<bs; n++) {
874           if (reuse == MAT_INITIAL_MATRIX) {
875             ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr);
876             ierr = PetscMUMPSIntCast(bs*garray[bjj[k]] + j + shift,&col[jj]);CHKERRQ(ierr);
877           }
878           val[jj++] = v2[idx++];
879         }
880       }
881     }
882     irow += bs;
883   }
884   PetscFunctionReturn(0);
885 }
886 
887 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
888 {
889   const PetscInt    *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
890   PetscErrorCode    ierr;
891   PetscInt64        rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
892   PetscMUMPSInt     *row,*col;
893   const PetscScalar *av, *bv,*v1,*v2;
894   PetscScalar       *val;
895   Mat               Ad,Ao;
896   Mat_SeqAIJ        *aa;
897   Mat_SeqAIJ        *bb;
898 #if defined(PETSC_USE_COMPLEX)
899   PetscBool         hermitian;
900 #endif
901 
902   PetscFunctionBegin;
903 #if defined(PETSC_USE_COMPLEX)
904   ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
905   if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
906 #endif
907   ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
908   ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr);
909   ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr);
910 
911   aa    = (Mat_SeqAIJ*)(Ad)->data;
912   bb    = (Mat_SeqAIJ*)(Ao)->data;
913   ai    = aa->i;
914   aj    = aa->j;
915   adiag = aa->diag;
916   bi    = bb->i;
917   bj    = bb->j;
918 
919   rstart = A->rmap->rstart;
920 
921   if (reuse == MAT_INITIAL_MATRIX) {
922     nza = 0;    /* num of upper triangular entries in mat->A, including diagonals */
923     nzb = 0;    /* num of upper triangular entries in mat->B */
924     for (i=0; i<m; i++) {
925       nza   += (ai[i+1] - adiag[i]);
926       countB = bi[i+1] - bi[i];
927       bjj    = bj + bi[i];
928       for (j=0; j<countB; j++) {
929         if (garray[bjj[j]] > rstart) nzb++;
930       }
931     }
932 
933     nz   = nza + nzb; /* total nz of upper triangular part of mat */
934     ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
935     ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
936     mumps->nnz = nz;
937     mumps->irn = row;
938     mumps->jcn = col;
939     mumps->val = mumps->val_alloc = val;
940   } else {
941     val = mumps->val;
942   }
943 
944   jj = 0; irow = rstart;
945   for (i=0; i<m; i++) {
946     ajj    = aj + adiag[i];                 /* ptr to the beginning of the diagonal of this row */
947     v1     = av + adiag[i];
948     countA = ai[i+1] - adiag[i];
949     countB = bi[i+1] - bi[i];
950     bjj    = bj + bi[i];
951     v2     = bv + bi[i];
952 
953     /* A-part */
954     for (j=0; j<countA; j++) {
955       if (reuse == MAT_INITIAL_MATRIX) {
956         ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
957         ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr);
958       }
959       val[jj++] = v1[j];
960     }
961 
962     /* B-part */
963     for (j=0; j < countB; j++) {
964       if (garray[bjj[j]] > rstart) {
965         if (reuse == MAT_INITIAL_MATRIX) {
966           ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
967           ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr);
968         }
969         val[jj++] = v2[j];
970       }
971     }
972     irow++;
973   }
974   ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr);
975   ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr);
976   PetscFunctionReturn(0);
977 }
978 
979 PetscErrorCode MatDestroy_MUMPS(Mat A)
980 {
981   PetscErrorCode ierr;
982   Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
983 
984   PetscFunctionBegin;
985   ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr);
986   ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr);
987   ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
988   ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr);
989   ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr);
990   ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr);
991   ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr);
992   ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr);
993   ierr = PetscFree(mumps->info);CHKERRQ(ierr);
994   ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr);
995   mumps->id.job = JOB_END;
996   PetscMUMPS_c(mumps);
997   if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1));
998 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
999   if (mumps->use_petsc_omp_support) {
1000     ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr);
1001     ierr = PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);CHKERRQ(ierr);
1002     ierr = PetscFree3(mumps->rhs_nrow,mumps->rhs_recvcounts,mumps->rhs_disps);CHKERRQ(ierr);
1003   }
1004 #endif
1005   ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr);
1006   ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr);
1007   ierr = PetscFree(mumps->recvcount);CHKERRQ(ierr);
1008   ierr = PetscFree(mumps->reqs);CHKERRQ(ierr);
1009   ierr = PetscFree(mumps->irhs_loc);CHKERRQ(ierr);
1010   if (mumps->mumps_comm != MPI_COMM_NULL) {ierr = MPI_Comm_free(&mumps->mumps_comm);CHKERRMPI(ierr);}
1011   ierr = PetscFree(A->data);CHKERRQ(ierr);
1012 
1013   /* clear composed functions */
1014   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
1015   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr);
1016   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr);
1017   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr);
1018   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr);
1019   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr);
1020   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr);
1021   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr);
1022   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr);
1023   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr);
1024   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr);
1025   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr);
1026   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr);
1027   PetscFunctionReturn(0);
1028 }
1029 
1030 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of right-hand sides. <array> points to the start of the RHS data on the local process. */
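/* When ompsize == 1 below, each rank owns rows [rstart, rstart+m) of the RHS and irhs_loc[]
   simply lists those rows 1-based; e.g. rstart=4 and m=3 give irhs_loc = {5,6,7}. */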
1031 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A,PetscInt nrhs,const PetscScalar *array)
1032 {
1033   PetscErrorCode     ierr;
1034   Mat_MUMPS          *mumps=(Mat_MUMPS*)A->data;
1035   const PetscMPIInt  ompsize=mumps->omp_comm_size;
1036   PetscInt           i,m,M,rstart;
1037 
1038   PetscFunctionBegin;
1039   ierr = MatGetSize(A,&M,NULL);CHKERRQ(ierr);
1040   ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
1041   if (M > PETSC_MUMPS_INT_MAX) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
1042   if (ompsize == 1) {
1043     if (!mumps->irhs_loc) {
1044       mumps->nloc_rhs = m;
1045       ierr = PetscMalloc1(m,&mumps->irhs_loc);CHKERRQ(ierr);
1046       ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
1047       for (i=0; i<m; i++) mumps->irhs_loc[i] = rstart+i+1; /* use 1-based indices */
1048     }
1049     mumps->id.rhs_loc = (MumpsScalar*)array;
1050   } else {
1051   #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1052     const PetscInt  *ranges;
1053     PetscMPIInt     j,k,sendcount,*petsc_ranks,*omp_ranks;
1054     MPI_Group       petsc_group,omp_group;
1055     PetscScalar     *recvbuf=NULL;
1056 
1057     if (mumps->is_omp_master) {
1058       /* Lazily initialize the omp stuff for distributed rhs */
1059       if (!mumps->irhs_loc) {
1060         ierr = PetscMalloc2(ompsize,&omp_ranks,ompsize,&petsc_ranks);CHKERRQ(ierr);
1061         ierr = PetscMalloc3(ompsize,&mumps->rhs_nrow,ompsize,&mumps->rhs_recvcounts,ompsize,&mumps->rhs_disps);CHKERRQ(ierr);
1062         ierr = MPI_Comm_group(mumps->petsc_comm,&petsc_group);CHKERRMPI(ierr);
1063         ierr = MPI_Comm_group(mumps->omp_comm,&omp_group);CHKERRMPI(ierr);
1064         for (j=0; j<ompsize; j++) omp_ranks[j] = j;
1065         ierr = MPI_Group_translate_ranks(omp_group,ompsize,omp_ranks,petsc_group,petsc_ranks);CHKERRMPI(ierr);
1066 
1067         /* Populate mumps->irhs_loc[], rhs_nrow[] */
1068         mumps->nloc_rhs = 0;
1069         ierr = MatGetOwnershipRanges(A,&ranges);CHKERRQ(ierr);
1070         for (j=0; j<ompsize; j++) {
1071           mumps->rhs_nrow[j] = ranges[petsc_ranks[j]+1] - ranges[petsc_ranks[j]];
1072           mumps->nloc_rhs   += mumps->rhs_nrow[j];
1073         }
1074         ierr = PetscMalloc1(mumps->nloc_rhs,&mumps->irhs_loc);CHKERRQ(ierr);
1075         for (j=k=0; j<ompsize; j++) {
1076           for (i=ranges[petsc_ranks[j]]; i<ranges[petsc_ranks[j]+1]; i++,k++) mumps->irhs_loc[k] = i+1; /* uses 1-based indices */
1077         }
1078 
1079         ierr = PetscFree2(omp_ranks,petsc_ranks);CHKERRQ(ierr);
1080         ierr = MPI_Group_free(&petsc_group);CHKERRMPI(ierr);
1081         ierr = MPI_Group_free(&omp_group);CHKERRMPI(ierr);
1082       }
1083 
1084       /* Realloc buffers when the current nrhs is bigger than any we have seen so far */
1085       if (nrhs > mumps->max_nrhs) {
1086         ierr = PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);CHKERRQ(ierr);
1087         ierr = PetscMalloc2(mumps->nloc_rhs*nrhs,&mumps->rhs_loc,mumps->nloc_rhs*nrhs,&mumps->rhs_recvbuf);CHKERRQ(ierr);
1088         mumps->max_nrhs = nrhs;
1089       }
1090 
1091       /* Set up recvcounts[], disps[] and recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
1092       for (j=0; j<ompsize; j++) {ierr = PetscMPIIntCast(mumps->rhs_nrow[j]*nrhs,&mumps->rhs_recvcounts[j]);CHKERRQ(ierr);}
1093       mumps->rhs_disps[0] = 0;
1094       for (j=1; j<ompsize; j++) {
1095         mumps->rhs_disps[j] = mumps->rhs_disps[j-1] + mumps->rhs_recvcounts[j-1];
1096         if (mumps->rhs_disps[j] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscMPIInt overflow!");
1097       }
1098       recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
1099     }
1100 
1101     ierr = PetscMPIIntCast(m*nrhs,&sendcount);CHKERRQ(ierr);
1102     ierr = MPI_Gatherv(array,sendcount,MPIU_SCALAR,recvbuf,mumps->rhs_recvcounts,mumps->rhs_disps,MPIU_SCALAR,0,mumps->omp_comm);CHKERRMPI(ierr);
1103 
1104     if (mumps->is_omp_master) {
1105       if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
1106         PetscScalar *dst,*dstbase = mumps->rhs_loc;
1107         for (j=0; j<ompsize; j++) {
1108           const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
1109           dst = dstbase;
1110           for (i=0; i<nrhs; i++) {
1111             ierr = PetscArraycpy(dst,src,mumps->rhs_nrow[j]);CHKERRQ(ierr);
1112             src += mumps->rhs_nrow[j];
1113             dst += mumps->nloc_rhs;
1114           }
1115           dstbase += mumps->rhs_nrow[j];
1116         }
1117       }
1118       mumps->id.rhs_loc = (MumpsScalar*)mumps->rhs_loc;
1119     }
1120   #endif /* PETSC_HAVE_OPENMP_SUPPORT */
1121   }
1122   mumps->id.nrhs     = nrhs;
1123   mumps->id.nloc_rhs = mumps->nloc_rhs;
1124   mumps->id.lrhs_loc = mumps->nloc_rhs;
1125   mumps->id.irhs_loc = mumps->irhs_loc;
1126   PetscFunctionReturn(0);
1127 }
1128 
1129 PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
1130 {
1131   Mat_MUMPS          *mumps=(Mat_MUMPS*)A->data;
1132   const PetscScalar  *rarray = NULL;
1133   PetscScalar        *array;
1134   IS                 is_iden,is_petsc;
1135   PetscErrorCode     ierr;
1136   PetscInt           i;
1137   PetscBool          second_solve = PETSC_FALSE;
1138   static PetscBool   cite1 = PETSC_FALSE,cite2 = PETSC_FALSE;
1139 
1140   PetscFunctionBegin;
1141   ierr = PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",&cite1);CHKERRQ(ierr);
1142   ierr = PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",&cite2);CHKERRQ(ierr);
1143 
1144   if (A->factorerrortype) {
1145     ierr = PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1146     ierr = VecSetInf(x);CHKERRQ(ierr);
1147     PetscFunctionReturn(0);
1148   }
1149 
1150   mumps->id.nrhs = 1;
1151   if (mumps->petsc_size > 1) {
1152     if (mumps->ICNTL20 == 10) {
1153       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1154       ierr = VecGetArrayRead(b,&rarray);CHKERRQ(ierr);
1155       ierr = MatMumpsSetUpDistRHSInfo(A,1,rarray);CHKERRQ(ierr);
1156     } else {
1157       mumps->id.ICNTL(20) = 0; /* dense centralized RHS; scatter b into a sequential rhs vector */
1158       ierr = VecScatterBegin(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1159       ierr = VecScatterEnd(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1160       if (!mumps->myid) {
1161         ierr = VecGetArray(mumps->b_seq,&array);CHKERRQ(ierr);
1162         mumps->id.rhs = (MumpsScalar*)array;
1163       }
1164     }
1165   } else {  /* petsc_size == 1 */
1166     mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1167     ierr = VecCopy(b,x);CHKERRQ(ierr);
1168     ierr = VecGetArray(x,&array);CHKERRQ(ierr);
1169     mumps->id.rhs = (MumpsScalar*)array;
1170   }
1171 
1172   /*
1173      handle the condensation step of the Schur complement (if any)
1174      We set ICNTL(26) == -1 by default when Schur indices have been provided by the user.
1175      According to the MUMPS (5.0.0) manual, any value should be harmless during the factorization phase.
1176      Unless the user provides a valid value for ICNTL(26), the MatSolve and MatMatSolve routines solve the full system.
1177      This requires an extra call to PetscMUMPS_c and the computation of the factors for S.
1178   */
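  /* ICNTL(26) values used below (via MatMumpsHandleSchur_Private): 1 = condensation, which computes
     the reduced right-hand side; 2 = expansion, which expands the Schur solution back into the full
     solution. */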
1179   if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1180     if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");
1181     second_solve = PETSC_TRUE;
1182     ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
1183   }
1184   /* solve phase */
1185   /*-------------*/
1186   mumps->id.job = JOB_SOLVE;
1187   PetscMUMPS_c(mumps);
1188   if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
1189 
1190   /* handle expansion step of Schur complement (if any) */
1191   if (second_solve) {
1192     ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
1193   }
1194 
1195   if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
1196     if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
1197       /* when id.ICNTL(9) changes, the contents of isol_loc may change (though not its size, lsol_loc), so recreate scat_sol */
1198       ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
1199     }
1200     if (!mumps->scat_sol) { /* create scatter scat_sol */
1201       PetscInt *isol2_loc=NULL;
1202       ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */
1203       ierr = PetscMalloc1(mumps->id.lsol_loc,&isol2_loc);CHKERRQ(ierr);
1204       for (i=0; i<mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i]-1; /* change Fortran style to C style */
1205       ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,isol2_loc,PETSC_OWN_POINTER,&is_petsc);CHKERRQ(ierr);  /* to */
1206       ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr);
1207       ierr = ISDestroy(&is_iden);CHKERRQ(ierr);
1208       ierr = ISDestroy(&is_petsc);CHKERRQ(ierr);
1209       mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
1210     }
1211 
1212     ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1213     ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1214   }
1215 
1216   if (mumps->petsc_size > 1) {
1217     if (mumps->ICNTL20 == 10) {
1218       ierr = VecRestoreArrayRead(b,&rarray);CHKERRQ(ierr);
1219     } else if (!mumps->myid) {
1220       ierr = VecRestoreArray(mumps->b_seq,&array);CHKERRQ(ierr);
1221     }
1222   } else {ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);}
1223 
1224   ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr);
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)
1229 {
1230   Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
1231   PetscErrorCode ierr;
1232 
1233   PetscFunctionBegin;
1234   mumps->id.ICNTL(9) = 0;
1235   ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr);
1236   mumps->id.ICNTL(9) = 1;
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X)
1241 {
1242   PetscErrorCode    ierr;
1243   Mat               Bt = NULL;
1244   PetscBool         denseX,denseB,flg,flgT;
1245   Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
1246   PetscInt          i,nrhs,M;
1247   PetscScalar       *array;
1248   const PetscScalar *rbray;
1249   PetscInt          lsol_loc,nlsol_loc,*idxx,iidx = 0;
1250   PetscMUMPSInt     *isol_loc,*isol_loc_save;
1251   PetscScalar       *bray,*sol_loc,*sol_loc_save;
1252   IS                is_to,is_from;
1253   PetscInt          k,proc,j,m,myrstart;
1254   const PetscInt    *rstart;
1255   Vec               v_mpi,msol_loc;
1256   VecScatter        scat_sol;
1257   Vec               b_seq;
1258   VecScatter        scat_rhs;
1259   PetscScalar       *aa;
1260   PetscInt          spnr,*ia,*ja;
1261   Mat_MPIAIJ        *b = NULL;
1262 
1263   PetscFunctionBegin;
1264   ierr = PetscObjectTypeCompareAny((PetscObject)X,&denseX,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
1265   if (!denseX) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");
1266 
1267   ierr = PetscObjectTypeCompareAny((PetscObject)B,&denseB,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
1268   if (denseB) {
1269     if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution");
1270     mumps->id.ICNTL(20)= 0; /* dense RHS */
1271   } else { /* sparse B */
1272     if (X == B) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different matrices");
1273     ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr);
1274     if (flgT) { /* input B is the transpose of the actual RHS matrix,
1275                  because MUMPS requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
1276       ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr);
1277     } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix");
1278     mumps->id.ICNTL(20)= 1; /* sparse RHS */
1279   }
1280 
1281   ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr);
1282   mumps->id.nrhs = nrhs;
1283   mumps->id.lrhs = M;
1284   mumps->id.rhs  = NULL;
1285 
1286   if (mumps->petsc_size == 1) {
1287     PetscScalar *aa;
1288     PetscInt    spnr,*ia,*ja;
1289     PetscBool   second_solve = PETSC_FALSE;
1290 
1291     ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
1292     mumps->id.rhs = (MumpsScalar*)array;
1293 
1294     if (denseB) {
1295       /* copy B to X */
1296       ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr);
1297       ierr = PetscArraycpy(array,rbray,M*nrhs);CHKERRQ(ierr);
1298       ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr);
1299     } else { /* sparse B */
1300       ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr);
1301       ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
1302       if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
1303       ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr);
1304       mumps->id.rhs_sparse  = (MumpsScalar*)aa;
1305     }
1306     /* handle condensation step of Schur complement (if any) */
1307     if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1308       second_solve = PETSC_TRUE;
1309       ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
1310     }
1311     /* solve phase */
1312     /*-------------*/
1313     mumps->id.job = JOB_SOLVE;
1314     PetscMUMPS_c(mumps);
1315     if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
1316 
1317     /* handle expansion step of Schur complement (if any) */
1318     if (second_solve) {
1319       ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
1320     }
1321     if (!denseB) { /* sparse B */
1322       ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr);
1323       ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
1324       if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
1325     }
1326     ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);
1327     PetscFunctionReturn(0);
1328   }
1329 
1330   /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/
1331   if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");
1332 
1333   /* create msol_loc to hold mumps local solution */
1334   isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
1335   sol_loc_save  = (PetscScalar*)mumps->id.sol_loc;
1336 
1337   lsol_loc  = mumps->id.lsol_loc;
1338   nlsol_loc = nrhs*lsol_loc;     /* length of sol_loc */
1339   ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr);
1340   mumps->id.sol_loc  = (MumpsScalar*)sol_loc;
1341   mumps->id.isol_loc = isol_loc;
1342 
1343   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);CHKERRQ(ierr);
1344 
1345   if (denseB) {
1346     if (mumps->ICNTL20 == 10) {
1347       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1348       ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr);
1349       ierr = MatMumpsSetUpDistRHSInfo(A,nrhs,rbray);CHKERRQ(ierr);
1350       ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr);
1351       ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr);
1352       ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,NULL,&v_mpi);CHKERRQ(ierr);
1353     } else {
1354       mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1355       /* TODO: Because of the non-contiguous indices, the vecscatter scat_rhs created below cannot be implemented
1356         as an MPI_Gather, resulting in very inefficient communication. An optimization is to use VecScatterCreateToZero
1357         to gather B to rank 0, then re-arrange B into the desired order on rank 0, which is a local operation.
1358       */
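      /* A minimal sketch of that optimization (hypothetical, not what the code below does): gather v_mpi
         to rank 0 with the native scatter-to-zero path and permute locally afterwards, e.g.
             Vec        b_all;
             VecScatter togather;
             ierr = VecScatterCreateToZero(v_mpi,&togather,&b_all);CHKERRQ(ierr);
             ierr = VecScatterBegin(togather,v_mpi,b_all,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
             ierr = VecScatterEnd(togather,v_mpi,b_all,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
         followed by a rank-0 loop that re-orders b_all's array into the layout built by idx below. */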
1359 
1360       /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
1361       /* wrap dense rhs matrix B into a vector v_mpi */
1362       ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr);
1363       ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr);
1364       ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
1365       ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr);
1366 
1367       /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
1368       if (!mumps->myid) {
1369         PetscInt *idx;
1370         /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
1371         ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr);
1372         ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr);
1373         k = 0;
1374         for (proc=0; proc<mumps->petsc_size; proc++) {
1375           for (j=0; j<nrhs; j++) {
1376             for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i;
1377           }
1378         }
1379 
1380         ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr);
1381         ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr);
1382         ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr);
1383       } else {
1384         ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr);
1385         ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr);
1386         ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr);
1387       }
1388       ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr);
1389       ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1390       ierr = ISDestroy(&is_to);CHKERRQ(ierr);
1391       ierr = ISDestroy(&is_from);CHKERRQ(ierr);
1392       ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1393 
1394       if (!mumps->myid) { /* define rhs on the host */
1395         ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr);
1396         mumps->id.rhs = (MumpsScalar*)bray;
1397         ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr);
1398       }
1399     }
1400   } else { /* sparse B */
1401     b = (Mat_MPIAIJ*)Bt->data;
1402 
1403     /* wrap dense X into a vector v_mpi */
1404     ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr);
1405     ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr);
1406     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
1407     ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr);
1408 
1409     if (!mumps->myid) {
1410       ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr);
1411       ierr = MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
1412       if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
1413       ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr);
1414       mumps->id.rhs_sparse  = (MumpsScalar*)aa;
1415     } else {
1416       mumps->id.irhs_ptr    = NULL;
1417       mumps->id.irhs_sparse = NULL;
1418       mumps->id.nz_rhs      = 0;
1419       mumps->id.rhs_sparse  = NULL;
1420     }
1421   }
1422 
1423   /* solve phase */
1424   /*-------------*/
1425   mumps->id.job = JOB_SOLVE;
1426   PetscMUMPS_c(mumps);
1427   if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
1428 
1429   /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
1430   ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
1431   ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr);
1432 
1433   /* create scatter scat_sol */
1434   ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr);
1435   /* iidx: index used to scatter the mumps solution to petsc X */
1436 
1437   ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr);
1438   ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr);
1439   for (i=0; i<lsol_loc; i++) {
1440     isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */
1441 
1442     for (proc=0; proc<mumps->petsc_size; proc++) {
1443       if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) {
1444         myrstart = rstart[proc];
1445         k        = isol_loc[i] - myrstart;        /* local index on 1st column of petsc vector X */
1446         iidx     = k + myrstart*nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
1447         m        = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */
1448         break;
1449       }
1450     }
1451 
1452     for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m;
1453   }
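  /* A worked example of the mapping above (illustrative layout only): with 2 ranks, M=4, nrhs=2 and
     rows {0,1} on rank 0, rows {2,3} on rank 1 (so m=2 on each rank), v_mpi concatenates the per-rank
     column-major blocks; the entry (row r, rhs j) owned by the rank with range [rs,re) thus lives at
     index rs*nrhs + j*m + (r-rs), e.g. (row 2, j=1) -> 2*2 + 1*2 + 0 = 6. */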
1454   ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr);
1455   ierr = VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr);
1456   ierr = VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1457   ierr = ISDestroy(&is_from);CHKERRQ(ierr);
1458   ierr = ISDestroy(&is_to);CHKERRQ(ierr);
1459   ierr = VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1460   ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);
1461 
1462   /* free spaces */
1463   mumps->id.sol_loc  = (MumpsScalar*)sol_loc_save;
1464   mumps->id.isol_loc = isol_loc_save;
1465 
1466   ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr);
1467   ierr = PetscFree(idxx);CHKERRQ(ierr);
1468   ierr = VecDestroy(&msol_loc);CHKERRQ(ierr);
1469   ierr = VecDestroy(&v_mpi);CHKERRQ(ierr);
1470   if (!denseB) {
1471     if (!mumps->myid) {
1472       b = (Mat_MPIAIJ*)Bt->data;
1473       ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr);
1474       ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
1475       if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
1476     }
1477   } else {
1478     if (mumps->ICNTL20 == 0) {
1479       ierr = VecDestroy(&b_seq);CHKERRQ(ierr);
1480       ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr);
1481     }
1482   }
1483   ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr);
1484   ierr = PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr);
1485   PetscFunctionReturn(0);
1486 }
1487 
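/* A hedged usage sketch (F, Bt, X are placeholder names): MUMPS wants sparse multiple right-hand sides
   in compressed column format, so a caller stores one right-hand side per row of a MATAIJ matrix Bt and,
   with a MUMPS-factored matrix F, calls
       ierr = MatMatTransposeSolve(F,Bt,X);CHKERRQ(ierr);
   to solve A * X = Bt^T; that call dispatches to the routine below. */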
1488 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X)
1489 {
1490   PetscErrorCode ierr;
1491   PetscBool      flg;
1492   Mat            B;
1493 
1494   PetscFunctionBegin;
1495   ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr);
1496   if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be a MATAIJ matrix");
1497 
1498   /* Create B=Bt^T that uses Bt's data structure */
1499   ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr);
1500 
1501   ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr);
1502   ierr = MatDestroy(&B);CHKERRQ(ierr);
1503   PetscFunctionReturn(0);
1504 }
1505 
1506 #if !defined(PETSC_USE_COMPLEX)
1507 /*
1508   input:
1509    F:        numeric factor
1510   output:
1511    nneg:     total number of negative pivots
1512    nzero:    total number of zero pivots
1513    npos:     (global dimension of F) - nneg - nzero
1514 */
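/* A usage sketch under assumed names (pc is a PCCHOLESKY preconditioner using MATSOLVERMUMPS, run with
   -mat_mumps_icntl_13 1 -mat_mumps_icntl_24 1 as required below):
       Mat      F;
       PetscInt nneg,nzero,npos;
       ierr = PCFactorGetMatrix(pc,&F);CHKERRQ(ierr);
       ierr = MatGetInertia(F,&nneg,&nzero,&npos);CHKERRQ(ierr);
 */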
1515 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos)
1516 {
1517   Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
1518   PetscErrorCode ierr;
1519   PetscMPIInt    size;
1520 
1521   PetscFunctionBegin;
1522   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRMPI(ierr);
1523   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
1524   if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",mumps->id.ICNTL(13));
1525 
1526   if (nneg) *nneg = mumps->id.INFOG(12);
1527   if (nzero || npos) {
1528     if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
1529     if (nzero) *nzero = mumps->id.INFOG(28);
1530     if (npos) *npos   = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1531   }
1532   PetscFunctionReturn(0);
1533 }
1534 #endif
1535 
1536 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps)
1537 {
1538   PetscErrorCode ierr;
1539   PetscInt       i,nreqs;
1540   PetscMUMPSInt  *irn,*jcn;
1541   PetscMPIInt    count;
1542   PetscInt64     totnnz,remain;
1543   const PetscInt osize=mumps->omp_comm_size;
1544   PetscScalar    *val;
1545 
1546   PetscFunctionBegin;
1547   if (osize > 1) {
1548     if (reuse == MAT_INITIAL_MATRIX) {
1549       /* master first gathers counts of nonzeros to receive */
1550       if (mumps->is_omp_master) {ierr = PetscMalloc1(osize,&mumps->recvcount);CHKERRQ(ierr);}
1551       ierr = MPI_Gather(&mumps->nnz,1,MPIU_INT64,mumps->recvcount,1,MPIU_INT64,0/*master*/,mumps->omp_comm);CHKERRMPI(ierr);
1552 
1553       /* Then each computes number of send/recvs */
1554       if (mumps->is_omp_master) {
1555         /* Start from 1 since self communication is not done in MPI */
1556         nreqs = 0;
1557         for (i=1; i<osize; i++) nreqs += (mumps->recvcount[i]+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
1558       } else {
1559         nreqs = (mumps->nnz+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
1560       }
1561       ierr = PetscMalloc1(nreqs*3,&mumps->reqs);CHKERRQ(ierr); /* Triple the requests since we send irn, jcn and val separately */
1562 
1563       /* The following code does a very simple thing: the omp_master rank gathers irn/jcn/val from the others.
1564          MPI_Gatherv would suffice if it supported big counts > 2^31-1. Since it does not, and mumps->nnz
1565          might even be a prime number > 2^31-1, we have to slice the messages. Since omp_comm_size
1566          is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
1567        */
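      /* Illustrative numbers only: a contribution of nnz = 5000000000 with PETSC_MPI_INT_MAX = 2147483647
         is moved by the while loops below in 3 slices of sizes 2147483647, 2147483647 and 705032706. */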
1568       nreqs = 0; /* counter for actual send/recvs */
1569       if (mumps->is_omp_master) {
1570         for (i=0,totnnz=0; i<osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
1571         ierr = PetscMalloc2(totnnz,&irn,totnnz,&jcn);CHKERRQ(ierr);
1572         ierr = PetscMalloc1(totnnz,&val);CHKERRQ(ierr);
1573 
1574         /* Self communication */
1575         ierr = PetscArraycpy(irn,mumps->irn,mumps->nnz);CHKERRQ(ierr);
1576         ierr = PetscArraycpy(jcn,mumps->jcn,mumps->nnz);CHKERRQ(ierr);
1577         ierr = PetscArraycpy(val,mumps->val,mumps->nnz);CHKERRQ(ierr);
1578 
1579         /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
1580         ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr);
1581         ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr);
1582         mumps->nnz = totnnz;
1583         mumps->irn = irn;
1584         mumps->jcn = jcn;
1585         mumps->val = mumps->val_alloc = val;
1586 
1587         irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
1588         jcn += mumps->recvcount[0];
1589         val += mumps->recvcount[0];
1590 
1591         /* Remote communication */
1592         for (i=1; i<osize; i++) {
1593           count  = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
1594           remain = mumps->recvcount[i] - count;
1595           while (count>0) {
1596             ierr    = MPI_Irecv(irn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
1597             ierr    = MPI_Irecv(jcn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
1598             ierr    = MPI_Irecv(val,count,MPIU_SCALAR,  i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
1599             irn    += count;
1600             jcn    += count;
1601             val    += count;
1602             count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1603             remain -= count;
1604           }
1605         }
1606       } else {
1607         irn    = mumps->irn;
1608         jcn    = mumps->jcn;
1609         val    = mumps->val;
1610         count  = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
1611         remain = mumps->nnz - count;
1612         while (count>0) {
1613           ierr    = MPI_Isend(irn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
1614           ierr    = MPI_Isend(jcn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
1615           ierr    = MPI_Isend(val,count,MPIU_SCALAR,  0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
1616           irn    += count;
1617           jcn    += count;
1618           val    += count;
1619           count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1620           remain -= count;
1621         }
1622       }
1623     } else {
1624       nreqs = 0;
1625       if (mumps->is_omp_master) {
1626         val = mumps->val + mumps->recvcount[0];
1627         for (i=1; i<osize; i++) { /* Remote communication only since self data is already in place */
1628           count  = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
1629           remain = mumps->recvcount[i] - count;
1630           while (count>0) {
1631             ierr    = MPI_Irecv(val,count,MPIU_SCALAR,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
1632             val    += count;
1633             count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1634             remain -= count;
1635           }
1636         }
1637       } else {
1638         val    = mumps->val;
1639         count  = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
1640         remain = mumps->nnz - count;
1641         while (count>0) {
1642           ierr    = MPI_Isend(val,count,MPIU_SCALAR,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
1643           val    += count;
1644           count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1645           remain -= count;
1646         }
1647       }
1648     }
1649     ierr = MPI_Waitall(nreqs,mumps->reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);
1650     mumps->tag++; /* It is fine for the above send/recvs to share one MPI tag */
1651   }
1652   PetscFunctionReturn(0);
1653 }
1654 
1655 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info)
1656 {
1657   Mat_MUMPS      *mumps =(Mat_MUMPS*)(F)->data;
1658   PetscErrorCode ierr;
1659   PetscBool      isMPIAIJ;
1660 
1661   PetscFunctionBegin;
1662   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { /* INFOG(1) == -16 with INFO(2) == 0 signals an empty matrix, see MatFactorSymbolic_MUMPS_ReportIfError() */
1663     if (mumps->id.INFOG(1) == -6) {
1664       ierr = PetscInfo2(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1665     }
1666     ierr = PetscInfo2(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1667     PetscFunctionReturn(0);
1668   }
1669 
1670   ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps);CHKERRQ(ierr);
1671   ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr);
1672 
1673   /* numerical factorization phase */
1674   /*-------------------------------*/
1675   mumps->id.job = JOB_FACTNUMERIC;
1676   if (!mumps->id.ICNTL(18)) { /* A is centralized */
1677     if (!mumps->myid) {
1678       mumps->id.a = (MumpsScalar*)mumps->val;
1679     }
1680   } else {
1681     mumps->id.a_loc = (MumpsScalar*)mumps->val;
1682   }
1683   PetscMUMPS_c(mumps);
1684   if (mumps->id.INFOG(1) < 0) {
1685     if (A->erroriffailure) {
1686       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1687     } else {
1688       if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
1689         ierr = PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1690         F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1691       } else if (mumps->id.INFOG(1) == -13) {
1692         ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1693         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1694       } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
1695         ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with work array\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1696         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1697       } else {
1698         ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1699         F->factorerrortype = MAT_FACTOR_OTHER;
1700       }
1701     }
1702   }
1703   if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"  mumps->id.ICNTL(16):=%d\n",mumps->id.ICNTL(16));
1704 
1705   F->assembled    = PETSC_TRUE;
1706   mumps->matstruc = SAME_NONZERO_PATTERN;
1707   if (F->schur) { /* reset Schur status to unfactored */
1708 #if defined(PETSC_HAVE_CUDA)
1709     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
1710 #endif
1711     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1712       mumps->id.ICNTL(19) = 2;
1713       ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr);
1714     }
1715     ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr);
1716   }
1717 
1718   /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
1719   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
1720 
1721   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
1722   if (mumps->petsc_size > 1) {
1723     PetscInt    lsol_loc;
1724     PetscScalar *sol_loc;
1725 
1726     ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr);
1727 
1728     /* distributed solution; Create x_seq=sol_loc for repeated use */
1729     if (mumps->x_seq) {
1730       ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
1731       ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr);
1732       ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr);
1733     }
1734     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
1735     ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr);
1736     mumps->id.lsol_loc = lsol_loc;
1737     mumps->id.sol_loc = (MumpsScalar*)sol_loc;
1738     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr);
1739   }
1740   ierr = PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr);
1741   PetscFunctionReturn(0);
1742 }
1743 
1744 /* Sets MUMPS options from the options database */
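/* For example (option names as registered below), a run may pass
     -mat_mumps_icntl_14 50 -mat_mumps_icntl_24 1 -mat_mumps_cntl_3 1e-12
   to increase the estimated workspace by 50%, enable null pivot detection and set the absolute
   pivoting threshold. */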
1745 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A)
1746 {
1747   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
1748   PetscErrorCode ierr;
1749   PetscMUMPSInt  icntl=0;
1750   PetscInt       info[80],i,ninfo=80;
1751   PetscBool      flg=PETSC_FALSE;
1752 
1753   PetscFunctionBegin;
1754   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr);
1755   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr);
1756   if (flg) mumps->id.ICNTL(1) = icntl;
1757   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr);
1758   if (flg) mumps->id.ICNTL(2) = icntl;
1759   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr);
1760   if (flg) mumps->id.ICNTL(3) = icntl;
1761 
1762   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr);
1763   if (flg) mumps->id.ICNTL(4) = icntl;
1764   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* restore MUMPS default id.ICNTL(3) = 6 */
1765 
1766   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr);
1767   if (flg) mumps->id.ICNTL(6) = icntl;
1768 
1769   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr);
1770   if (flg) {
1771     if (icntl == 1 || icntl < 0 || icntl > 7) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto\n");
1772     mumps->id.ICNTL(7) = icntl;
1773   }
1774 
1775   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr);
1776   /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr); handled by MatSolveTranspose_MUMPS() */
1777   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr);
1778   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr);
1779   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr);
1780   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr);
1781   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr);
1782   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr);
1783   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset Schur data (if any) */
1784     ierr = MatDestroy(&F->schur);CHKERRQ(ierr);
1785     ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr);
1786   }
1787 
1788   /* MPICH Fortran MPI_IN_PLACE binding has a bug that prevents the use of 'mpi4py + mpich + mumps', e.g., by Firedrake.
1789      So we turn off distributed RHS for MPICH. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
1790      and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
1791    */
1792 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0) && defined(PETSC_HAVE_OMPI_MAJOR_VERSION)
1793   mumps->ICNTL20 = 10; /* Distributed dense RHS */
1794 #else
1795   mumps->ICNTL20 = 0;  /* Centralized dense RHS */
1796 #endif
1797   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_20","ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides","None",mumps->ICNTL20,&mumps->ICNTL20,&flg);CHKERRQ(ierr);
1798   if (flg && mumps->ICNTL20 != 10 && mumps->ICNTL20 != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10\n",(int)mumps->ICNTL20);
1799 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0)
1800   if (flg && mumps->ICNTL20 == 10) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=10 is not supported before MUMPS-5.3.0\n");
1801 #endif
1802   /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr); we only use distributed solution vector */
1803 
1804   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr);
1805   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can be allocated per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr);
1806   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr);
1807   if (mumps->id.ICNTL(24)) {
1808     mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */
1809   }
1810 
1811   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr);
1812   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix has been computed","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr);
1813   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_27","ICNTL(27): controls the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr);
1814   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr);
1815   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr);
1816   /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */
1817   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr);
1818   /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr);  -- not supported by PETSc API */
1819   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr);
1820   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr);
1821   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);CHKERRQ(ierr);
1822   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);CHKERRQ(ierr);
1823 
1824   ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr);
1825   ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr);
1826   ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr);
1827   ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr);
1828   ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr);
1829   ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr);
1830 
1831   ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL);CHKERRQ(ierr);
1832 
1833   ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr);
1834   if (ninfo) {
1835     if (ninfo > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %d must be <= 80\n",ninfo);
1836     ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr);
1837     mumps->ninfo = ninfo;
1838     for (i=0; i<ninfo; i++) {
1839       if (info[i] < 1 || info[i] > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %d must be between 1 and 80\n",info[i]);
1840       else  mumps->info[i] = info[i];
1841     }
1842   }
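  /* Example: -mat_mumps_view_info 1,12,13 requests INFO(1), INFO(12) and INFO(13) from every rank;
     they are printed by MatView_MUMPS() below. */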
1843 
1844   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1845   PetscFunctionReturn(0);
1846 }
1847 
1848 PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps)
1849 {
1850   PetscErrorCode ierr;
1851   PetscInt       nthreads=0;
1852   MPI_Comm       newcomm=MPI_COMM_NULL;
1853 
1854   PetscFunctionBegin;
1855   mumps->petsc_comm = PetscObjectComm((PetscObject)A);
1856   ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRMPI(ierr);
1857   ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRMPI(ierr);/* "if (!myid)" still works even if mumps_comm is different */
1858 
1859   ierr = PetscOptionsHasName(NULL,NULL,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr);
1860   if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
1861   ierr = PetscOptionsGetInt(NULL,NULL,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr);
1862   if (mumps->use_petsc_omp_support) {
1863 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1864     ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr);
1865     ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr);
1866 #else
1867     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -mat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual\n");
1868 #endif
1869   } else {
1870     mumps->omp_comm      = PETSC_COMM_SELF;
1871     mumps->mumps_comm    = mumps->petsc_comm;
1872     mumps->is_omp_master = PETSC_TRUE;
1873   }
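  /* Example (assumed launch line): mpirun -n 16 ./app -mat_mumps_use_omp_threads 4 splits the 16 ranks
     into 4 groups; only the master rank of each group calls MUMPS, using 4 OpenMP threads over the cores
     of its group. */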
1874   ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRMPI(ierr);
1875   mumps->reqs = NULL;
1876   mumps->tag  = 0;
1877 
1878   /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
1879   if (mumps->mumps_comm != MPI_COMM_NULL) {
1880     ierr = MPI_Comm_dup(mumps->mumps_comm,&newcomm);CHKERRMPI(ierr);
1881     mumps->mumps_comm = newcomm;
1882   }
1883 
1884   mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
1885   mumps->id.job = JOB_INIT;
1886   mumps->id.par = 1;  /* host participates in factorization and solve */
1887   mumps->id.sym = mumps->sym;
1888 
1889   PetscMUMPS_c(mumps);
1890   if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in PetscInitializeMUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1));
1891 
1892   /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
1893      For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
1894    */
1895   ierr = MPI_Bcast(mumps->id.icntl,40,MPI_INT,  0,mumps->omp_comm);CHKERRMPI(ierr);
1896   ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRMPI(ierr);
1897 
1898   mumps->scat_rhs = NULL;
1899   mumps->scat_sol = NULL;
1900 
1901   /* set PETSc-MUMPS default options - override MUMPS default */
1902   mumps->id.ICNTL(3) = 0;
1903   mumps->id.ICNTL(4) = 0;
1904   if (mumps->petsc_size == 1) {
1905     mumps->id.ICNTL(18) = 0;   /* centralized assembled matrix input */
1906     mumps->id.ICNTL(7)  = 7;   /* automatic choice of ordering done by the package */
1907   } else {
1908     mumps->id.ICNTL(18) = 3;   /* distributed assembled matrix input */
1909     mumps->id.ICNTL(21) = 1;   /* distributed solution */
1910   }
1911 
1912   /* schur */
1913   mumps->id.size_schur    = 0;
1914   mumps->id.listvar_schur = NULL;
1915   mumps->id.schur         = NULL;
1916   mumps->sizeredrhs       = 0;
1917   mumps->schur_sol        = NULL;
1918   mumps->schur_sizesol    = 0;
1919   PetscFunctionReturn(0);
1920 }
1921 
1922 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps)
1923 {
1924   PetscErrorCode ierr;
1925 
1926   PetscFunctionBegin;
1927   if (mumps->id.INFOG(1) < 0) {
1928     if (A->erroriffailure) {
1929       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
1930     } else {
1931       if (mumps->id.INFOG(1) == -6) {
1932         ierr = PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1933         F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
1934       } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
1935         ierr = PetscInfo2(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1936         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1937       } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) {
1938         ierr = PetscInfo(F,"Empty matrix\n");CHKERRQ(ierr);
1939       } else {
1940         ierr = PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1941         F->factorerrortype = MAT_FACTOR_OTHER;
1942       }
1943     }
1944   }
1945   PetscFunctionReturn(0);
1946 }
1947 
1948 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1949 {
1950   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
1951   PetscErrorCode ierr;
1952   Vec            b;
1953   const PetscInt M = A->rmap->N;
1954 
1955   PetscFunctionBegin;
1956   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
1957 
1958   /* Set MUMPS options from the options database */
1959   ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr);
1960 
1961   ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr);
1962   ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr);
1963 
1964   /* analysis phase */
1965   /*----------------*/
1966   mumps->id.job = JOB_FACTSYMBOLIC;
1967   mumps->id.n   = M;
1968   switch (mumps->id.ICNTL(18)) {
1969   case 0:  /* centralized assembled matrix input */
1970     if (!mumps->myid) {
1971       mumps->id.nnz = mumps->nnz;
1972       mumps->id.irn = mumps->irn;
1973       mumps->id.jcn = mumps->jcn;
1974       if (mumps->id.ICNTL(6)>1) mumps->id.a = (MumpsScalar*)mumps->val;
1975       if (r) {
1976         mumps->id.ICNTL(7) = 1;
1977         if (!mumps->myid) {
1978           const PetscInt *idx;
1979           PetscInt       i;
1980 
1981           ierr = PetscMalloc1(M,&mumps->id.perm_in);CHKERRQ(ierr);
1982           ierr = ISGetIndices(r,&idx);CHKERRQ(ierr);
1983           for (i=0; i<M; i++) {ierr = PetscMUMPSIntCast(idx[i]+1,&(mumps->id.perm_in[i]));CHKERRQ(ierr);} /* perm_in[]: start from 1, not 0! */
1984           ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr);
1985         }
1986       }
1987     }
1988     break;
1989   case 3:  /* distributed assembled matrix input (size>1) */
1990     mumps->id.nnz_loc = mumps->nnz;
1991     mumps->id.irn_loc = mumps->irn;
1992     mumps->id.jcn_loc = mumps->jcn;
1993     if (mumps->id.ICNTL(6)>1) mumps->id.a_loc = (MumpsScalar*)mumps->val;
1994     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
1995       ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr);
1996       ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr);
1997       ierr = VecDestroy(&b);CHKERRQ(ierr);
1998     }
1999     break;
2000   }
2001   PetscMUMPS_c(mumps);
2002   ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr);
2003 
2004   F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
2005   F->ops->solve           = MatSolve_MUMPS;
2006   F->ops->solvetranspose  = MatSolveTranspose_MUMPS;
2007   F->ops->matsolve        = MatMatSolve_MUMPS;
2008   F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2009   PetscFunctionReturn(0);
2010 }
2011 
2012 /* Note the Petsc r and c permutations are ignored */
2013 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
2014 {
2015   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
2016   PetscErrorCode ierr;
2017   Vec            b;
2018   const PetscInt M = A->rmap->N;
2019 
2020   PetscFunctionBegin;
2021   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
2022 
2023   /* Set MUMPS options from the options database */
2024   ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr);
2025 
2026   ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr);
2027   ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr);
2028 
2029   /* analysis phase */
2030   /*----------------*/
2031   mumps->id.job = JOB_FACTSYMBOLIC;
2032   mumps->id.n   = M;
2033   switch (mumps->id.ICNTL(18)) {
2034   case 0:  /* centralized assembled matrix input */
2035     if (!mumps->myid) {
2036       mumps->id.nnz = mumps->nnz;
2037       mumps->id.irn = mumps->irn;
2038       mumps->id.jcn = mumps->jcn;
2039       if (mumps->id.ICNTL(6)>1) {
2040         mumps->id.a = (MumpsScalar*)mumps->val;
2041       }
2042     }
2043     break;
2044   case 3:  /* distributed assembled matrix input (size>1) */
2045     mumps->id.nnz_loc = mumps->nnz;
2046     mumps->id.irn_loc = mumps->irn;
2047     mumps->id.jcn_loc = mumps->jcn;
2048     if (mumps->id.ICNTL(6)>1) {
2049       mumps->id.a_loc = (MumpsScalar*)mumps->val;
2050     }
2051     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2052       ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr);
2053       ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr);
2054       ierr = VecDestroy(&b);CHKERRQ(ierr);
2055     }
2056     break;
2057   }
2058   PetscMUMPS_c(mumps);
2059   ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr);
2060 
2061   F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
2062   F->ops->solve           = MatSolve_MUMPS;
2063   F->ops->solvetranspose  = MatSolveTranspose_MUMPS;
2064   PetscFunctionReturn(0);
2065 }
2066 
2067 /* Note the Petsc r permutation and factor info are ignored */
2068 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info)
2069 {
2070   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
2071   PetscErrorCode ierr;
2072   Vec            b;
2073   const PetscInt M = A->rmap->N;
2074 
2075   PetscFunctionBegin;
2076   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
2077 
2078   /* Set MUMPS options from the options database */
2079   ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr);
2080 
2081   ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr);
2082   ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr);
2083 
2084   /* analysis phase */
2085   /*----------------*/
2086   mumps->id.job = JOB_FACTSYMBOLIC;
2087   mumps->id.n   = M;
2088   switch (mumps->id.ICNTL(18)) {
2089   case 0:  /* centralized assembled matrix input */
2090     if (!mumps->myid) {
2091       mumps->id.nnz = mumps->nnz;
2092       mumps->id.irn = mumps->irn;
2093       mumps->id.jcn = mumps->jcn;
2094       if (mumps->id.ICNTL(6)>1) {
2095         mumps->id.a = (MumpsScalar*)mumps->val;
2096       }
2097     }
2098     break;
2099   case 3:  /* distributed assembled matrix input (size>1) */
2100     mumps->id.nnz_loc = mumps->nnz;
2101     mumps->id.irn_loc = mumps->irn;
2102     mumps->id.jcn_loc = mumps->jcn;
2103     if (mumps->id.ICNTL(6)>1) {
2104       mumps->id.a_loc = (MumpsScalar*)mumps->val;
2105     }
2106     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2107       ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr);
2108       ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr);
2109       ierr = VecDestroy(&b);CHKERRQ(ierr);
2110     }
2111     break;
2112   }
2113   PetscMUMPS_c(mumps);
2114   ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr);
2115 
2116   F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
2117   F->ops->solve                 = MatSolve_MUMPS;
2118   F->ops->solvetranspose        = MatSolve_MUMPS;
2119   F->ops->matsolve              = MatMatSolve_MUMPS;
2120   F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
2121 #if defined(PETSC_USE_COMPLEX)
2122   F->ops->getinertia = NULL;
2123 #else
2124   F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
2125 #endif
2126   PetscFunctionReturn(0);
2127 }
2128 
2129 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer)
2130 {
2131   PetscErrorCode    ierr;
2132   PetscBool         iascii;
2133   PetscViewerFormat format;
2134   Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
2135 
2136   PetscFunctionBegin;
2137   /* check if matrix is mumps type */
2138   if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0);
2139 
2140   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
2141   if (iascii) {
2142     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
2143     if (format == PETSC_VIEWER_ASCII_INFO) {
2144       ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr);
2145       ierr = PetscViewerASCIIPrintf(viewer,"  SYM (matrix type):                   %d\n",mumps->id.sym);CHKERRQ(ierr);
2146       ierr = PetscViewerASCIIPrintf(viewer,"  PAR (host participation):            %d\n",mumps->id.par);CHKERRQ(ierr);
2147       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(1) (output for error):         %d\n",mumps->id.ICNTL(1));CHKERRQ(ierr);
2148       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(2) (output of diagnostic msg): %d\n",mumps->id.ICNTL(2));CHKERRQ(ierr);
2149       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(3) (output for global info):   %d\n",mumps->id.ICNTL(3));CHKERRQ(ierr);
2150       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(4) (level of printing):        %d\n",mumps->id.ICNTL(4));CHKERRQ(ierr);
2151       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(5) (input mat struct):         %d\n",mumps->id.ICNTL(5));CHKERRQ(ierr);
2152       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(6) (matrix prescaling):        %d\n",mumps->id.ICNTL(6));CHKERRQ(ierr);
2153       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(7) (sequential matrix ordering): %d\n",mumps->id.ICNTL(7));CHKERRQ(ierr);
2154       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(8) (scaling strategy):        %d\n",mumps->id.ICNTL(8));CHKERRQ(ierr);
2155       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(10) (max num of refinements):  %d\n",mumps->id.ICNTL(10));CHKERRQ(ierr);
2156       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(11) (error analysis):          %d\n",mumps->id.ICNTL(11));CHKERRQ(ierr);
2157       if (mumps->id.ICNTL(11)>0) {
2158         ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(4) (inf norm of input mat):        %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr);
2159         ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(5) (inf norm of solution):         %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr);
2160         ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(6) (inf norm of residual):         %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr);
2161         ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr);
2162         ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(9) (error estimate):               %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr);
2163         ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr);
2164       }
2165       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(12) (efficiency control):                         %d\n",mumps->id.ICNTL(12));CHKERRQ(ierr);
2166       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(13) (sequential factorization of the root node):  %d\n",mumps->id.ICNTL(13));CHKERRQ(ierr);
2167       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(14) (percentage of estimated workspace increase): %d\n",mumps->id.ICNTL(14));CHKERRQ(ierr);
2168       /* ICNTL(15-17) not used */
2169       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(18) (input mat struct):                           %d\n",mumps->id.ICNTL(18));CHKERRQ(ierr);
2170       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(19) (Schur complement info):                      %d\n",mumps->id.ICNTL(19));CHKERRQ(ierr);
2171       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(20) (RHS sparse pattern):                         %d\n",mumps->id.ICNTL(20));CHKERRQ(ierr);
2172       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(21) (solution struct):                            %d\n",mumps->id.ICNTL(21));CHKERRQ(ierr);
2173       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(22) (in-core/out-of-core facility):               %d\n",mumps->id.ICNTL(22));CHKERRQ(ierr);
2174       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(23) (max size of memory that can be allocated locally):%d\n",mumps->id.ICNTL(23));CHKERRQ(ierr);
2175 
2176       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(24) (detection of null pivot rows):               %d\n",mumps->id.ICNTL(24));CHKERRQ(ierr);
2177       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(25) (computation of a null space basis):          %d\n",mumps->id.ICNTL(25));CHKERRQ(ierr);
2178       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(26) (Schur options for RHS or solution):          %d\n",mumps->id.ICNTL(26));CHKERRQ(ierr);
2179       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(27) (blocking size for multiple RHS):             %d\n",mumps->id.ICNTL(27));CHKERRQ(ierr);
2180       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(28) (use parallel or sequential ordering):        %d\n",mumps->id.ICNTL(28));CHKERRQ(ierr);
2181       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(29) (parallel ordering):                          %d\n",mumps->id.ICNTL(29));CHKERRQ(ierr);
2182 
2183       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(30) (user-specified set of entries in inv(A)):    %d\n",mumps->id.ICNTL(30));CHKERRQ(ierr);
2184       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(31) (factors are discarded in the solve phase):   %d\n",mumps->id.ICNTL(31));CHKERRQ(ierr);
2185       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(33) (compute determinant):                        %d\n",mumps->id.ICNTL(33));CHKERRQ(ierr);
2186       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(35) (activate BLR based factorization):           %d\n",mumps->id.ICNTL(35));CHKERRQ(ierr);
2187       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(36) (choice of BLR factorization variant):        %d\n",mumps->id.ICNTL(36));CHKERRQ(ierr);
2188       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(38) (estimated compression rate of LU factors):   %d\n",mumps->id.ICNTL(38));CHKERRQ(ierr);
2189 
2190       ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(1) (relative pivoting threshold):      %g \n",mumps->id.CNTL(1));CHKERRQ(ierr);
2191       ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr);
2192       ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(3) (absolute pivoting threshold):      %g \n",mumps->id.CNTL(3));CHKERRQ(ierr);
2193       ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(4) (value of static pivoting):         %g \n",mumps->id.CNTL(4));CHKERRQ(ierr);
2194       ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(5) (fixation for null pivots):         %g \n",mumps->id.CNTL(5));CHKERRQ(ierr);
2195       ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(7) (dropping parameter for BLR):       %g \n",mumps->id.CNTL(7));CHKERRQ(ierr);
2196 
2197       /* information local to each processor */
2198       ierr = PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr);
2199       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
2200       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr);
2201       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2202       ierr = PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr);
2203       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d]  %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr);
2204       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2205       ierr = PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr);
2206       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d]  %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr);
2207       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2208 
2209       ierr = PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size (in MB) of MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr);
2210       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr);
2211       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2212 
2213       ierr = PetscViewerASCIIPrintf(viewer, "  INFO(16) (size (in MB) of MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr);
2214       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr);
2215       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2216 
2217       ierr = PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr);
2218       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr);
2219       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2220 
2221       if (mumps->ninfo && mumps->ninfo <= 80) {
2222         PetscInt i;
2223         for (i=0; i<mumps->ninfo; i++) {
2224           ierr = PetscViewerASCIIPrintf(viewer, "  INFO(%d): \n",mumps->info[i]);CHKERRQ(ierr);
2225           ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr);
2226           ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2227         }
2228       }
2229       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
2230 
2231       if (!mumps->myid) { /* information from the host */
2232         ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr);
2233         ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr);
2234         ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr);
2235         ierr = PetscViewerASCIIPrintf(viewer,"  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr);
2236 
2237         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(3));CHKERRQ(ierr);
2238         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(4));CHKERRQ(ierr);
2239         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(5) (estimated maximum front size in the complete tree): %d\n",mumps->id.INFOG(5));CHKERRQ(ierr);
2240         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(6) (number of nodes in the complete tree): %d\n",mumps->id.INFOG(6));CHKERRQ(ierr);
2241         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(7) (ordering option effectively used after analysis): %d\n",mumps->id.INFOG(7));CHKERRQ(ierr);
2242         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n",mumps->id.INFOG(8));CHKERRQ(ierr);
2243         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n",mumps->id.INFOG(9));CHKERRQ(ierr);
2244         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(10) (total integer space to store the matrix factors after factorization): %d\n",mumps->id.INFOG(10));CHKERRQ(ierr);
2245         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(11) (order of largest frontal matrix after factorization): %d\n",mumps->id.INFOG(11));CHKERRQ(ierr);
2246         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(12) (number of off-diagonal pivots): %d\n",mumps->id.INFOG(12));CHKERRQ(ierr);
2247         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(13) (number of delayed pivots after factorization): %d\n",mumps->id.INFOG(13));CHKERRQ(ierr);
2248         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(14) (number of memory compresses after factorization): %d\n",mumps->id.INFOG(14));CHKERRQ(ierr);
2249         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(15) (number of steps of iterative refinement after solution): %d\n",mumps->id.INFOG(15));CHKERRQ(ierr);
2250         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n",mumps->id.INFOG(16));CHKERRQ(ierr);
2251         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n",mumps->id.INFOG(17));CHKERRQ(ierr);
2252         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n",mumps->id.INFOG(18));CHKERRQ(ierr);
2253         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n",mumps->id.INFOG(19));CHKERRQ(ierr);
2254         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(20) (estimated number of entries in the factors): %d\n",mumps->id.INFOG(20));CHKERRQ(ierr);
2255         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n",mumps->id.INFOG(21));CHKERRQ(ierr);
2256         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n",mumps->id.INFOG(22));CHKERRQ(ierr);
2257         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n",mumps->id.INFOG(23));CHKERRQ(ierr);
2258         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n",mumps->id.INFOG(24));CHKERRQ(ierr);
2259         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n",mumps->id.INFOG(25));CHKERRQ(ierr);
2260         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr);
2261         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr);
2262         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr);
2263         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr);
2264         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr);
2265         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));CHKERRQ(ierr);
2266         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));CHKERRQ(ierr);
2267         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(36));CHKERRQ(ierr);
2268         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n",mumps->id.INFOG(37));CHKERRQ(ierr);
2269         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(38));CHKERRQ(ierr);
2270         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n",mumps->id.INFOG(39));CHKERRQ(ierr);
2271       }
2272     }
2273   }
2274   PetscFunctionReturn(0);
2275 }
2276 
2277 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info)
2278 {
2279   Mat_MUMPS *mumps =(Mat_MUMPS*)A->data;
2280 
2281   PetscFunctionBegin;
2282   info->block_size        = 1.0;
2283   info->nz_allocated      = mumps->id.INFOG(20);
2284   info->nz_used           = mumps->id.INFOG(20);
2285   info->nz_unneeded       = 0.0;
2286   info->assemblies        = 0.0;
2287   info->mallocs           = 0.0;
2288   info->memory            = 0.0;
2289   info->fill_ratio_given  = 0;
2290   info->fill_ratio_needed = 0;
2291   info->factor_mallocs    = 0;
2292   PetscFunctionReturn(0);
2293 }
2294 
2295 /* -------------------------------------------------------------------------------------------*/
2296 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
2297 {
2298   Mat_MUMPS         *mumps =(Mat_MUMPS*)F->data;
2299   const PetscScalar *arr;
2300   const PetscInt    *idxs;
2301   PetscInt          size,i;
2302   PetscErrorCode    ierr;
2303 
2304   PetscFunctionBegin;
2305   ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr);
2306   if (mumps->petsc_size > 1) {
2307     PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */
2308 
2309     ls   = mumps->myid ? (size ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
2310     ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRMPI(ierr);
2311     if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported by PETSc\n");
2312   }
2313 
2314   /* Schur complement matrix */
2315   ierr = MatDestroy(&F->schur);CHKERRQ(ierr);
2316   ierr = MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur);CHKERRQ(ierr);
2317   ierr = MatDenseGetArrayRead(F->schur,&arr);CHKERRQ(ierr);
2318   mumps->id.schur      = (MumpsScalar*)arr;
2319   mumps->id.size_schur = size;
2320   mumps->id.schur_lld  = size;
2321   ierr = MatDenseRestoreArrayRead(F->schur,&arr);CHKERRQ(ierr);
2322   if (mumps->sym == 1) {
2323     ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);
2324   }
2325 
2326   /* MUMPS expects Fortran style indices */
2327   ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr);
2328   ierr = PetscMalloc1(size,&mumps->id.listvar_schur);CHKERRQ(ierr);
2329   ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr);
2330   for (i=0; i<size; i++) {ierr = PetscMUMPSIntCast(idxs[i]+1,&(mumps->id.listvar_schur[i]));CHKERRQ(ierr);}
2331   ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr);
2332   if (mumps->petsc_size > 1) {
2333     mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */
2334   } else {
2335     if (F->factortype == MAT_FACTOR_LU) {
2336       mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
2337     } else {
2338       mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
2339     }
2340   }
2341   /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
2342   mumps->id.ICNTL(26) = -1;
2343   PetscFunctionReturn(0);
2344 }
2345 
2346 /* -------------------------------------------------------------------------------------------*/
2347 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S)
2348 {
2349   Mat            St;
2350   Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
2351   PetscScalar    *array;
2352 #if defined(PETSC_USE_COMPLEX)
2353   PetscScalar    im = PetscSqrtScalar((PetscScalar)-1.0);
2354 #endif
2355   PetscErrorCode ierr;
2356 
2357   PetscFunctionBegin;
2358   if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
2359   ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr);
2360   ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr);
2361   ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr);
2362   ierr = MatSetUp(St);CHKERRQ(ierr);
2363   ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr);
2364   if (!mumps->sym) { /* MUMPS always returns a full matrix */
2365     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2366       PetscInt i,j,N=mumps->id.size_schur;
2367       for (i=0;i<N;i++) {
2368         for (j=0;j<N;j++) {
2369 #if !defined(PETSC_USE_COMPLEX)
2370           PetscScalar val = mumps->id.schur[i*N+j];
2371 #else
2372           PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2373 #endif
2374           array[j*N+i] = val;
2375         }
2376       }
2377     } else { /* stored by columns */
2378       ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr);
2379     }
2380   } else { /* either full or lower-triangular (not packed) */
2381     if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
2382       PetscInt i,j,N=mumps->id.size_schur;
2383       for (i=0;i<N;i++) {
2384         for (j=i;j<N;j++) {
2385 #if !defined(PETSC_USE_COMPLEX)
2386           PetscScalar val = mumps->id.schur[i*N+j];
2387 #else
2388           PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2389 #endif
2390           array[i*N+j] = val;
2391           array[j*N+i] = val;
2392         }
2393       }
2394     } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
2395       ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr);
2396     } else { /* ICNTL(19) == 1 lower triangular stored by rows */
2397       PetscInt i,j,N=mumps->id.size_schur;
2398       for (i=0;i<N;i++) {
2399         for (j=0;j<i+1;j++) {
2400 #if !defined(PETSC_USE_COMPLEX)
2401           PetscScalar val = mumps->id.schur[i*N+j];
2402 #else
2403           PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2404 #endif
2405           array[i*N+j] = val;
2406           array[j*N+i] = val;
2407         }
2408       }
2409     }
2410   }
2411   ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr);
2412   *S   = St;
2413   PetscFunctionReturn(0);
2414 }
2415 
2416 /* -------------------------------------------------------------------------------------------*/
2417 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival)
2418 {
2419   PetscErrorCode ierr;
2420   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2421 
2422   PetscFunctionBegin;
2423   ierr = PetscMUMPSIntCast(ival,&mumps->id.ICNTL(icntl));CHKERRQ(ierr);
2424   PetscFunctionReturn(0);
2425 }
2426 
2427 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival)
2428 {
2429   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2430 
2431   PetscFunctionBegin;
2432   *ival = mumps->id.ICNTL(icntl);
2433   PetscFunctionReturn(0);
2434 }
2435 
2436 /*@
2437   MatMumpsSetIcntl - Set MUMPS parameter ICNTL()
2438 
2439    Logically Collective on Mat
2440 
2441    Input Parameters:
2442 +  F - the factored matrix obtained by calling MatGetFactor() from the PETSc-MUMPS interface
2443 .  icntl - index of MUMPS parameter array ICNTL()
2444 -  ival - value of MUMPS ICNTL(icntl)
2445 
2446   Options Database:
2447 .   -mat_mumps_icntl_<icntl> <ival>
2448 
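  Example, a minimal sketch (assumes F was obtained with MatGetFactor() using MATSOLVERMUMPS; error checking omitted):
$     MatMumpsSetIcntl(F,14,30);   /* ICNTL(14): allow a 30 percent increase in the estimated working space */
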
2449    Level: beginner
2450 
2451    References:
2452 .     MUMPS Users' Guide
2453 
2454 .seealso: MatGetFactor(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2455  @*/
2456 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival)
2457 {
2458   PetscErrorCode ierr;
2459 
2460   PetscFunctionBegin;
2461   PetscValidType(F,1);
2462   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2463   PetscValidLogicalCollectiveInt(F,icntl,2);
2464   PetscValidLogicalCollectiveInt(F,ival,3);
2465   ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr);
2466   PetscFunctionReturn(0);
2467 }
2468 
2469 /*@
2470   MatMumpsGetIcntl - Get MUMPS parameter ICNTL()
2471 
2472    Logically Collective on Mat
2473 
2474    Input Parameters:
2475 +  F - the factored matrix obtained by calling MatGetFactor() from the PETSc-MUMPS interface
2476 -  icntl - index of MUMPS parameter array ICNTL()
2477 
2478   Output Parameter:
2479 .  ival - value of MUMPS ICNTL(icntl)
2480 
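  Example, a minimal sketch (assumes F is a factored matrix from the PETSc-MUMPS interface; error checking omitted):
$     PetscInt ival;
$     MatMumpsGetIcntl(F,24,&ival);   /* query the null-pivot detection setting ICNTL(24) */
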
2481    Level: beginner
2482 
2483    References:
2484 .     MUMPS Users' Guide
2485 
2486 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2487 @*/
2488 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival)
2489 {
2490   PetscErrorCode ierr;
2491 
2492   PetscFunctionBegin;
2493   PetscValidType(F,1);
2494   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2495   PetscValidLogicalCollectiveInt(F,icntl,2);
2496   PetscValidIntPointer(ival,3);
2497   ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
2498   PetscFunctionReturn(0);
2499 }
2500 
2501 /* -------------------------------------------------------------------------------------------*/
2502 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val)
2503 {
2504   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2505 
2506   PetscFunctionBegin;
2507   mumps->id.CNTL(icntl) = val;
2508   PetscFunctionReturn(0);
2509 }
2510 
2511 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val)
2512 {
2513   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2514 
2515   PetscFunctionBegin;
2516   *val = mumps->id.CNTL(icntl);
2517   PetscFunctionReturn(0);
2518 }
2519 
2520 /*@
2521   MatMumpsSetCntl - Set MUMPS parameter CNTL()
2522 
2523    Logically Collective on Mat
2524 
2525    Input Parameters:
2526 +  F - the factored matrix obtained by calling MatGetFactor() from the PETSc-MUMPS interface
2527 .  icntl - index of MUMPS parameter array CNTL()
2528 -  val - value of MUMPS CNTL(icntl)
2529 
2530   Options Database:
2531 .   -mat_mumps_cntl_<icntl> <val>
2532 
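  Example, a minimal sketch (assumes F is a factored matrix from the PETSc-MUMPS interface; error checking omitted):
$     MatMumpsSetCntl(F,1,0.01);   /* CNTL(1): lower the relative pivoting threshold */
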
2533    Level: beginner
2534 
2535    References:
2536 .     MUMPS Users' Guide
2537 
2538 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2539 @*/
2540 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val)
2541 {
2542   PetscErrorCode ierr;
2543 
2544   PetscFunctionBegin;
2545   PetscValidType(F,1);
2546   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2547   PetscValidLogicalCollectiveInt(F,icntl,2);
2548   PetscValidLogicalCollectiveReal(F,val,3);
2549   ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr);
2550   PetscFunctionReturn(0);
2551 }
2552 
2553 /*@
2554   MatMumpsGetCntl - Get MUMPS parameter CNTL()
2555 
2556    Logically Collective on Mat
2557 
2558    Input Parameters:
2559 +  F - the factored matrix obtained by calling MatGetFactor() from the PETSc-MUMPS interface
2560 -  icntl - index of MUMPS parameter array CNTL()
2561 
2562   Output Parameter:
2563 .  val - value of MUMPS CNTL(icntl)
2564 
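  Example, a minimal sketch (error checking omitted):
$     PetscReal val;
$     MatMumpsGetCntl(F,3,&val);   /* query the absolute pivoting threshold CNTL(3) */
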
2565    Level: beginner
2566 
2567    References:
2568 .      MUMPS Users' Guide
2569 
2570 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2571 @*/
2572 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val)
2573 {
2574   PetscErrorCode ierr;
2575 
2576   PetscFunctionBegin;
2577   PetscValidType(F,1);
2578   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2579   PetscValidLogicalCollectiveInt(F,icntl,2);
2580   PetscValidRealPointer(val,3);
2581   ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
2582   PetscFunctionReturn(0);
2583 }
2584 
2585 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info)
2586 {
2587   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2588 
2589   PetscFunctionBegin;
2590   *info = mumps->id.INFO(icntl);
2591   PetscFunctionReturn(0);
2592 }
2593 
2594 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog)
2595 {
2596   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2597 
2598   PetscFunctionBegin;
2599   *infog = mumps->id.INFOG(icntl);
2600   PetscFunctionReturn(0);
2601 }
2602 
2603 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo)
2604 {
2605   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2606 
2607   PetscFunctionBegin;
2608   *rinfo = mumps->id.RINFO(icntl);
2609   PetscFunctionReturn(0);
2610 }
2611 
2612 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog)
2613 {
2614   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2615 
2616   PetscFunctionBegin;
2617   *rinfog = mumps->id.RINFOG(icntl);
2618   PetscFunctionReturn(0);
2619 }
2620 
2621 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS)
2622 {
2623   PetscErrorCode ierr;
2624   Mat            Bt = NULL,Btseq = NULL;
2625   PetscBool      flg;
2626   Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
2627   PetscScalar    *aa;
2628   PetscInt       spnr,*ia,*ja,M,nrhs;
2629 
2630   PetscFunctionBegin;
2631   PetscValidPointer(spRHS,2);
2632   ierr = PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr);
2633   if (flg) {
2634     ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr);
2635   } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be of type MATTRANSPOSEMAT");
2636 
2637   ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr);
2638 
2639   if (mumps->petsc_size > 1) {
2640     Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data;
2641     Btseq = b->A;
2642   } else {
2643     Btseq = Bt;
2644   }
2645 
2646   ierr = MatGetSize(spRHS,&M,&nrhs);CHKERRQ(ierr);
2647   mumps->id.nrhs = nrhs;
2648   mumps->id.lrhs = M;
2649   mumps->id.rhs  = NULL;
2650 
2651   if (!mumps->myid) {
2652     ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr);
2653     ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
2654     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
2655     ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr);
2656     mumps->id.rhs_sparse  = (MumpsScalar*)aa;
2657   } else {
2658     mumps->id.irhs_ptr    = NULL;
2659     mumps->id.irhs_sparse = NULL;
2660     mumps->id.nz_rhs      = 0;
2661     mumps->id.rhs_sparse  = NULL;
2662   }
2663   mumps->id.ICNTL(20)   = 1; /* rhs is sparse */
2664   mumps->id.ICNTL(21)   = 0; /* solution is in assembled centralized format */
2665 
2666   /* solve phase */
2667   /*-------------*/
2668   mumps->id.job = JOB_SOLVE;
2669   PetscMUMPS_c(mumps);
2670   if (mumps->id.INFOG(1) < 0)
2671     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
2672 
2673   if (!mumps->myid) {
2674     ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr);
2675     ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
2676     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
2677   }
2678   PetscFunctionReturn(0);
2679 }
2680 
2681 /*@
2682   MatMumpsGetInverse - Get a user-specified set of entries in the inverse of A
2683 
2684    Logically Collective on Mat
2685 
2686    Input Parameters:
2687 +  F - the factored matrix obtained by calling MatGetFactor() from the PETSc-MUMPS interface
2688 -  spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding the specified indices, stored on processor 0
2689 
2690   Output Parameter:
2691 . spRHS - requested entries of inverse of A
2692 
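  Example, a minimal sketch (spRHST is assumed to be a sequential AIJ matrix whose nonzero pattern marks the requested entries of inv(A); error checking omitted):
$     Mat spRHS;
$     MatCreateTranspose(spRHST,&spRHS);   /* wrap spRHST as MATTRANSPOSEMAT without explicitly transposing it */
$     MatMumpsGetInverse(F,spRHS);         /* the requested entries of inv(A) are placed in the values of spRHST */
$     MatDestroy(&spRHS);
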
2693    Level: beginner
2694 
2695    References:
2696 .      MUMPS Users' Guide
2697 
2698 .seealso: MatGetFactor(), MatCreateTranspose()
2699 @*/
2700 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS)
2701 {
2702   PetscErrorCode ierr;
2703 
2704   PetscFunctionBegin;
2705   PetscValidType(F,1);
2706   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2707   ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr);
2708   PetscFunctionReturn(0);
2709 }
2710 
2711 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST)
2712 {
2713   PetscErrorCode ierr;
2714   Mat            spRHS;
2715 
2716   PetscFunctionBegin;
2717   ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr);
2718   ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr);
2719   ierr = MatDestroy(&spRHS);CHKERRQ(ierr);
2720   PetscFunctionReturn(0);
2721 }
2722 
2723 /*@
2724   MatMumpsGetInverseTranspose - Get a user-specified set of entries in the inverse of A^T
2725 
2726    Logically Collective on Mat
2727 
2728    Input Parameters:
2729 +  F - the factored matrix of A obtained by calling MatGetFactor() from the PETSc-MUMPS interface
2730 -  spRHST - sequential sparse matrix in MATAIJ format holding the specified indices of A^T, stored on processor 0
2731 
2732   Output Parameter:
2733 . spRHST - requested entries of inverse of A^T
2734 
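  Example, a minimal sketch (the nonzero pattern of spRHST selects the requested entries, and its values are overwritten with the corresponding entries of inv(A^T)):
$     MatMumpsGetInverseTranspose(F,spRHST);
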
2735    Level: beginner
2736 
2737    References:
2738 .      MUMPS Users' Guide
2739 
2740 .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse()
2741 @*/
2742 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST)
2743 {
2744   PetscErrorCode ierr;
2745   PetscBool      flg;
2746 
2747   PetscFunctionBegin;
2748   PetscValidType(F,1);
2749   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2750   ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr);
2751   if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix");
2752 
2753   ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr);
2754   PetscFunctionReturn(0);
2755 }
2756 
2757 /*@
2758   MatMumpsGetInfo - Get MUMPS parameter INFO()
2759 
2760    Logically Collective on Mat
2761 
2762    Input Parameters:
2763 +  F - the factored matrix obtained by calling MatGetFactor() from the PETSc-MUMPS interface
2764 -  icntl - index of MUMPS parameter array INFO()
2765 
2766   Output Parameter:
2767 .  ival - value of MUMPS INFO(icntl)
2768 
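  Example, a minimal sketch (error checking omitted):
$     PetscInt ival;
$     MatMumpsGetInfo(F,2,&ival);   /* INFO(2) carries additional information when MUMPS reports an error */
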
2769    Level: beginner
2770 
2771    References:
2772 .      MUMPS Users' Guide
2773 
2774 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2775 @*/
2776 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival)
2777 {
2778   PetscErrorCode ierr;
2779 
2780   PetscFunctionBegin;
2781   PetscValidType(F,1);
2782   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2783   PetscValidIntPointer(ival,3);
2784   ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
2785   PetscFunctionReturn(0);
2786 }
2787 
2788 /*@
2789   MatMumpsGetInfog - Get MUMPS parameter INFOG()
2790 
2791    Logically Collective on Mat
2792 
2793    Input Parameters:
2794 +  F - the factored matrix obtained by calling MatGetFactor() from the PETSc-MUMPS interface
2795 -  icntl - index of MUMPS parameter array INFOG()
2796 
2797   Output Parameter:
2798 .  ival - value of MUMPS INFOG(icntl)
2799 
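  Example, a minimal sketch (error checking omitted):
$     PetscInt ival;
$     MatMumpsGetInfog(F,1,&ival);   /* INFOG(1) < 0 indicates an error in the last MUMPS phase */
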
2800    Level: beginner
2801 
2802    References:
2803 .      MUMPS Users' Guide
2804 
2805 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2806 @*/
2807 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival)
2808 {
2809   PetscErrorCode ierr;
2810 
2811   PetscFunctionBegin;
2812   PetscValidType(F,1);
2813   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2814   PetscValidIntPointer(ival,3);
2815   ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
2816   PetscFunctionReturn(0);
2817 }
2818 
2819 /*@
2820   MatMumpsGetRinfo - Get MUMPS parameter RINFO()
2821 
2822    Logically Collective on Mat
2823 
2824    Input Parameters:
2825 +  F - the factored matrix obtained by calling MatGetFactor() from the PETSc-MUMPS interface
2826 -  icntl - index of MUMPS parameter array RINFO()
2827 
2828   Output Parameter:
2829 .  val - value of MUMPS RINFO(icntl)
2830 
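  Example, a minimal sketch (error checking omitted):
$     PetscReal val;
$     MatMumpsGetRinfo(F,1,&val);   /* RINFO(1): local estimated flops for the elimination after analysis */
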
2831    Level: beginner
2832 
2833    References:
2834 .       MUMPS Users' Guide
2835 
2836 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfog()
2837 @*/
2838 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val)
2839 {
2840   PetscErrorCode ierr;
2841 
2842   PetscFunctionBegin;
2843   PetscValidType(F,1);
2844   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2845   PetscValidRealPointer(val,3);
2846   ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
2847   PetscFunctionReturn(0);
2848 }
2849 
2850 /*@
2851   MatMumpsGetRinfog - Get MUMPS parameter RINFOG()
2852 
2853    Logically Collective on Mat
2854 
2855    Input Parameters:
2856 +  F - the factored matrix obtained by calling MatGetFactor() from the PETSc-MUMPS interface
2857 -  icntl - index of MUMPS parameter array RINFOG()
2858 
2859   Output Parameter:
2860 .  val - value of MUMPS RINFOG(icntl)
2861 
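  Example, a minimal sketch (error checking omitted):
$     PetscReal val;
$     MatMumpsGetRinfog(F,1,&val);   /* RINFOG(1): global estimated flops for the elimination after analysis */
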
2862    Level: beginner
2863 
2864    References:
2865 .      MUMPS Users' Guide
2866 
2867 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo()
2868 @*/
2869 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val)
2870 {
2871   PetscErrorCode ierr;
2872 
2873   PetscFunctionBegin;
2874   PetscValidType(F,1);
2875   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2876   PetscValidRealPointer(val,3);
2877   ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
2878   PetscFunctionReturn(0);
2879 }
2880 
2881 /*MC
2882   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
2883   distributed and sequential matrices via the external package MUMPS.
2884 
2885   Works with MATAIJ and MATSBAIJ matrices (MATBAIJ and sequential MATSELL matrices are supported for LU factorization only)
2886 
2887   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
2888 
2889   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below.
2890 
2891   Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver
2892 
2893   Options Database Keys:
2894 +  -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
2895 .  -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning messages
2896 .  -mat_mumps_icntl_3 -  ICNTL(3): output stream for global information, collected on the host
2897 .  -mat_mumps_icntl_4 -  ICNTL(4): level of printing (0 to 4)
2898 .  -mat_mumps_icntl_6 - ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales it (0 to 7)
2899 .  -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
2900                         Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
2901 .  -mat_mumps_icntl_8  - ICNTL(8): scaling strategy (-2 to 8 or 77)
2902 .  -mat_mumps_icntl_10  - ICNTL(10): max num of refinements
2903 .  -mat_mumps_icntl_11  - ICNTL(11): statistics related to an error analysis (via -ksp_view)
2904 .  -mat_mumps_icntl_12  - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
2905 .  -mat_mumps_icntl_13  - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
2906 .  -mat_mumps_icntl_14  - ICNTL(14): percentage increase in the estimated working space
2907 .  -mat_mumps_icntl_19  - ICNTL(19): computes the Schur complement
2908 .  -mat_mumps_icntl_20  - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
2909 .  -mat_mumps_icntl_22  - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
2910 .  -mat_mumps_icntl_23  - ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor
2911 .  -mat_mumps_icntl_24  - ICNTL(24): detection of null pivot rows (0 or 1)
2912 .  -mat_mumps_icntl_25  - ICNTL(25): compute a solution of a deficient matrix and a null space basis
2913 .  -mat_mumps_icntl_26  - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed
2914 .  -mat_mumps_icntl_28  - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
2915 .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
2916 .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
2917 .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
2918 .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
2919 .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
2920 .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
2921 .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
2922 .  -mat_mumps_cntl_1  - CNTL(1): relative pivoting threshold
2923 .  -mat_mumps_cntl_2  -  CNTL(2): stopping criterion of refinement
2924 .  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
2925 .  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
2926 .  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
2927 .  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
2928 -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) were called before calling MUMPS.
2929                                    The default for m is the number of cores per CPU package (socket) as reported by hwloc, as suggested by the MUMPS manual.
2930 
2931   Level: beginner
2932 
2933     Notes:
2934     MUMPS Cholesky does not handle (complex) Hermitian matrices (see the MUMPS Users' Guide, http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf), so using it on a Hermitian matrix will generate an error.
2935 
2936     When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling
2937 $          KSPGetPC(ksp,&pc);
2938 $          PCFactorGetMatrix(pc,&mat);
2939 $          MatMumpsGetInfo(mat,....);
2940 $          MatMumpsGetInfog(mat,....); etc.
2941            Alternatively, run with -ksp_error_if_not_converged and the program will stop with the MUMPS information printed in the error message.
2942 
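    Similarly, one can compute the matrix determinant by enabling ICNTL(33) before factorization and assembling the result from RINFOG(12), RINFOG(13)
    and INFOG(34) afterwards (a minimal sketch; the variable names rr, ri and e are illustrative):
$          MatMumpsSetIcntl(F,33,1);      /* request determinant computation */
$          /* ... numeric factorization takes place ... */
$          MatMumpsGetRinfog(F,12,&rr);   /* real part of the mantissa */
$          MatMumpsGetRinfog(F,13,&ri);   /* imaginary part of the mantissa (complex case) */
$          MatMumpsGetInfog(F,34,&e);     /* determinant = (RINFOG(12),RINFOG(13))*(2^INFOG(34)) */
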
2943   Using MUMPS with 64-bit integers
2944     MUMPS provides 64-bit integer support in two build modes:
2945       full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
2946       requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
2947 
2948       selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
2949       MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have fewer than 2^31 rows and
2950       columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
2951       integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
2952 
2953     With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.
2954 
2955   Two modes to run MUMPS/PETSc with OpenMP
2956 $     Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
2957 $     threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
2958 
2959 $     -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
2960 $     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
2961 
2962    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
2963    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
2964    (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
2965    libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
2966    (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).
2967 
2968    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain the MPI
2969    processes on each compute node. Listing the processes in ascending rank order, we split the processes on a node into consecutive groups of
2970    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and the others in the omp_comm
2971    are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We free the CPUs assigned to slaves (which might be set
2972    by CPU binding policies in job scripts) and make them available to the master, so that the OpenMP threads spawned by MUMPS can run on them.
2973    In a multi-socket compute node, MPI rank mapping is an issue. Continuing the example above, suppose your compute node has two sockets.
2974    If you interleave MPI ranks across the two sockets (even ranks on socket 0, odd ranks on socket 1) and bind
2975    MPI ranks to cores, then with -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) will use half its cores on socket 0 and half
2976    on socket 1, which definitely hurts locality. If instead you map MPI ranks consecutively onto the two sockets, the
2977    problem does not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding.
2978    For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
2979    examine the mapping result.
2980 
2981    PETSc does not control thread binding in MUMPS. So to get the best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts,
2982    for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc
2983    calls omp_set_num_threads(m) internally before calling MUMPS.
2984 
2985    References:
2986 +   1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
2987 -   2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
2988 
2989 .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCGetFactor(), PCFactorGetMatrix()
2990 
2991 M*/
2992 
2993 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type)
2994 {
2995   PetscFunctionBegin;
2996   *type = MATSOLVERMUMPS;
2997   PetscFunctionReturn(0);
2998 }
2999 
3000 /* MatGetFactor for Seq and MPI AIJ matrices */
3001 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F)
3002 {
3003   Mat            B;
3004   PetscErrorCode ierr;
3005   Mat_MUMPS      *mumps;
3006   PetscBool      isSeqAIJ;
3007   PetscMPIInt    size;
3008 
3009   PetscFunctionBegin;
3010  #if defined(PETSC_USE_COMPLEX)
3011   if (A->hermitian && !A->symmetric && ftype == MAT_FACTOR_CHOLESKY) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
3012  #endif
3013   /* Create the factorization matrix */
3014   ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr);
3015   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3016   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3017   ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
3018   ierr = MatSetUp(B);CHKERRQ(ierr);
3019 
3020   ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
3021 
3022   B->ops->view    = MatView_MUMPS;
3023   B->ops->getinfo = MatGetInfo_MUMPS;
3024 
3025   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
3026   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
3027   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
3028   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
3029   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
3030   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
3031   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
3032   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
3033   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
3034   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
3035   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
3036   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
3037   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);
3038 
3039   if (ftype == MAT_FACTOR_LU) {
3040     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3041     B->factortype            = MAT_FACTOR_LU;
3042     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
3043     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
3044     ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]);CHKERRQ(ierr);
3045     mumps->sym = 0;
3046   } else {
3047     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3048     B->factortype                  = MAT_FACTOR_CHOLESKY;
3049     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
3050     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
3051     ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY]);CHKERRQ(ierr);
3052 #if defined(PETSC_USE_COMPLEX)
3053     mumps->sym = 2;
3054 #else
3055     if (A->spd_set && A->spd) mumps->sym = 1;
3056     else                      mumps->sym = 2;
3057 #endif
3058   }
3059 
3060   /* set solvertype */
3061   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
3062   ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
3063   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
3064   if (size == 1) {
3065     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3066     B->canuseordering = PETSC_TRUE;
3067   }
3068   B->ops->destroy = MatDestroy_MUMPS;
3069   B->data         = (void*)mumps;
3070 
3071   ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);
3072 
3073   *F = B;
3074   PetscFunctionReturn(0);
3075 }
3076 
3077 /* MatGetFactor for Seq and MPI SBAIJ matrices */
3078 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F)
3079 {
3080   Mat            B;
3081   PetscErrorCode ierr;
3082   Mat_MUMPS      *mumps;
3083   PetscBool      isSeqSBAIJ;
3084   PetscMPIInt    size;
3085 
3086   PetscFunctionBegin;
3087  #if defined(PETSC_USE_COMPLEX)
3088   if (A->hermitian && !A->symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
3089  #endif
3090   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3091   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3092   ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
3093   ierr = MatSetUp(B);CHKERRQ(ierr);
3094 
3095   ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
3096   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr);
3097   if (isSeqSBAIJ) {
3098     mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
3099   } else {
3100     mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
3101   }
3102 
3103   B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3104   B->ops->view                   = MatView_MUMPS;
3105   B->ops->getinfo                = MatGetInfo_MUMPS;
3106 
3107   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
3108   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
3109   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
3110   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
3111   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
3112   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
3113   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
3114   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
3115   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
3116   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
3117   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
3118   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
3119   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);
3120 
3121   B->factortype = MAT_FACTOR_CHOLESKY;
3122 #if defined(PETSC_USE_COMPLEX)
3123   mumps->sym = 2;
3124 #else
3125   if (A->spd_set && A->spd) mumps->sym = 1;
3126   else                      mumps->sym = 2;
3127 #endif
3128 
3129   /* set solvertype */
3130   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
3131   ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
3132   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
3133   if (size == 1) {
3134     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3135     B->canuseordering = PETSC_TRUE;
3136   }
3137   ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY]);CHKERRQ(ierr);
3138   B->ops->destroy = MatDestroy_MUMPS;
3139   B->data         = (void*)mumps;
3140 
3141   ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);
3142 
3143   *F = B;
3144   PetscFunctionReturn(0);
3145 }
3146 
3147 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F)
3148 {
3149   Mat            B;
3150   PetscErrorCode ierr;
3151   Mat_MUMPS      *mumps;
3152   PetscBool      isSeqBAIJ;
3153   PetscMPIInt    size;
3154 
3155   PetscFunctionBegin;
3156   /* Create the factorization matrix */
3157   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr);
3158   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3159   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3160   ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
3161   ierr = MatSetUp(B);CHKERRQ(ierr);
3162 
3163   ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
3164   if (ftype == MAT_FACTOR_LU) {
3165     B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
3166     B->factortype            = MAT_FACTOR_LU;
3167     if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
3168     else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
3169     mumps->sym = 0;
3170     ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]);CHKERRQ(ierr);
3171   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead\n");
3172 
3173   B->ops->view        = MatView_MUMPS;
3174   B->ops->getinfo     = MatGetInfo_MUMPS;
3175 
3176   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
3177   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
3178   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
3179   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
3180   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
3181   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
3182   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
3183   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
3184   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
3185   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
3186   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
3187   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
3188   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);
3189 
3190   /* set solvertype */
3191   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
3192   ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
3193   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
3194   if (size == 1) {
3195     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3196     B->canuseordering = PETSC_TRUE;
3197   }
3198   B->ops->destroy = MatDestroy_MUMPS;
3199   B->data         = (void*)mumps;
3200 
3201   ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);
3202 
3203   *F = B;
3204   PetscFunctionReturn(0);
3205 }
3206 
3207 /* MatGetFactor for Seq and MPI SELL matrices */
3208 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F)
3209 {
3210   Mat            B;
3211   PetscErrorCode ierr;
3212   Mat_MUMPS      *mumps;
3213   PetscBool      isSeqSELL;
3214   PetscMPIInt    size;
3215 
3216   PetscFunctionBegin;
3217   /* Create the factorization matrix */
3218   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr);
3219   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3220   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3221   ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
3222   ierr = MatSetUp(B);CHKERRQ(ierr);
3223 
3224   ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
3225 
3226   B->ops->view        = MatView_MUMPS;
3227   B->ops->getinfo     = MatGetInfo_MUMPS;
3228 
3229   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
3230   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
3231   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
3232   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
3233   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
3234   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
3235   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
3236   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
3237   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
3238   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
3239   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
3240 
3241   if (ftype == MAT_FACTOR_LU) {
3242     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3243     B->factortype            = MAT_FACTOR_LU;
3244     if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
3245     else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
3246     mumps->sym = 0;
3247     ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]);CHKERRQ(ierr);
3248   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
3249 
3250   /* set solvertype */
3251   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
3252   ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
3253   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
3254   if (size == 1) {
3255     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization  */
3256     B->canuseordering = PETSC_TRUE;
3257   }
3258   B->ops->destroy = MatDestroy_MUMPS;
3259   B->data         = (void*)mumps;
3260 
3261   ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);
3262 
3263   *F = B;
3264   PetscFunctionReturn(0);
3265 }
3266 
3267 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
3268 {
3269   PetscErrorCode ierr;
3270 
3271   PetscFunctionBegin;
3272   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
3273   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
3274   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
3275   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
3276   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
3277   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
3278   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
3279   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
3280   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
3281   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
3282   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr);
3283   PetscFunctionReturn(0);
3284 }
3285 
3286