xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision ccb4e88a40f0b86eaeca07ff64c64e4de2fae686)
1 
2 /*
3     Provides an interface to the MUMPS sparse solver
4 */
5 #include <petscpkg_version.h>
6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
8 #include <../src/mat/impls/sell/mpi/mpisell.h>
9 
10 EXTERN_C_BEGIN
11 #if defined(PETSC_USE_COMPLEX)
12 #if defined(PETSC_USE_REAL_SINGLE)
13 #include <cmumps_c.h>
14 #else
15 #include <zmumps_c.h>
16 #endif
17 #else
18 #if defined(PETSC_USE_REAL_SINGLE)
19 #include <smumps_c.h>
20 #else
21 #include <dmumps_c.h>
22 #endif
23 #endif
24 EXTERN_C_END
25 #define JOB_INIT -1
26 #define JOB_FACTSYMBOLIC 1
27 #define JOB_FACTNUMERIC 2
28 #define JOB_SOLVE 3
29 #define JOB_END -2
30 
31 /* calls to MUMPS */
32 #if defined(PETSC_USE_COMPLEX)
33 #if defined(PETSC_USE_REAL_SINGLE)
34 #define MUMPS_c cmumps_c
35 #else
36 #define MUMPS_c zmumps_c
37 #endif
38 #else
39 #if defined(PETSC_USE_REAL_SINGLE)
40 #define MUMPS_c smumps_c
41 #else
42 #define MUMPS_c dmumps_c
43 #endif
44 #endif
45 
46 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
47    number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
48    naming convention in PetscMPIInt, PetscBLASInt etc.
49 */
50 typedef MUMPS_INT PetscMUMPSInt;
51 
52 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
53   #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
54     #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
55   #endif
56 #else
57   #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */
58     #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
59   #endif
60 #endif
61 
/* MUMPS integer arrays are MUMPS_INT (32-bit here, see the MUMPS_INTSIZE64 check above), matching MPI_INT on the wire */
#define MPIU_MUMPSINT             MPI_INT
#define PETSC_MUMPS_INT_MAX       2147483647
/* Written as (-2147483647 - 1) because the literal -2147483648 parses as -(2147483648),
   where 2147483648 does not fit in int and gets type long, causing subtle promotion issues */
#define PETSC_MUMPS_INT_MIN       (-2147483647 - 1)
65 
66 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/
67 PETSC_STATIC_INLINE PetscErrorCode PetscMUMPSIntCast(PetscInt a,PetscMUMPSInt *b)
68 {
69   PetscFunctionBegin;
70   if (PetscDefined(USE_64BIT_INDICES) && PetscUnlikelyDebug(a > PETSC_MUMPS_INT_MAX || a < PETSC_MUMPS_INT_MIN)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
71   *b = (PetscMUMPSInt)(a);
72   PetscFunctionReturn(0);
73 }
74 
75 /* Put these utility routines here since they are only used in this file */
76 PETSC_STATIC_INLINE PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt *value,PetscBool *set,PetscMUMPSInt lb,PetscMUMPSInt ub)
77 {
78   PetscErrorCode ierr;
79   PetscInt       myval;
80   PetscBool      myset;
81   PetscFunctionBegin;
82   /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
83   ierr = PetscOptionsInt_Private(PetscOptionsObject,opt,text,man,(PetscInt)currentvalue,&myval,&myset,lb,ub);CHKERRQ(ierr);
84   if (myset) {ierr = PetscMUMPSIntCast(myval,value);CHKERRQ(ierr);}
85   if (set) *set = myset;
86   PetscFunctionReturn(0);
87 }
88 #define PetscOptionsMUMPSInt(a,b,c,d,e,f) PetscOptionsMUMPSInt_Private(PetscOptionsObject,a,b,c,d,e,f,PETSC_MUMPS_INT_MIN,PETSC_MUMPS_INT_MAX)
89 
/* If PETSc OpenMP support is in use, MUMPS is called only on the OpenMP-master ranks.
   PetscOmpCtrlOmpRegionOnMasterBegin/End change/restore the set of CPUs the master ranks
   may run on around the call, and the results MUMPS leaves in infog/rinfog/info[0] are
   then broadcast to the non-master ranks of each omp_comm. Expects `ierr` in scope. */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
#define PetscMUMPS_c(mumps) \
  do { \
    if (mumps->use_petsc_omp_support) { \
      if (mumps->is_omp_master) { \
        ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \
        MUMPS_c(&mumps->id); \
        ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \
      } \
      ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \
      /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific      \
         to processes, so we only Bcast info[1], an error code and leave others (since they do not have   \
         an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82.                   \
         omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \
      */ \
      ierr = MPI_Bcast(mumps->id.infog, 40,MPIU_MUMPSINT, 0,mumps->omp_comm);CHKERRMPI(ierr);\
      ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,     0,mumps->omp_comm);CHKERRMPI(ierr);\
      ierr = MPI_Bcast(mumps->id.info,  1, MPIU_MUMPSINT, 0,mumps->omp_comm);CHKERRMPI(ierr);\
    } else { \
      MUMPS_c(&mumps->id); \
    } \
  } while (0)
#else
/* Without PETSc OpenMP support every rank calls MUMPS directly */
#define PetscMUMPS_c(mumps) \
  do { MUMPS_c(&mumps->id); } while (0)
#endif
117 
118 /* declare MumpsScalar */
119 #if defined(PETSC_USE_COMPLEX)
120 #if defined(PETSC_USE_REAL_SINGLE)
121 #define MumpsScalar mumps_complex
122 #else
123 #define MumpsScalar mumps_double_complex
124 #endif
125 #else
126 #define MumpsScalar PetscScalar
127 #endif
128 
129 /* macros s.t. indices match MUMPS documentation */
130 #define ICNTL(I) icntl[(I)-1]
131 #define CNTL(I) cntl[(I)-1]
132 #define INFOG(I) infog[(I)-1]
133 #define INFO(I) info[(I)-1]
134 #define RINFOG(I) rinfog[(I)-1]
135 #define RINFO(I) rinfo[(I)-1]
136 
typedef struct Mat_MUMPS Mat_MUMPS;
/* Per-factorization context attached to the PETSc Mat as F->data */
struct Mat_MUMPS {
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;                    /* the MUMPS control/data structure; c/z/s/d variant matches PetscScalar */
#else
  ZMUMPS_STRUC_C id;
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
#else
  DMUMPS_STRUC_C id;
#endif
#endif

  MatStructure   matstruc;              /* tracks whether the nonzero pattern changed between factorizations */
  PetscMPIInt    myid,petsc_size;       /* rank and size in the PETSc matrix's communicator */
  PetscMUMPSInt  *irn,*jcn;             /* the (i,j,v) triplets passed to mumps. */
  PetscScalar    *val,*val_alloc;       /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
  PetscInt64     nnz;                   /* number of nonzeros. The type is called selective 64-bit in mumps */
  PetscMUMPSInt  sym;                   /* MUMPS SYM parameter (matrix symmetry) — presumably set when the factor type is chosen; confirm at initialization site */
  MPI_Comm       mumps_comm;            /* communicator handed to MUMPS */
  PetscMUMPSInt  ICNTL9_pre;            /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter     scat_rhs, scat_sol;    /* used by MatSolve() */
  PetscMUMPSInt  ICNTL20;               /* use centralized (0) or distributed (10) dense RHS */
  PetscMUMPSInt  lrhs_loc,nloc_rhs,*irhs_loc; /* distributed dense RHS: leading dimension, local row count, local row indices */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt       *rhs_nrow,max_nrhs;    /* presumably per-rank RHS row counts and the largest nrhs seen — used only by the OMP-support RHS gathering */
  PetscMPIInt    *rhs_recvcounts,*rhs_disps; /* counts/displacements for gathering RHS pieces on omp master */
  PetscScalar    *rhs_loc,*rhs_recvbuf; /* local RHS storage and receive buffer */
#endif
  Vec            b_seq,x_seq;           /* sequential work vectors (centralized RHS/solution) */
  PetscInt       ninfo,*info;           /* which INFO to display */
  PetscInt       sizeredrhs;            /* current allocation size of id.redrhs (see MatMumpsHandleSchur_Private) */
  PetscScalar    *schur_sol;            /* workspace used when solving with an inverted Schur complement */
  PetscInt       schur_sizesol;         /* current allocation size of schur_sol */
  PetscMUMPSInt  *ia_alloc,*ja_alloc;   /* work arrays used for the CSR struct for sparse rhs */
  PetscInt64     cur_ilen,cur_jlen;     /* current len of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat,PetscInt,MatReuse,Mat_MUMPS*); /* converts the PETSc matrix to (irn,jcn,val) triples */

  /* stuff used by petsc/mumps OpenMP support*/
  PetscBool      use_petsc_omp_support;
  PetscOmpCtrl   omp_ctrl;              /* an OpenMP controller so that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */
  MPI_Comm       petsc_comm,omp_comm;   /* petsc_comm is petsc matrix's comm */
  PetscInt64     *recvcount;            /* a collection of nnz on omp_master */
  PetscMPIInt    tag,omp_comm_size;     /* message tag and size of omp_comm */
  PetscBool      is_omp_master;         /* is this rank the master of omp_comm */
  MPI_Request    *reqs;                 /* presumably outstanding nonblocking requests for the OMP-support paths — confirm at usage sites */
};
187 
188 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
189    Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
190  */
191 static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps,PetscInt nrow,PetscInt *ia,PetscInt *ja,PetscMUMPSInt **ia_mumps,PetscMUMPSInt **ja_mumps,PetscMUMPSInt *nnz_mumps)
192 {
193   PetscErrorCode ierr;
194   PetscInt       nnz=ia[nrow]-1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */
195 
196   PetscFunctionBegin;
197 #if defined(PETSC_USE_64BIT_INDICES)
198   {
199     PetscInt i;
200     if (nrow+1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
201       ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr);
202       ierr = PetscMalloc1(nrow+1,&mumps->ia_alloc);CHKERRQ(ierr);
203       mumps->cur_ilen = nrow+1;
204     }
205     if (nnz > mumps->cur_jlen) {
206       ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr);
207       ierr = PetscMalloc1(nnz,&mumps->ja_alloc);CHKERRQ(ierr);
208       mumps->cur_jlen = nnz;
209     }
210     for (i=0; i<nrow+1; i++) {ierr = PetscMUMPSIntCast(ia[i],&(mumps->ia_alloc[i]));CHKERRQ(ierr);}
211     for (i=0; i<nnz; i++)    {ierr = PetscMUMPSIntCast(ja[i],&(mumps->ja_alloc[i]));CHKERRQ(ierr);}
212     *ia_mumps = mumps->ia_alloc;
213     *ja_mumps = mumps->ja_alloc;
214   }
215 #else
216   *ia_mumps = ia;
217   *ja_mumps = ja;
218 #endif
219   ierr = PetscMUMPSIntCast(nnz,nnz_mumps);CHKERRQ(ierr);
220   PetscFunctionReturn(0);
221 }
222 
223 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps)
224 {
225   PetscErrorCode ierr;
226 
227   PetscFunctionBegin;
228   ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr);
229   ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
230   ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
231   mumps->id.size_schur = 0;
232   mumps->id.schur_lld  = 0;
233   mumps->id.ICNTL(19)  = 0;
234   PetscFunctionReturn(0);
235 }
236 
/* Solve the reduced Schur system: the right-hand side is taken from mumps->id.redrhs
   and the solution is returned in the same location. Depending on the Schur status,
   either a direct solve with the factored Schur complement is used, or — if the user
   has inverted it — a multiplication with the stored inverse. */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS            *mumps=(Mat_MUMPS*)F->data;
  Mat                  S,B,X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;
  PetscErrorCode       ierr;

  PetscFunctionBegin;
  ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr);
  ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr);
  /* B wraps id.redrhs directly (no copy): size_schur x nrhs, same dense type as S */
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  ierr = MatBindToCPU(B,S->boundtocpu);CHKERRQ(ierr);
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    /* X wraps id.redrhs as well, so the solve writes the solution in place over the rhs */
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr);
    ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr);
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr);
    } else {
      ierr = MatMatSolve(S,B,X);CHKERRQ(ierr);
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    /* S holds the inverse, so the "solve" is the product X = S*B (or S^T*B); X needs
       its own storage because B aliases id.redrhs, and we copy back afterwards */
    sizesol = mumps->id.nrhs*mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { /* grow cached workspace if needed */
      ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
      ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr);
      mumps->schur_sizesol = sizesol;
    }
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr);
    ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr);
#endif
    ierr = MatProductCreateWithMat(S,B,NULL,X);CHKERRQ(ierr);
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatProductSetType(X,MATPRODUCT_AtB);CHKERRQ(ierr);
    } else {
      ierr = MatProductSetType(X,MATPRODUCT_AB);CHKERRQ(ierr);
    }
    ierr = MatProductSetFromOptions(X);CHKERRQ(ierr);
    ierr = MatProductSymbolic(X);CHKERRQ(ierr);
    ierr = MatProductNumeric(X);CHKERRQ(ierr);

    /* copy the solution back into id.redrhs (aliased by B) */
    ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    break;
  default:
    /* NOTE(review): on this error path B (created above) is never destroyed — leak;
       also %D formats F->schur_status, an enum, while the switch tested schurstatus */
    SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status);
  }
  ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = MatDestroy(&X);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
299 
/* Drive the MUMPS Schur condensation/expansion steps around a solve.
   expansion=PETSC_FALSE: prepare the condensation phase — allocate id.redrhs if needed
   and set ICNTL(26)=1 — before the MUMPS forward solve.
   expansion=PETSC_TRUE: solve the reduced Schur system ourselves (MUMPS leaves that to
   the user), then run the MUMPS expansion phase (ICNTL(26)=2) and restore defaults.
   No-op when no Schur complement was requested (ICNTL(19)==0). */
static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS     *mumps=(Mat_MUMPS*)F->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
    PetscFunctionReturn(0);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = mumps->id.size_schur; /* leading dimension of redrhs */
      ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr);
      mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs;
    }
    mumps->id.ICNTL(26) = 1; /* condensation phase */
  } else { /* prepare for the expansion step */
    /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
    ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr);
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
    if (mumps->id.nrhs > 1) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(0);
}
336 
337 /*
338   MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz]
339 
340   input:
341     A       - matrix in aij,baij or sbaij format
342     shift   - 0: C style output triple; 1: Fortran style output triple.
343     reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
344               MAT_REUSE_MATRIX:   only the values in v array are updated
345   output:
346     nnz     - dim of r, c, and v (number of local nonzero entries of A)
347     r, c, v - row and col index, matrix values (matrix triples)
348 
349   The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
350   freed with PetscFree(mumps->irn);  This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means
351   that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
352 
353  */
354 
355 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
356 {
357   const PetscScalar *av;
358   const PetscInt    *ai,*aj,*ajj,M=A->rmap->n;
359   PetscInt64        nz,rnz,i,j,k;
360   PetscErrorCode    ierr;
361   PetscMUMPSInt     *row,*col;
362   Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
363 
364   PetscFunctionBegin;
365   ierr       = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr);
366   mumps->val = (PetscScalar*)av;
367   if (reuse == MAT_INITIAL_MATRIX) {
368     nz   = aa->nz;
369     ai   = aa->i;
370     aj   = aa->j;
371     ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
372     for (i=k=0; i<M; i++) {
373       rnz = ai[i+1] - ai[i];
374       ajj = aj + ai[i];
375       for (j=0; j<rnz; j++) {
376         ierr = PetscMUMPSIntCast(i+shift,&row[k]);CHKERRQ(ierr);
377         ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[k]);CHKERRQ(ierr);
378         k++;
379       }
380     }
381     mumps->irn = row;
382     mumps->jcn = col;
383     mumps->nnz = nz;
384   }
385   ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr);
386   PetscFunctionReturn(0);
387 }
388 
389 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
390 {
391   PetscErrorCode ierr;
392   PetscInt64     nz,i,j,k,r;
393   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
394   PetscMUMPSInt  *row,*col;
395 
396   PetscFunctionBegin;
397   mumps->val = a->val;
398   if (reuse == MAT_INITIAL_MATRIX) {
399     nz   = a->sliidx[a->totalslices];
400     ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
401     for (i=k=0; i<a->totalslices; i++) {
402       for (j=a->sliidx[i],r=0; j<a->sliidx[i+1]; j++,r=((r+1)&0x07)) {
403         ierr = PetscMUMPSIntCast(8*i+r+shift,&row[k++]);CHKERRQ(ierr);
404       }
405     }
406     for (i=0;i<nz;i++) {ierr = PetscMUMPSIntCast(a->colidx[i]+shift,&col[i]);CHKERRQ(ierr);}
407     mumps->irn = row;
408     mumps->jcn = col;
409     mumps->nnz = nz;
410   }
411   PetscFunctionReturn(0);
412 }
413 
414 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
415 {
416   Mat_SeqBAIJ    *aa=(Mat_SeqBAIJ*)A->data;
417   const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2;
418   PetscInt64     M,nz,idx=0,rnz,i,j,k,m;
419   PetscInt       bs;
420   PetscErrorCode ierr;
421   PetscMUMPSInt  *row,*col;
422 
423   PetscFunctionBegin;
424   ierr       = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
425   M          = A->rmap->N/bs;
426   mumps->val = aa->a;
427   if (reuse == MAT_INITIAL_MATRIX) {
428     ai   = aa->i; aj = aa->j;
429     nz   = bs2*aa->nz;
430     ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
431     for (i=0; i<M; i++) {
432       ajj = aj + ai[i];
433       rnz = ai[i+1] - ai[i];
434       for (k=0; k<rnz; k++) {
435         for (j=0; j<bs; j++) {
436           for (m=0; m<bs; m++) {
437             ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[idx]);CHKERRQ(ierr);
438             ierr = PetscMUMPSIntCast(bs*ajj[k] + j + shift,&col[idx]);CHKERRQ(ierr);
439             idx++;
440           }
441         }
442       }
443     }
444     mumps->irn = row;
445     mumps->jcn = col;
446     mumps->nnz = nz;
447   }
448   PetscFunctionReturn(0);
449 }
450 
451 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
452 {
453   const PetscInt *ai, *aj,*ajj;
454   PetscInt        bs;
455   PetscInt64      nz,rnz,i,j,k,m;
456   PetscErrorCode  ierr;
457   PetscMUMPSInt   *row,*col;
458   PetscScalar     *val;
459   Mat_SeqSBAIJ    *aa=(Mat_SeqSBAIJ*)A->data;
460   const PetscInt  bs2=aa->bs2,mbs=aa->mbs;
461 #if defined(PETSC_USE_COMPLEX)
462   PetscBool       hermitian;
463 #endif
464 
465   PetscFunctionBegin;
466 #if defined(PETSC_USE_COMPLEX)
467   ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
468   if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
469 #endif
470   ai   = aa->i;
471   aj   = aa->j;
472   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
473   if (reuse == MAT_INITIAL_MATRIX) {
474     nz   = aa->nz;
475     ierr = PetscMalloc2(bs2*nz,&row,bs2*nz,&col);CHKERRQ(ierr);
476     if (bs>1) {
477       ierr       = PetscMalloc1(bs2*nz,&mumps->val_alloc);CHKERRQ(ierr);
478       mumps->val = mumps->val_alloc;
479     } else {
480       mumps->val = aa->a;
481     }
482     mumps->irn = row;
483     mumps->jcn = col;
484   } else {
485     if (bs == 1) mumps->val = aa->a;
486     row = mumps->irn;
487     col = mumps->jcn;
488   }
489   val = mumps->val;
490 
491   nz = 0;
492   if (bs>1) {
493     for (i=0; i<mbs; i++) {
494       rnz = ai[i+1] - ai[i];
495       ajj = aj + ai[i];
496       for (j=0; j<rnz; j++) {
497         for (k=0; k<bs; k++) {
498           for (m=0; m<bs; m++) {
499             if (ajj[j]>i || k>=m) {
500               if (reuse == MAT_INITIAL_MATRIX) {
501                 ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[nz]);CHKERRQ(ierr);
502                 ierr = PetscMUMPSIntCast(ajj[j]*bs + k + shift,&col[nz]);CHKERRQ(ierr);
503               }
504               val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs];
505             }
506           }
507         }
508       }
509     }
510   } else if (reuse == MAT_INITIAL_MATRIX) {
511     for (i=0; i<mbs; i++) {
512       rnz = ai[i+1] - ai[i];
513       ajj = aj + ai[i];
514       for (j=0; j<rnz; j++) {
515         ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
516         ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
517         nz++;
518       }
519     }
520     if (nz != aa->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %D != %D",nz,aa->nz);
521   }
522   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
523   PetscFunctionReturn(0);
524 }
525 
526 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
527 {
528   const PetscInt    *ai,*aj,*ajj,*adiag,M=A->rmap->n;
529   PetscInt64        nz,rnz,i,j;
530   const PetscScalar *av,*v1;
531   PetscScalar       *val;
532   PetscErrorCode    ierr;
533   PetscMUMPSInt     *row,*col;
534   Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
535   PetscBool         missing;
536 #if defined(PETSC_USE_COMPLEX)
537   PetscBool         hermitian;
538 #endif
539 
540   PetscFunctionBegin;
541 #if defined(PETSC_USE_COMPLEX)
542   ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
543   if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
544 #endif
545   ierr  = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr);
546   ai    = aa->i; aj = aa->j;
547   adiag = aa->diag;
548   ierr  = MatMissingDiagonal_SeqAIJ(A,&missing,NULL);CHKERRQ(ierr);
549   if (reuse == MAT_INITIAL_MATRIX) {
550     /* count nz in the upper triangular part of A */
551     nz = 0;
552     if (missing) {
553       for (i=0; i<M; i++) {
554         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
555           for (j=ai[i];j<ai[i+1];j++) {
556             if (aj[j] < i) continue;
557             nz++;
558           }
559         } else {
560           nz += ai[i+1] - adiag[i];
561         }
562       }
563     } else {
564       for (i=0; i<M; i++) nz += ai[i+1] - adiag[i];
565     }
566     ierr       = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
567     ierr       = PetscMalloc1(nz,&val);CHKERRQ(ierr);
568     mumps->nnz = nz;
569     mumps->irn = row;
570     mumps->jcn = col;
571     mumps->val = mumps->val_alloc = val;
572 
573     nz = 0;
574     if (missing) {
575       for (i=0; i<M; i++) {
576         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
577           for (j=ai[i];j<ai[i+1];j++) {
578             if (aj[j] < i) continue;
579             ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
580             ierr = PetscMUMPSIntCast(aj[j]+shift,&col[nz]);CHKERRQ(ierr);
581             val[nz] = av[j];
582             nz++;
583           }
584         } else {
585           rnz = ai[i+1] - adiag[i];
586           ajj = aj + adiag[i];
587           v1  = av + adiag[i];
588           for (j=0; j<rnz; j++) {
589             ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
590             ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
591             val[nz++] = v1[j];
592           }
593         }
594       }
595     } else {
596       for (i=0; i<M; i++) {
597         rnz = ai[i+1] - adiag[i];
598         ajj = aj + adiag[i];
599         v1  = av + adiag[i];
600         for (j=0; j<rnz; j++) {
601           ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
602           ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
603           val[nz++] = v1[j];
604         }
605       }
606     }
607   } else {
608     nz = 0;
609     val = mumps->val;
610     if (missing) {
611       for (i=0; i <M; i++) {
612         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
613           for (j=ai[i];j<ai[i+1];j++) {
614             if (aj[j] < i) continue;
615             val[nz++] = av[j];
616           }
617         } else {
618           rnz = ai[i+1] - adiag[i];
619           v1  = av + adiag[i];
620           for (j=0; j<rnz; j++) {
621             val[nz++] = v1[j];
622           }
623         }
624       }
625     } else {
626       for (i=0; i <M; i++) {
627         rnz = ai[i+1] - adiag[i];
628         v1  = av + adiag[i];
629         for (j=0; j<rnz; j++) {
630           val[nz++] = v1[j];
631         }
632       }
633     }
634   }
635   ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr);
636   PetscFunctionReturn(0);
637 }
638 
639 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
640 {
641   PetscErrorCode    ierr;
642   const PetscInt    *ai,*aj,*bi,*bj,*garray,*ajj,*bjj;
643   PetscInt          bs;
644   PetscInt64        rstart,nz,i,j,k,m,jj,irow,countA,countB;
645   PetscMUMPSInt     *row,*col;
646   const PetscScalar *av,*bv,*v1,*v2;
647   PetscScalar       *val;
648   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)A->data;
649   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ*)(mat->A)->data;
650   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
651   const PetscInt    bs2=aa->bs2,mbs=aa->mbs;
652 #if defined(PETSC_USE_COMPLEX)
653   PetscBool         hermitian;
654 #endif
655 
656   PetscFunctionBegin;
657 #if defined(PETSC_USE_COMPLEX)
658   ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
659   if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
660 #endif
661   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
662   rstart = A->rmap->rstart;
663   ai = aa->i;
664   aj = aa->j;
665   bi = bb->i;
666   bj = bb->j;
667   av = aa->a;
668   bv = bb->a;
669 
670   garray = mat->garray;
671 
672   if (reuse == MAT_INITIAL_MATRIX) {
673     nz   = (aa->nz+bb->nz)*bs2; /* just a conservative estimate */
674     ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
675     ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
676     /* can not decide the exact mumps->nnz now because of the SBAIJ */
677     mumps->irn = row;
678     mumps->jcn = col;
679     mumps->val = mumps->val_alloc = val;
680   } else {
681     val = mumps->val;
682   }
683 
684   jj = 0; irow = rstart;
685   for (i=0; i<mbs; i++) {
686     ajj    = aj + ai[i];                 /* ptr to the beginning of this row */
687     countA = ai[i+1] - ai[i];
688     countB = bi[i+1] - bi[i];
689     bjj    = bj + bi[i];
690     v1     = av + ai[i]*bs2;
691     v2     = bv + bi[i]*bs2;
692 
693     if (bs>1) {
694       /* A-part */
695       for (j=0; j<countA; j++) {
696         for (k=0; k<bs; k++) {
697           for (m=0; m<bs; m++) {
698             if (rstart + ajj[j]*bs>irow || k>=m) {
699               if (reuse == MAT_INITIAL_MATRIX) {
700                 ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr);
701                 ierr = PetscMUMPSIntCast(rstart + ajj[j]*bs + k + shift,&col[jj]);CHKERRQ(ierr);
702               }
703               val[jj++] = v1[j*bs2 + m + k*bs];
704             }
705           }
706         }
707       }
708 
709       /* B-part */
710       for (j=0; j < countB; j++) {
711         for (k=0; k<bs; k++) {
712           for (m=0; m<bs; m++) {
713             if (reuse == MAT_INITIAL_MATRIX) {
714               ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr);
715               ierr = PetscMUMPSIntCast(garray[bjj[j]]*bs + k + shift,&col[jj]);CHKERRQ(ierr);
716             }
717             val[jj++] = v2[j*bs2 + m + k*bs];
718           }
719         }
720       }
721     } else {
722       /* A-part */
723       for (j=0; j<countA; j++) {
724         if (reuse == MAT_INITIAL_MATRIX) {
725           ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
726           ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr);
727         }
728         val[jj++] = v1[j];
729       }
730 
731       /* B-part */
732       for (j=0; j < countB; j++) {
733         if (reuse == MAT_INITIAL_MATRIX) {
734           ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
735           ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr);
736         }
737         val[jj++] = v2[j];
738       }
739     }
740     irow+=bs;
741   }
742   mumps->nnz = jj;
743   PetscFunctionReturn(0);
744 }
745 
/* Convert an MPIAIJ matrix into MUMPS triples. The local diagonal (Ad) and
   off-diagonal (Ao) parts are interleaved row by row in global numbering; values
   are always copied into an allocated buffer since the two parts must be merged. */
PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt64        rstart,nz,i,j,jj,irow,countA,countB;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av, *bv,*v1,*v2;
  PetscScalar       *val;
  Mat               Ad,Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;

  PetscFunctionBegin;
  ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr);

  aa = (Mat_SeqAIJ*)(Ad)->data;
  bb = (Mat_SeqAIJ*)(Ao)->data;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;

  rstart = A->rmap->rstart; /* global index of this rank's first row */

  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val; /* indices are unchanged, only refresh values */
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + ai[i];                 /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part: columns are local, offset by rstart to become global */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
        ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr);
      }
      val[jj++] = v1[j];
    }

    /* B-part: garray maps compact local column numbers to global columns */
    for (j=0; j < countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
        ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr);
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
816 
817 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
818 {
819   Mat_MPIBAIJ       *mat    = (Mat_MPIBAIJ*)A->data;
820   Mat_SeqBAIJ       *aa     = (Mat_SeqBAIJ*)(mat->A)->data;
821   Mat_SeqBAIJ       *bb     = (Mat_SeqBAIJ*)(mat->B)->data;
822   const PetscInt    *ai     = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj;
823   const PetscInt    *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart;
824   const PetscInt    bs2=mat->bs2;
825   PetscErrorCode    ierr;
826   PetscInt          bs;
827   PetscInt64        nz,i,j,k,n,jj,irow,countA,countB,idx;
828   PetscMUMPSInt     *row,*col;
829   const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2;
830   PetscScalar       *val;
831 
832   PetscFunctionBegin;
833   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
834   if (reuse == MAT_INITIAL_MATRIX) {
835     nz   = bs2*(aa->nz + bb->nz);
836     ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
837     ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
838     mumps->nnz = nz;
839     mumps->irn = row;
840     mumps->jcn = col;
841     mumps->val = mumps->val_alloc = val;
842   } else {
843     val = mumps->val;
844   }
845 
846   jj = 0; irow = rstart;
847   for (i=0; i<mbs; i++) {
848     countA = ai[i+1] - ai[i];
849     countB = bi[i+1] - bi[i];
850     ajj    = aj + ai[i];
851     bjj    = bj + bi[i];
852     v1     = av + bs2*ai[i];
853     v2     = bv + bs2*bi[i];
854 
855     idx = 0;
856     /* A-part */
857     for (k=0; k<countA; k++) {
858       for (j=0; j<bs; j++) {
859         for (n=0; n<bs; n++) {
860           if (reuse == MAT_INITIAL_MATRIX) {
861             ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr);
862             ierr = PetscMUMPSIntCast(rstart + bs*ajj[k] + j + shift,&col[jj]);CHKERRQ(ierr);
863           }
864           val[jj++] = v1[idx++];
865         }
866       }
867     }
868 
869     idx = 0;
870     /* B-part */
871     for (k=0; k<countB; k++) {
872       for (j=0; j<bs; j++) {
873         for (n=0; n<bs; n++) {
874           if (reuse == MAT_INITIAL_MATRIX) {
875             ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr);
876             ierr = PetscMUMPSIntCast(bs*garray[bjj[k]] + j + shift,&col[jj]);CHKERRQ(ierr);
877           }
878           val[jj++] = v2[idx++];
879         }
880       }
881     }
882     irow += bs;
883   }
884   PetscFunctionReturn(0);
885 }
886 
887 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
888 {
889   const PetscInt    *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
890   PetscErrorCode    ierr;
891   PetscInt64        rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
892   PetscMUMPSInt     *row,*col;
893   const PetscScalar *av, *bv,*v1,*v2;
894   PetscScalar       *val;
895   Mat               Ad,Ao;
896   Mat_SeqAIJ        *aa;
897   Mat_SeqAIJ        *bb;
898 #if defined(PETSC_USE_COMPLEX)
899   PetscBool         hermitian;
900 #endif
901 
902   PetscFunctionBegin;
903 #if defined(PETSC_USE_COMPLEX)
904   ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
905   if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
906 #endif
907   ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
908   ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr);
909   ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr);
910 
911   aa    = (Mat_SeqAIJ*)(Ad)->data;
912   bb    = (Mat_SeqAIJ*)(Ao)->data;
913   ai    = aa->i;
914   aj    = aa->j;
915   adiag = aa->diag;
916   bi    = bb->i;
917   bj    = bb->j;
918 
919   rstart = A->rmap->rstart;
920 
921   if (reuse == MAT_INITIAL_MATRIX) {
922     nza = 0;    /* num of upper triangular entries in mat->A, including diagonals */
923     nzb = 0;    /* num of upper triangular entries in mat->B */
924     for (i=0; i<m; i++) {
925       nza   += (ai[i+1] - adiag[i]);
926       countB = bi[i+1] - bi[i];
927       bjj    = bj + bi[i];
928       for (j=0; j<countB; j++) {
929         if (garray[bjj[j]] > rstart) nzb++;
930       }
931     }
932 
933     nz   = nza + nzb; /* total nz of upper triangular part of mat */
934     ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
935     ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
936     mumps->nnz = nz;
937     mumps->irn = row;
938     mumps->jcn = col;
939     mumps->val = mumps->val_alloc = val;
940   } else {
941     val = mumps->val;
942   }
943 
944   jj = 0; irow = rstart;
945   for (i=0; i<m; i++) {
946     ajj    = aj + adiag[i];                 /* ptr to the beginning of the diagonal of this row */
947     v1     = av + adiag[i];
948     countA = ai[i+1] - adiag[i];
949     countB = bi[i+1] - bi[i];
950     bjj    = bj + bi[i];
951     v2     = bv + bi[i];
952 
953     /* A-part */
954     for (j=0; j<countA; j++) {
955       if (reuse == MAT_INITIAL_MATRIX) {
956         ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
957         ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr);
958       }
959       val[jj++] = v1[j];
960     }
961 
962     /* B-part */
963     for (j=0; j < countB; j++) {
964       if (garray[bjj[j]] > rstart) {
965         if (reuse == MAT_INITIAL_MATRIX) {
966           ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
967           ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr);
968         }
969         val[jj++] = v2[j];
970       }
971     }
972     irow++;
973   }
974   ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr);
975   ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr);
976   PetscFunctionReturn(0);
977 }
978 
/*
  Destroy the MUMPS factor matrix: free all PETSc-side work objects, terminate the MUMPS
  instance (JOB_END), release the duplicated communicator, and clear the composed functions.
  The order matters: the MUMPS instance is ended before its communicator is freed.
*/
PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;

  PetscFunctionBegin;
  /* buffers used to move solution/RHS between MUMPS' distributed layout and PETSc vectors */
  ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
  ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr);
  ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr);
  ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr);
  /* the COO triples built by the MatConvertToTriples_* routines */
  ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr);
  ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr);
  ierr = PetscFree(mumps->info);CHKERRQ(ierr);
  ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr);
  /* tell MUMPS to release its internal data for this instance */
  mumps->id.job = JOB_END;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1));
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  if (mumps->use_petsc_omp_support) {
    ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr);
    ierr = PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);CHKERRQ(ierr);
    ierr = PetscFree3(mumps->rhs_nrow,mumps->rhs_recvcounts,mumps->rhs_disps);CHKERRQ(ierr);
  }
#endif
  ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr);
  ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr);
  ierr = PetscFree(mumps->recvcount);CHKERRQ(ierr);
  ierr = PetscFree(mumps->reqs);CHKERRQ(ierr);
  ierr = PetscFree(mumps->irhs_loc);CHKERRQ(ierr);
  if (mumps->mumps_comm != MPI_COMM_NULL) {ierr = MPI_Comm_free(&mumps->mumps_comm);CHKERRMPI(ierr);}
  ierr = PetscFree(A->data);CHKERRQ(ierr);

  /* clear composed functions */
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1029 
/* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor.
   Fills mumps->id.{rhs_loc,irhs_loc,nloc_rhs,lrhs_loc,nrhs}. With PETSc's OpenMP support (omp_comm_size > 1)
   the RHS pieces of all ranks in an omp communicator are gathered onto the omp master first. */
static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A,PetscInt nrhs,const PetscScalar *array)
{
  PetscErrorCode     ierr;
  Mat_MUMPS          *mumps=(Mat_MUMPS*)A->data;
  const PetscMPIInt  ompsize=mumps->omp_comm_size;
  PetscInt           i,m,M,rstart;

  PetscFunctionBegin;
  ierr = MatGetSize(A,&M,NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
  if (M > PETSC_MUMPS_INT_MAX) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
  if (ompsize == 1) {
    /* No OpenMP aggregation: each rank hands MUMPS its own contiguous range of rows */
    if (!mumps->irhs_loc) { /* built once; the ownership range of A does not change */
      mumps->nloc_rhs = m;
      ierr = PetscMalloc1(m,&mumps->irhs_loc);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
      for (i=0; i<m; i++) mumps->irhs_loc[i] = rstart+i+1; /* use 1-based indices */
    }
    mumps->id.rhs_loc = (MumpsScalar*)array;
  } else {
  #if defined(PETSC_HAVE_OPENMP_SUPPORT)
    const PetscInt  *ranges;
    PetscMPIInt     j,k,sendcount,*petsc_ranks,*omp_ranks;
    MPI_Group       petsc_group,omp_group;
    PetscScalar     *recvbuf=NULL;

    if (mumps->is_omp_master) {
      /* Lazily initialize the omp stuff for distributed rhs */
      if (!mumps->irhs_loc) {
        ierr = PetscMalloc2(ompsize,&omp_ranks,ompsize,&petsc_ranks);CHKERRQ(ierr);
        ierr = PetscMalloc3(ompsize,&mumps->rhs_nrow,ompsize,&mumps->rhs_recvcounts,ompsize,&mumps->rhs_disps);CHKERRQ(ierr);
        ierr = MPI_Comm_group(mumps->petsc_comm,&petsc_group);CHKERRMPI(ierr);
        ierr = MPI_Comm_group(mumps->omp_comm,&omp_group);CHKERRMPI(ierr);
        for (j=0; j<ompsize; j++) omp_ranks[j] = j;
        /* map omp-communicator ranks to their ranks in the petsc communicator, so we can look up their row ranges */
        ierr = MPI_Group_translate_ranks(omp_group,ompsize,omp_ranks,petsc_group,petsc_ranks);CHKERRMPI(ierr);

        /* Populate mumps->irhs_loc[], rhs_nrow[] */
        mumps->nloc_rhs = 0;
        ierr = MatGetOwnershipRanges(A,&ranges);CHKERRQ(ierr);
        for (j=0; j<ompsize; j++) {
          mumps->rhs_nrow[j] = ranges[petsc_ranks[j]+1] - ranges[petsc_ranks[j]];
          mumps->nloc_rhs   += mumps->rhs_nrow[j];
        }
        ierr = PetscMalloc1(mumps->nloc_rhs,&mumps->irhs_loc);CHKERRQ(ierr);
        for (j=k=0; j<ompsize; j++) {
          for (i=ranges[petsc_ranks[j]]; i<ranges[petsc_ranks[j]+1]; i++,k++) mumps->irhs_loc[k] = i+1; /* uses 1-based indices */
        }

        ierr = PetscFree2(omp_ranks,petsc_ranks);CHKERRQ(ierr);
        ierr = MPI_Group_free(&petsc_group);CHKERRMPI(ierr);
        ierr = MPI_Group_free(&omp_group);CHKERRMPI(ierr);
      }

      /* Realloc buffers when current nrhs is bigger than what we have met */
      if (nrhs > mumps->max_nrhs) {
        ierr = PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);CHKERRQ(ierr);
        ierr = PetscMalloc2(mumps->nloc_rhs*nrhs,&mumps->rhs_loc,mumps->nloc_rhs*nrhs,&mumps->rhs_recvbuf);CHKERRQ(ierr);
        mumps->max_nrhs = nrhs;
      }

      /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
      for (j=0; j<ompsize; j++) {ierr = PetscMPIIntCast(mumps->rhs_nrow[j]*nrhs,&mumps->rhs_recvcounts[j]);CHKERRQ(ierr);}
      mumps->rhs_disps[0] = 0;
      for (j=1; j<ompsize; j++) {
        mumps->rhs_disps[j] = mumps->rhs_disps[j-1] + mumps->rhs_recvcounts[j-1];
        if (mumps->rhs_disps[j] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscMPIInt overflow!");
      }
      recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
    }

    ierr = PetscMPIIntCast(m*nrhs,&sendcount);CHKERRQ(ierr);
    ierr = MPI_Gatherv(array,sendcount,MPIU_SCALAR,recvbuf,mumps->rhs_recvcounts,mumps->rhs_disps,MPIU_SCALAR,0,mumps->omp_comm);CHKERRMPI(ierr);

    if (mumps->is_omp_master) {
      if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
        PetscScalar *dst,*dstbase = mumps->rhs_loc;
        for (j=0; j<ompsize; j++) {
          const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
          dst = dstbase;
          for (i=0; i<nrhs; i++) {
            /* each sender contributed nrhs column-pieces of rhs_nrow[j] rows; interleave them into
               the column-major rhs_loc[] with leading dimension nloc_rhs */
            ierr = PetscArraycpy(dst,src,mumps->rhs_nrow[j]);CHKERRQ(ierr);
            src += mumps->rhs_nrow[j];
            dst += mumps->nloc_rhs;
          }
          dstbase += mumps->rhs_nrow[j];
        }
      }
      mumps->id.rhs_loc = (MumpsScalar*)mumps->rhs_loc;
    }
  #endif /* PETSC_HAVE_OPENMP_SUPPORT */
  }
  mumps->id.nrhs     = nrhs;
  mumps->id.nloc_rhs = mumps->nloc_rhs;
  mumps->id.lrhs_loc = mumps->nloc_rhs; /* leading dimension of rhs_loc */
  mumps->id.irhs_loc = mumps->irhs_loc;
  PetscFunctionReturn(0);
}
1128 
/*
  Solve A x = b using the MUMPS factorization held in A->data.
  Handles three RHS setups: distributed RHS (ICNTL(20)=10), centralized RHS gathered to
  rank 0 (parallel, ICNTL(20)=0), and the sequential case where b is copied into x and
  solved in place. Also drives the two-phase solve needed when a Schur complement is present.
*/
PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS          *mumps=(Mat_MUMPS*)A->data;
  const PetscScalar  *rarray = NULL;
  PetscScalar        *array;
  IS                 is_iden,is_petsc;
  PetscErrorCode     ierr;
  PetscInt           i;
  PetscBool          second_solve = PETSC_FALSE;
  static PetscBool   cite1 = PETSC_FALSE,cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",&cite1);CHKERRQ(ierr);
  ierr = PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",&cite2);CHKERRQ(ierr);

  if (A->factorerrortype) {
    /* factorization failed earlier: flag the solution as invalid instead of calling MUMPS */
    ierr = PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    ierr = VecSetInf(x);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  mumps->id.nrhs = 1;
  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      ierr = VecGetArrayRead(b,&rarray);CHKERRQ(ierr);
      ierr = MatMumpsSetUpDistRHSInfo(A,1,rarray);CHKERRQ(ierr);
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/
      ierr = VecScatterBegin(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      if (!mumps->myid) { /* only the host rank passes the RHS to MUMPS */
        ierr = VecGetArray(mumps->b_seq,&array);CHKERRQ(ierr);
        mumps->id.rhs = (MumpsScalar*)array;
      }
    }
  } else {  /* petsc_size == 1 */
    mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
    ierr = VecCopy(b,x);CHKERRQ(ierr); /* MUMPS overwrites the RHS with the solution, so solve in x */
    ierr = VecGetArray(x,&array);CHKERRQ(ierr);
    mumps->id.rhs = (MumpsScalar*)array;
  }

  /*
     handle condensation step of Schur complement (if any)
     We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
     According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase
     Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S
  */
  if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
    if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");
    second_solve = PETSC_TRUE;
    ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
  }
  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

  /* handle expansion step of Schur complement (if any) */
  if (second_solve) {
    ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
  }

  if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */
      ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
    }
    if (!mumps->scat_sol) { /* create scatter scat_sol */
      PetscInt *isol2_loc=NULL;
      ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */
      ierr = PetscMalloc1(mumps->id.lsol_loc,&isol2_loc);CHKERRQ(ierr);
      for (i=0; i<mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i]-1; /* change Fortran style to C style */
      ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,isol2_loc,PETSC_OWN_POINTER,&is_petsc);CHKERRQ(ierr);  /* to */
      ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr);
      ierr = ISDestroy(&is_iden);CHKERRQ(ierr);
      ierr = ISDestroy(&is_petsc);CHKERRQ(ierr);
      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
    }

    ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }

  /* release the array(s) acquired above */
  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      ierr = VecRestoreArrayRead(b,&rarray);CHKERRQ(ierr);
    } else if (!mumps->myid) {
      ierr = VecRestoreArray(mumps->b_seq,&array);CHKERRQ(ierr);
    }
  } else {ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);}

  ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1227 
1228 PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)
1229 {
1230   Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
1231   PetscErrorCode ierr;
1232 
1233   PetscFunctionBegin;
1234   mumps->id.ICNTL(9) = 0;
1235   ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr);
1236   mumps->id.ICNTL(9) = 1;
1237   PetscFunctionReturn(0);
1238 }
1239 
/*
  Solve A X = B for a dense X with multiple right-hand sides.
  B may be dense (MATDENSE) or sparse supplied as a MATTRANSPOSEMAT wrapper (MUMPS wants
  sparse RHS in compressed-column form, which is the row storage of B^T).
  Sequential case solves in place in X; the parallel case either distributes the RHS
  (ICNTL(20)=10) or centralizes it on the host, then scatters MUMPS' distributed solution
  back into X.
*/
PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X)
{
  PetscErrorCode    ierr;
  Mat               Bt = NULL;
  PetscBool         denseX,denseB,flg,flgT;
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
  PetscInt          i,nrhs,M;
  PetscScalar       *array;
  const PetscScalar *rbray;
  PetscInt          lsol_loc,nlsol_loc,*idxx,iidx = 0;
  PetscMUMPSInt     *isol_loc,*isol_loc_save;
  PetscScalar       *bray,*sol_loc,*sol_loc_save;
  IS                is_to,is_from;
  PetscInt          k,proc,j,m,myrstart;
  const PetscInt    *rstart;
  Vec               v_mpi,msol_loc;
  VecScatter        scat_sol;
  Vec               b_seq;
  VecScatter        scat_rhs;
  PetscScalar       *aa;
  PetscInt          spnr,*ia,*ja;
  Mat_MPIAIJ        *b = NULL;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompareAny((PetscObject)X,&denseX,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (!denseX) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");

  ierr = PetscObjectTypeCompareAny((PetscObject)B,&denseB,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (denseB) {
    if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution");
    mumps->id.ICNTL(20)= 0; /* dense RHS */
  } else { /* sparse B */
    if (X == B) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different matrices");
    ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr);
    if (flgT) { /* input B is transpose of actural RHS matrix,
                 because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
      ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix");
    mumps->id.ICNTL(20)= 1; /* sparse RHS */
  }

  ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr);
  mumps->id.nrhs = nrhs;
  mumps->id.lrhs = M;  /* leading dimension of the (centralized) RHS array */
  mumps->id.rhs  = NULL;

  if (mumps->petsc_size == 1) {
    /* Sequential path: MUMPS solves in place, so the RHS is staged directly in X's array */
    PetscScalar *aa;           /* NOTE: shadows the outer aa/spnr/ia/ja; this branch returns before they are used */
    PetscInt    spnr,*ia,*ja;
    PetscBool   second_solve = PETSC_FALSE;

    ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
    mumps->id.rhs = (MumpsScalar*)array;

    if (denseB) {
      /* copy B to X */
      ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr);
      ierr = PetscArraycpy(array,rbray,M*nrhs);CHKERRQ(ierr);
      ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr);
    } else { /* sparse B */
      ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr);
      mumps->id.rhs_sparse  = (MumpsScalar*)aa;
    }
    /* handle condensation step of Schur complement (if any) */
    if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
      second_solve = PETSC_TRUE;
      ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
    }
    /* solve phase */
    /*-------------*/
    mumps->id.job = JOB_SOLVE;
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

    /* handle expansion step of Schur complement (if any) */
    if (second_solve) {
      ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
    }
    if (!denseB) { /* sparse B */
      ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
    ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/
  if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");

  /* create msol_loc to hold mumps local solution */
  isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
  sol_loc_save  = (PetscScalar*)mumps->id.sol_loc;

  lsol_loc  = mumps->id.lsol_loc;
  nlsol_loc = nrhs*lsol_loc;     /* length of sol_loc */
  ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr);
  mumps->id.sol_loc  = (MumpsScalar*)sol_loc;
  mumps->id.isol_loc = isol_loc;

  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);CHKERRQ(ierr);

  if (denseB) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr);
      ierr = MatMumpsSetUpDistRHSInfo(A,nrhs,rbray);CHKERRQ(ierr);
      ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr);
      ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,NULL,&v_mpi);CHKERRQ(ierr);
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
      /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in
        very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank
        0, re-arrange B into desired order, which is a local operation.
      */

      /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
      /* wrap dense rhs matrix B into a vector v_mpi */
      ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr);
      ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
      ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr);

      /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
      if (!mumps->myid) {
        PetscInt *idx;
        /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
        ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr);
        ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr);
        k = 0;
        for (proc=0; proc<mumps->petsc_size; proc++) {
          for (j=0; j<nrhs; j++) {
            for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i;
          }
        }

        ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr);
        ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr);
        ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr);
      } else {
        /* non-host ranks contribute nothing to the centralized RHS */
        ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr);
        ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr);
        ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr);
      }
      ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr);
      ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = ISDestroy(&is_to);CHKERRQ(ierr);
      ierr = ISDestroy(&is_from);CHKERRQ(ierr);
      ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      if (!mumps->myid) { /* define rhs on the host */
        ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr);
        mumps->id.rhs = (MumpsScalar*)bray;
        ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr);
      }
    }
  } else { /* sparse B */
    b = (Mat_MPIAIJ*)Bt->data;

    /* wrap dense X into a vector v_mpi */
    ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr);
    ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr);
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr);

    if (!mumps->myid) {
      /* only the host supplies the sparse RHS; its diagonal block b->A holds the local rows of Bt */
      ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr);
      ierr = MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr);
      mumps->id.rhs_sparse  = (MumpsScalar*)aa;
    } else {
      mumps->id.irhs_ptr    = NULL;
      mumps->id.irhs_sparse = NULL;
      mumps->id.nz_rhs      = 0;
      mumps->id.rhs_sparse  = NULL;
    }
  }

  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

  /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
  ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
  ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr);

  /* create scatter scat_sol */
  ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr);
  /* iidx: index for scatter mumps solution to petsc X */

  ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr);
  ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr);
  for (i=0; i<lsol_loc; i++) {
    isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */

    /* find the owning rank of global row isol_loc[i] to convert it to an index into v_mpi's layout */
    for (proc=0; proc<mumps->petsc_size; proc++) {
      if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) {
        myrstart = rstart[proc];
        k        = isol_loc[i] - myrstart;        /* local index on 1st column of petsc vector X */
        iidx     = k + myrstart*nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
        m        = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */
        break;
      }
    }

    for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m;
  }
  ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr);
  ierr = VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr);
  ierr = VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = ISDestroy(&is_from);CHKERRQ(ierr);
  ierr = ISDestroy(&is_to);CHKERRQ(ierr);
  ierr = VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);

  /* free spaces */
  mumps->id.sol_loc  = (MumpsScalar*)sol_loc_save; /* restore the buffers MatSolve() uses */
  mumps->id.isol_loc = isol_loc_save;

  ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr);
  ierr = PetscFree(idxx);CHKERRQ(ierr);
  ierr = VecDestroy(&msol_loc);CHKERRQ(ierr);
  ierr = VecDestroy(&v_mpi);CHKERRQ(ierr);
  if (!denseB) {
    if (!mumps->myid) {
      b = (Mat_MPIAIJ*)Bt->data;
      ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr);
      ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
  } else {
    if (mumps->ICNTL20 == 0) { /* b_seq/scat_rhs exist only on the centralized-RHS path */
      ierr = VecDestroy(&b_seq);CHKERRQ(ierr);
      ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr);
    }
  }
  ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr);
  ierr = PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1487 
1488 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X)
1489 {
1490   PetscErrorCode ierr;
1491   PetscBool      flg;
1492   Mat            B;
1493 
1494   PetscFunctionBegin;
1495   ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr);
1496   if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix");
1497 
1498   /* Create B=Bt^T that uses Bt's data structure */
1499   ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr);
1500 
1501   ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr);
1502   ierr = MatDestroy(&B);CHKERRQ(ierr);
1503   PetscFunctionReturn(0);
1504 }
1505 
1506 #if !defined(PETSC_USE_COMPLEX)
1507 /*
1508   input:
1509    F:        numeric factor
1510   output:
1511    nneg:     total number of negative pivots
1512    nzero:    total number of zero pivots
1513    npos:     (global dimension of F) - nneg - nzero
1514 */
1515 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos)
1516 {
1517   Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
1518   PetscErrorCode ierr;
1519   PetscMPIInt    size;
1520 
1521   PetscFunctionBegin;
1522   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRMPI(ierr);
1523   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
1524   if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",mumps->id.INFOG(13));
1525 
1526   if (nneg) *nneg = mumps->id.INFOG(12);
1527   if (nzero || npos) {
1528     if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
1529     if (nzero) *nzero = mumps->id.INFOG(28);
1530     if (npos) *npos   = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1531   }
1532   PetscFunctionReturn(0);
1533 }
1534 #endif
1535 
/*
  MatMumpsGatherNonzerosOnMaster - When PETSc's OpenMP support folds several MPI ranks
  into one MUMPS process, gather the COO triples (irn/jcn/val) from all ranks of
  mumps->omp_comm onto the omp master rank.

  With reuse == MAT_INITIAL_MATRIX the per-rank nonzero counts are gathered, the
  master's arrays are reallocated to hold the union, and irn/jcn/val are all
  transferred. With MAT_REUSE_MATRIX the sparsity pattern (irn/jcn) is assumed
  unchanged and only the numerical values (val) are re-gathered.

  Messages are sliced into chunks of at most PETSC_MPI_INT_MAX entries because MPI
  counts are 32-bit while the total nonzero count (PetscInt64) may exceed 2^31-1.
*/
PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps)
{
  PetscErrorCode ierr;
  PetscInt       i,nreqs;
  PetscMUMPSInt  *irn,*jcn;
  PetscMPIInt    count;                         /* per-message chunk size, bounded by PETSC_MPI_INT_MAX */
  PetscInt64     totnnz,remain;
  const PetscInt osize=mumps->omp_comm_size;
  PetscScalar    *val;

  PetscFunctionBegin;
  /* Nothing to do unless several MPI ranks share one MUMPS (OpenMP) process */
  if (osize > 1) {
    if (reuse == MAT_INITIAL_MATRIX) {
      /* master first gathers counts of nonzeros to receive */
      if (mumps->is_omp_master) {ierr = PetscMalloc1(osize,&mumps->recvcount);CHKERRQ(ierr);}
      ierr = MPI_Gather(&mumps->nnz,1,MPIU_INT64,mumps->recvcount,1,MPIU_INT64,0/*master*/,mumps->omp_comm);CHKERRMPI(ierr);

      /* Then each computes number of send/recvs */
      if (mumps->is_omp_master) {
        /* Start from 1 since self communication is not done in MPI */
        nreqs = 0;
        for (i=1; i<osize; i++) nreqs += (mumps->recvcount[i]+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
      } else {
        nreqs = (mumps->nnz+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
      }
      ierr = PetscMalloc1(nreqs*3,&mumps->reqs);CHKERRQ(ierr); /* Triple the requests since we send irn, jcn and val seperately */

      /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others.
         MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz
         might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size
         is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
       */
      nreqs = 0; /* counter for actual send/recvs */
      if (mumps->is_omp_master) {
        for (i=0,totnnz=0; i<osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
        ierr = PetscMalloc2(totnnz,&irn,totnnz,&jcn);CHKERRQ(ierr);
        ierr = PetscMalloc1(totnnz,&val);CHKERRQ(ierr);

        /* Self communication */
        ierr = PetscArraycpy(irn,mumps->irn,mumps->nnz);CHKERRQ(ierr);
        ierr = PetscArraycpy(jcn,mumps->jcn,mumps->nnz);CHKERRQ(ierr);
        ierr = PetscArraycpy(val,mumps->val,mumps->nnz);CHKERRQ(ierr);

        /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
        ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr);
        ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr);
        mumps->nnz = totnnz;
        mumps->irn = irn;
        mumps->jcn = jcn;
        mumps->val = mumps->val_alloc = val;

        irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
        jcn += mumps->recvcount[0];
        val += mumps->recvcount[0];

        /* Remote communication: post receives, slicing each rank's data into <= 2^31-1 chunks */
        for (i=1; i<osize; i++) {
          count  = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
          remain = mumps->recvcount[i] - count;
          while (count>0) {
            ierr    = MPI_Irecv(irn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
            ierr    = MPI_Irecv(jcn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
            ierr    = MPI_Irecv(val,count,MPIU_SCALAR,  i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
            irn    += count;
            jcn    += count;
            val    += count;
            count   = PetscMin(remain,PETSC_MPI_INT_MAX);
            remain -= count;
          }
        }
      } else {
        /* Non-master ranks send their whole triples to the master, with the same slicing */
        irn    = mumps->irn;
        jcn    = mumps->jcn;
        val    = mumps->val;
        count  = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
        remain = mumps->nnz - count;
        while (count>0) {
          ierr    = MPI_Isend(irn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
          ierr    = MPI_Isend(jcn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
          ierr    = MPI_Isend(val,count,MPIU_SCALAR,  0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
          irn    += count;
          jcn    += count;
          val    += count;
          count   = PetscMin(remain,PETSC_MPI_INT_MAX);
          remain -= count;
        }
      }
    } else {
      /* MAT_REUSE_MATRIX: pattern already in place, transfer only the values */
      nreqs = 0;
      if (mumps->is_omp_master) {
        val = mumps->val + mumps->recvcount[0];
        for (i=1; i<osize; i++) { /* Remote communication only since self data is already in place */
          count  = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
          remain = mumps->recvcount[i] - count;
          while (count>0) {
            ierr    = MPI_Irecv(val,count,MPIU_SCALAR,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
            val    += count;
            count   = PetscMin(remain,PETSC_MPI_INT_MAX);
            remain -= count;
          }
        }
      } else {
        val    = mumps->val;
        count  = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
        remain = mumps->nnz - count;
        while (count>0) {
          ierr    = MPI_Isend(val,count,MPIU_SCALAR,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
          val    += count;
          count   = PetscMin(remain,PETSC_MPI_INT_MAX);
          remain -= count;
        }
      }
    }
    ierr = MPI_Waitall(nreqs,mumps->reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);
    mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */
  }
  PetscFunctionReturn(0);
}
1654 
1655 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info)
1656 {
1657   Mat_MUMPS      *mumps =(Mat_MUMPS*)(F)->data;
1658   PetscErrorCode ierr;
1659   PetscBool      isMPIAIJ;
1660 
1661   PetscFunctionBegin;
1662   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) {
1663     if (mumps->id.INFOG(1) == -6) {
1664       ierr = PetscInfo2(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1665     }
1666     ierr = PetscInfo2(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1667     PetscFunctionReturn(0);
1668   }
1669 
1670   ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps);CHKERRQ(ierr);
1671   ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr);
1672 
1673   /* numerical factorization phase */
1674   /*-------------------------------*/
1675   mumps->id.job = JOB_FACTNUMERIC;
1676   if (!mumps->id.ICNTL(18)) { /* A is centralized */
1677     if (!mumps->myid) {
1678       mumps->id.a = (MumpsScalar*)mumps->val;
1679     }
1680   } else {
1681     mumps->id.a_loc = (MumpsScalar*)mumps->val;
1682   }
1683   PetscMUMPS_c(mumps);
1684   if (mumps->id.INFOG(1) < 0) {
1685     if (A->erroriffailure) {
1686       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1687     } else {
1688       if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
1689         ierr = PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1690         F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1691       } else if (mumps->id.INFOG(1) == -13) {
1692         ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1693         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1694       } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
1695         ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray \n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1696         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1697       } else {
1698         ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1699         F->factorerrortype = MAT_FACTOR_OTHER;
1700       }
1701     }
1702   }
1703   if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"  mumps->id.ICNTL(16):=%d\n",mumps->id.INFOG(16));
1704 
1705   F->assembled    = PETSC_TRUE;
1706   mumps->matstruc = SAME_NONZERO_PATTERN;
1707   if (F->schur) { /* reset Schur status to unfactored */
1708 #if defined(PETSC_HAVE_CUDA)
1709     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
1710 #endif
1711     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1712       mumps->id.ICNTL(19) = 2;
1713       ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr);
1714     }
1715     ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr);
1716   }
1717 
1718   /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
1719   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
1720 
1721   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
1722   if (mumps->petsc_size > 1) {
1723     PetscInt    lsol_loc;
1724     PetscScalar *sol_loc;
1725 
1726     ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr);
1727 
1728     /* distributed solution; Create x_seq=sol_loc for repeated use */
1729     if (mumps->x_seq) {
1730       ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
1731       ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr);
1732       ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr);
1733     }
1734     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
1735     ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr);
1736     mumps->id.lsol_loc = lsol_loc;
1737     mumps->id.sol_loc = (MumpsScalar*)sol_loc;
1738     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr);
1739   }
1740   ierr = PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr);
1741   PetscFunctionReturn(0);
1742 }
1743 
1744 /* Sets MUMPS options from the options database */
1745 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A)
1746 {
1747   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
1748   PetscErrorCode ierr;
1749   PetscMUMPSInt  icntl=0;
1750   PetscInt       info[80],i,ninfo=80;
1751   PetscBool      flg=PETSC_FALSE;
1752 
1753   PetscFunctionBegin;
1754   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr);
1755   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr);
1756   if (flg) mumps->id.ICNTL(1) = icntl;
1757   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr);
1758   if (flg) mumps->id.ICNTL(2) = icntl;
1759   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr);
1760   if (flg) mumps->id.ICNTL(3) = icntl;
1761 
1762   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr);
1763   if (flg) mumps->id.ICNTL(4) = icntl;
1764   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */
1765 
1766   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr);
1767   if (flg) mumps->id.ICNTL(6) = icntl;
1768 
1769   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr);
1770   if (flg) {
1771     if (icntl == 1 || icntl < 0 || icntl > 7) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto\n");
1772     mumps->id.ICNTL(7) = icntl;
1773   }
1774 
1775   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr);
1776   /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr); handled by MatSolveTranspose_MUMPS() */
1777   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr);
1778   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr);
1779   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr);
1780   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr);
1781   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr);
1782   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr);
1783   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */
1784     ierr = MatDestroy(&F->schur);CHKERRQ(ierr);
1785     ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr);
1786   }
1787 
1788   /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
1789      and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
1790      and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
1791      This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
1792      see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
1793      In short, we could not use distributed RHS with MPICH until v4.0b1.
1794    */
1795 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101))
1796   mumps->ICNTL20 = 0;  /* Centralized dense RHS*/
1797 #else
1798   mumps->ICNTL20 = 10; /* Distributed dense RHS*/
1799 #endif
1800   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_20","ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides","None",mumps->ICNTL20,&mumps->ICNTL20,&flg);CHKERRQ(ierr);
1801   if (flg && mumps->ICNTL20 != 10 && mumps->ICNTL20 != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10\n",(int)mumps->ICNTL20);
1802 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0)
1803   if (flg && mumps->ICNTL20 == 10) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=10 is not supported before MUMPS-5.3.0\n");
1804 #endif
1805   /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr); we only use distributed solution vector */
1806 
1807   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr);
1808   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr);
1809   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr);
1810   if (mumps->id.ICNTL(24)) {
1811     mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */
1812   }
1813 
1814   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr);
1815   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr);
1816   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_27","ICNTL(27): controls the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr);
1817   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr);
1818   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr);
1819   /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */
1820   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr);
1821   /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr);  -- not supported by PETSc API */
1822   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr);
1823   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr);
1824   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);CHKERRQ(ierr);
1825   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);CHKERRQ(ierr);
1826 
1827   ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr);
1828   ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr);
1829   ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr);
1830   ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr);
1831   ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr);
1832   ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr);
1833 
1834   ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL);CHKERRQ(ierr);
1835 
1836   ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr);
1837   if (ninfo) {
1838     if (ninfo > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %d must <= 80\n",ninfo);
1839     ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr);
1840     mumps->ninfo = ninfo;
1841     for (i=0; i<ninfo; i++) {
1842       if (info[i] < 0 || info[i]>80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %d must between 1 and 80\n",ninfo);
1843       else  mumps->info[i] = info[i];
1844     }
1845   }
1846 
1847   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1848   PetscFunctionReturn(0);
1849 }
1850 
/*
  PetscInitializeMUMPS - Sets up the MPI communicators and runs the MUMPS
  initialization job (JOB=-1) for a factor matrix.

  Builds the omp/mumps communicators (optionally via PETSc's OpenMP controller
  when -mat_mumps_use_omp_threads is given), dups a private comm for MUMPS,
  initializes the MUMPS instance, broadcasts MUMPS' default icntl/cntl values
  to ranks that did not call MUMPS, and applies the PETSc-side defaults.
*/
PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps)
{
  PetscErrorCode ierr;
  PetscInt       nthreads=0;
  MPI_Comm       newcomm=MPI_COMM_NULL;

  PetscFunctionBegin;
  mumps->petsc_comm = PetscObjectComm((PetscObject)A);
  ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRMPI(ierr);/* "if (!myid)" still works even if mumps_comm is different */

  ierr = PetscOptionsHasName(NULL,((PetscObject)A)->prefix,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr);
  if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
  ierr = PetscOptionsGetInt(NULL,((PetscObject)A)->prefix,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr);
  if (mumps->use_petsc_omp_support) {
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
    /* Split petsc_comm into an omp_comm (ranks folded into one MUMPS process) and a mumps_comm (the masters) */
    ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr);
    ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr);
#else
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual\n",((PetscObject)A)->prefix?((PetscObject)A)->prefix:"");
#endif
  } else {
    /* No OpenMP support requested: every rank is its own master and talks to MUMPS directly */
    mumps->omp_comm      = PETSC_COMM_SELF;
    mumps->mumps_comm    = mumps->petsc_comm;
    mumps->is_omp_master = PETSC_TRUE;
  }
  ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRMPI(ierr);
  mumps->reqs = NULL;
  mumps->tag  = 0;

  /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
  if (mumps->mumps_comm != MPI_COMM_NULL) {
    ierr = MPI_Comm_dup(mumps->mumps_comm,&newcomm);CHKERRMPI(ierr);
    mumps->mumps_comm = newcomm;
  }

  /* MUMPS takes a Fortran communicator handle */
  mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
  mumps->id.job = JOB_INIT;
  mumps->id.par = 1;  /* host participates factorizaton and solve */
  mumps->id.sym = mumps->sym;

  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in PetscInitializeMUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1));

  /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
     For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
   */
  ierr = MPI_Bcast(mumps->id.icntl,40,MPI_INT,  0,mumps->omp_comm);CHKERRMPI(ierr);
  ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRMPI(ierr);

  mumps->scat_rhs = NULL;
  mumps->scat_sol = NULL;

  /* set PETSc-MUMPS default options - override MUMPS default */
  mumps->id.ICNTL(3) = 0;
  mumps->id.ICNTL(4) = 0;
  if (mumps->petsc_size == 1) {
    mumps->id.ICNTL(18) = 0;   /* centralized assembled matrix input */
    mumps->id.ICNTL(7)  = 7;   /* automatic choice of ordering done by the package */
  } else {
    mumps->id.ICNTL(18) = 3;   /* distributed assembled matrix input */
    mumps->id.ICNTL(21) = 1;   /* distributed solution */
  }

  /* schur */
  mumps->id.size_schur    = 0;
  mumps->id.listvar_schur = NULL;
  mumps->id.schur         = NULL;
  mumps->sizeredrhs       = 0;
  mumps->schur_sol        = NULL;
  mumps->schur_sizesol    = 0;
  PetscFunctionReturn(0);
}
1924 
1925 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps)
1926 {
1927   PetscErrorCode ierr;
1928 
1929   PetscFunctionBegin;
1930   if (mumps->id.INFOG(1) < 0) {
1931     if (A->erroriffailure) {
1932       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
1933     } else {
1934       if (mumps->id.INFOG(1) == -6) {
1935         ierr = PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1936         F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
1937       } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
1938         ierr = PetscInfo2(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1939         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1940       } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) {
1941         ierr = PetscInfo(F,"Empty matrix\n");CHKERRQ(ierr);
1942       } else {
1943         ierr = PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1944         F->factorerrortype = MAT_FACTOR_OTHER;
1945       }
1946     }
1947   }
1948   PetscFunctionReturn(0);
1949 }
1950 
/*
  MatLUFactorSymbolic_AIJMUMPS - Runs the MUMPS analysis phase (JOB=1) for an AIJ
  matrix: converts A to COO triples, hands them to MUMPS in either centralized
  (ICNTL(18)=0) or distributed (ICNTL(18)=3) form, and installs the solve/numeric
  function pointers on F. The column permutation c is ignored; a row permutation r
  is passed to MUMPS as a user-supplied ordering (ICNTL(7)=1).
*/
PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
  PetscErrorCode ierr;
  Vec            b;
  const PetscInt M = A->rmap->N;

  PetscFunctionBegin;
  mumps->matstruc = DIFFERENT_NONZERO_PATTERN;

  /* Set MUMPS options from the options database */
  ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr);

  /* Build the COO triples and, with OpenMP support, gather them on the omp master */
  ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr);
  ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr);

  /* analysis phase */
  /*----------------*/
  mumps->id.job = JOB_FACTSYMBOLIC;
  mumps->id.n   = M;
  switch (mumps->id.ICNTL(18)) {
  case 0:  /* centralized assembled matrix input */
    if (!mumps->myid) {
      mumps->id.nnz = mumps->nnz;
      mumps->id.irn = mumps->irn;
      mumps->id.jcn = mumps->jcn;
      /* ICNTL(6)>1 makes MUMPS use numerical values already during analysis */
      if (mumps->id.ICNTL(6)>1) mumps->id.a = (MumpsScalar*)mumps->val;
      if (r) {
        mumps->id.ICNTL(7) = 1; /* use the user-supplied ordering below */
        /* NOTE(review): this inner check is redundant -- we are already inside "if (!mumps->myid)" */
        if (!mumps->myid) {
          const PetscInt *idx;
          PetscInt       i;

          ierr = PetscMalloc1(M,&mumps->id.perm_in);CHKERRQ(ierr);
          ierr = ISGetIndices(r,&idx);CHKERRQ(ierr);
          for (i=0; i<M; i++) {ierr = PetscMUMPSIntCast(idx[i]+1,&(mumps->id.perm_in[i]));CHKERRQ(ierr);} /* perm_in[]: start from 1, not 0! */
          ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr);
        }
      }
    }
    break;
  case 3:  /* distributed assembled matrix input (size>1) */
    mumps->id.nnz_loc = mumps->nnz;
    mumps->id.irn_loc = mumps->irn;
    mumps->id.jcn_loc = mumps->jcn;
    if (mumps->id.ICNTL(6)>1) mumps->id.a_loc = (MumpsScalar*)mumps->val;
    if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
      ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr);
      ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr);
      ierr = VecDestroy(&b);CHKERRQ(ierr);
    }
    break;
  }
  PetscMUMPS_c(mumps);
  ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr);

  /* install the solver entry points used after a successful analysis */
  F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
  F->ops->solve           = MatSolve_MUMPS;
  F->ops->solvetranspose  = MatSolveTranspose_MUMPS;
  F->ops->matsolve        = MatMatSolve_MUMPS;
  F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
  PetscFunctionReturn(0);
}
2014 
2015 /* Note the Petsc r and c permutations are ignored */
2016 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
2017 {
2018   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
2019   PetscErrorCode ierr;
2020   Vec            b;
2021   const PetscInt M = A->rmap->N;
2022 
2023   PetscFunctionBegin;
2024   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
2025 
2026   /* Set MUMPS options from the options database */
2027   ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr);
2028 
2029   ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr);
2030   ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr);
2031 
2032   /* analysis phase */
2033   /*----------------*/
2034   mumps->id.job = JOB_FACTSYMBOLIC;
2035   mumps->id.n   = M;
2036   switch (mumps->id.ICNTL(18)) {
2037   case 0:  /* centralized assembled matrix input */
2038     if (!mumps->myid) {
2039       mumps->id.nnz = mumps->nnz;
2040       mumps->id.irn = mumps->irn;
2041       mumps->id.jcn = mumps->jcn;
2042       if (mumps->id.ICNTL(6)>1) {
2043         mumps->id.a = (MumpsScalar*)mumps->val;
2044       }
2045     }
2046     break;
2047   case 3:  /* distributed assembled matrix input (size>1) */
2048     mumps->id.nnz_loc = mumps->nnz;
2049     mumps->id.irn_loc = mumps->irn;
2050     mumps->id.jcn_loc = mumps->jcn;
2051     if (mumps->id.ICNTL(6)>1) {
2052       mumps->id.a_loc = (MumpsScalar*)mumps->val;
2053     }
2054     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2055       ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr);
2056       ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr);
2057       ierr = VecDestroy(&b);CHKERRQ(ierr);
2058     }
2059     break;
2060   }
2061   PetscMUMPS_c(mumps);
2062   ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr);
2063 
2064   F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
2065   F->ops->solve           = MatSolve_MUMPS;
2066   F->ops->solvetranspose  = MatSolveTranspose_MUMPS;
2067   PetscFunctionReturn(0);
2068 }
2069 
/* Note the Petsc r permutation and factor info are ignored */
/* Symbolic Cholesky factorization via MUMPS: converts A to triples, runs the MUMPS
   analysis phase (job=1), and installs the numeric-factor/solve callbacks on F. */
PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info)
{
  Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
  PetscErrorCode ierr;
  Vec            b;
  const PetscInt M = A->rmap->N; /* global matrix dimension */

  PetscFunctionBegin;
  mumps->matstruc = DIFFERENT_NONZERO_PATTERN;

  /* Set MUMPS options from the options database */
  ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr);

  /* Build the triples (COO) representation MUMPS expects; gather on the host when required */
  ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr);
  ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr);

  /* analysis phase */
  /*----------------*/
  mumps->id.job = JOB_FACTSYMBOLIC;
  mumps->id.n   = M;
  switch (mumps->id.ICNTL(18)) {
  case 0:  /* centralized assembled matrix input */
    if (!mumps->myid) { /* only the host provides the matrix in this mode */
      mumps->id.nnz = mumps->nnz;
      mumps->id.irn = mumps->irn;
      mumps->id.jcn = mumps->jcn;
      if (mumps->id.ICNTL(6)>1) {
        /* ICNTL(6)>1 requires numerical values already at analysis time */
        mumps->id.a = (MumpsScalar*)mumps->val;
      }
    }
    break;
  case 3:  /* distributed assembled matrix input (size>1) */
    mumps->id.nnz_loc = mumps->nnz;
    mumps->id.irn_loc = mumps->irn;
    mumps->id.jcn_loc = mumps->jcn;
    if (mumps->id.ICNTL(6)>1) {
      mumps->id.a_loc = (MumpsScalar*)mumps->val;
    }
    if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
      ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr);
      ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr);
      ierr = VecDestroy(&b);CHKERRQ(ierr);
    }
    break;
  }
  PetscMUMPS_c(mumps); /* run the MUMPS analysis; errors reported below via INFOG */
  ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr);

  F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
  F->ops->solve                 = MatSolve_MUMPS;
  /* matrix is symmetric (Hermitian), so a transpose solve is the same as a solve */
  F->ops->solvetranspose        = MatSolve_MUMPS;
  F->ops->matsolve              = MatMatSolve_MUMPS;
  F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
#if defined(PETSC_USE_COMPLEX)
  /* inertia is not provided for complex matrices */
  F->ops->getinertia = NULL;
#else
  F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
#endif
  PetscFunctionReturn(0);
}
2131 
/* Print the MUMPS control parameters (ICNTL/CNTL) and, with ASCII_INFO format, the
   per-process and global statistics (RINFO/INFO/RINFOG/INFOG) gathered by MUMPS. */
PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;

  PetscFunctionBegin;
  /* check if matrix is mumps type */
  if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0);

  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  SYM (matrix type):                   %d\n",mumps->id.sym);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  PAR (host participation):            %d\n",mumps->id.par);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(1) (output for error):         %d\n",mumps->id.ICNTL(1));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(2) (output of diagnostic msg): %d\n",mumps->id.ICNTL(2));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(3) (output for global info):   %d\n",mumps->id.ICNTL(3));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(4) (level of printing):        %d\n",mumps->id.ICNTL(4));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(5) (input mat struct):         %d\n",mumps->id.ICNTL(5));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(6) (matrix prescaling):        %d\n",mumps->id.ICNTL(6));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(7) (sequential matrix ordering):%d\n",mumps->id.ICNTL(7));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(8) (scaling strategy):        %d\n",mumps->id.ICNTL(8));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(10) (max num of refinements):  %d\n",mumps->id.ICNTL(10));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(11) (error analysis):          %d\n",mumps->id.ICNTL(11));CHKERRQ(ierr);
      /* error-analysis statistics are only populated when ICNTL(11) was enabled */
      if (mumps->id.ICNTL(11)>0) {
        ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(4) (inf norm of input mat):        %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(5) (inf norm of solution):         %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(6) (inf norm of residual):         %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(9) (error estimate):               %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr);
      }
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(12) (efficiency control):                         %d\n",mumps->id.ICNTL(12));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(13) (sequential factorization of the root node):  %d\n",mumps->id.ICNTL(13));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(14) (percentage of estimated workspace increase): %d\n",mumps->id.ICNTL(14));CHKERRQ(ierr);
      /* ICNTL(15-17) not used */
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(18) (input mat struct):                           %d\n",mumps->id.ICNTL(18));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(19) (Schur complement info):                      %d\n",mumps->id.ICNTL(19));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(20) (RHS sparse pattern):                         %d\n",mumps->id.ICNTL(20));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(21) (solution struct):                            %d\n",mumps->id.ICNTL(21));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(22) (in-core/out-of-core facility):               %d\n",mumps->id.ICNTL(22));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(23) (max size of memory can be allocated locally):%d\n",mumps->id.ICNTL(23));CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(24) (detection of null pivot rows):               %d\n",mumps->id.ICNTL(24));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(25) (computation of a null space basis):          %d\n",mumps->id.ICNTL(25));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(26) (Schur options for RHS or solution):          %d\n",mumps->id.ICNTL(26));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(27) (blocking size for multiple RHS):             %d\n",mumps->id.ICNTL(27));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(28) (use parallel or sequential ordering):        %d\n",mumps->id.ICNTL(28));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(29) (parallel ordering):                          %d\n",mumps->id.ICNTL(29));CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(30) (user-specified set of entries in inv(A)):    %d\n",mumps->id.ICNTL(30));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(31) (factors is discarded in the solve phase):    %d\n",mumps->id.ICNTL(31));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(33) (compute determinant):                        %d\n",mumps->id.ICNTL(33));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(35) (activate BLR based factorization):           %d\n",mumps->id.ICNTL(35));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(36) (choice of BLR factorization variant):        %d\n",mumps->id.ICNTL(36));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(38) (estimated compression rate of LU factors):   %d\n",mumps->id.ICNTL(38));CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(1) (relative pivoting threshold):      %g \n",mumps->id.CNTL(1));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(3) (absolute pivoting threshold):      %g \n",mumps->id.CNTL(3));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(4) (value of static pivoting):         %g \n",mumps->id.CNTL(4));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(5) (fixation for null pivots):         %g \n",mumps->id.CNTL(5));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(7) (dropping parameter for BLR):       %g \n",mumps->id.CNTL(7));CHKERRQ(ierr);

      /* information local to each processor */
      /* synchronized printing: every rank contributes one line, flushed in rank order */
      ierr = PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d]  %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d]  %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %d\n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer, "  INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);

      /* user-requested extra INFO entries (set via options); 80 is the length of MUMPS's INFO array */
      if (mumps->ninfo && mumps->ninfo <= 80) {
        PetscInt i;
        for (i=0; i<mumps->ninfo; i++) {
          /* NOTE(review): %d with a PetscInt argument — assumes 32-bit PetscInt; verify %D is not needed for 64-bit-index builds */
          ierr = PetscViewerASCIIPrintf(viewer, "  INFO(%d): \n",mumps->info[i]);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr);
          ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
        }
      }
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);

      if (!mumps->myid) { /* information from the host */
        ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr);

        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(3));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(4));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(5) (estimated maximum front size in the complete tree): %d\n",mumps->id.INFOG(5));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(6) (number of nodes in the complete tree): %d\n",mumps->id.INFOG(6));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(7) (ordering option effectively used after analysis): %d\n",mumps->id.INFOG(7));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n",mumps->id.INFOG(8));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n",mumps->id.INFOG(9));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(10) (total integer space store the matrix factors after factorization): %d\n",mumps->id.INFOG(10));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(11) (order of largest frontal matrix after factorization): %d\n",mumps->id.INFOG(11));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(12) (number of off-diagonal pivots): %d\n",mumps->id.INFOG(12));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(13) (number of delayed pivots after factorization): %d\n",mumps->id.INFOG(13));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(14) (number of memory compress after factorization): %d\n",mumps->id.INFOG(14));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(15) (number of steps of iterative refinement after solution): %d\n",mumps->id.INFOG(15));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n",mumps->id.INFOG(16));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n",mumps->id.INFOG(17));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n",mumps->id.INFOG(18));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n",mumps->id.INFOG(19));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(20) (estimated number of entries in the factors): %d\n",mumps->id.INFOG(20));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n",mumps->id.INFOG(21));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n",mumps->id.INFOG(22));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n",mumps->id.INFOG(23));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n",mumps->id.INFOG(24));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n",mumps->id.INFOG(25));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(36));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n",mumps->id.INFOG(37));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(38));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n",mumps->id.INFOG(39));CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
2279 
2280 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info)
2281 {
2282   Mat_MUMPS *mumps =(Mat_MUMPS*)A->data;
2283 
2284   PetscFunctionBegin;
2285   info->block_size        = 1.0;
2286   info->nz_allocated      = mumps->id.INFOG(20);
2287   info->nz_used           = mumps->id.INFOG(20);
2288   info->nz_unneeded       = 0.0;
2289   info->assemblies        = 0.0;
2290   info->mallocs           = 0.0;
2291   info->memory            = 0.0;
2292   info->fill_ratio_given  = 0;
2293   info->fill_ratio_needed = 0;
2294   info->factor_mallocs    = 0;
2295   PetscFunctionReturn(0);
2296 }
2297 
2298 /* -------------------------------------------------------------------------------------------*/
/* Register the index set of the Schur-complement variables with MUMPS and allocate the
   dense sequential matrix F->schur that will receive the Schur complement.
   Limitation: in parallel, only the root process may hold a non-empty IS. */
PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
{
  Mat_MUMPS         *mumps =(Mat_MUMPS*)F->data;
  const PetscScalar *arr;
  const PetscInt    *idxs;
  PetscInt          size,i;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr);
  if (mumps->petsc_size > 1) {
    PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */

    ls   = mumps->myid ? (size ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
    ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRMPI(ierr);
    if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc\n");
  }

  /* Schur complement matrix */
  ierr = MatDestroy(&F->schur);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur);CHKERRQ(ierr);
  /* MUMPS writes directly into the dense array of F->schur; the const is cast away intentionally */
  ierr = MatDenseGetArrayRead(F->schur,&arr);CHKERRQ(ierr);
  mumps->id.schur      = (MumpsScalar*)arr;
  mumps->id.size_schur = size;
  mumps->id.schur_lld  = size; /* leading dimension equals the Schur size (no padding) */
  ierr = MatDenseRestoreArrayRead(F->schur,&arr);CHKERRQ(ierr);
  if (mumps->sym == 1) {
    /* sym==1 means MUMPS treats the matrix as SPD; propagate that to the Schur complement */
    ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);
  }

  /* MUMPS expects Fortran style indices */
  ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&mumps->id.listvar_schur);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr);
  for (i=0; i<size; i++) {ierr = PetscMUMPSIntCast(idxs[i]+1,&(mumps->id.listvar_schur[i]));CHKERRQ(ierr);} /* +1: 0-based -> 1-based */
  ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr);
  if (mumps->petsc_size > 1) {
    mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */
  } else {
    if (F->factortype == MAT_FACTOR_LU) {
      mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
    } else {
      mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
    }
  }
  /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
  mumps->id.ICNTL(26) = -1;
  PetscFunctionReturn(0);
}
2348 
2349 /* -------------------------------------------------------------------------------------------*/
/* Copy the Schur complement computed by MUMPS into a new sequential dense matrix *S.
   The copy depends on how MUMPS stored it (ICNTL(19)): by rows, by columns, full,
   or lower-triangular; for symmetric cases the triangle is mirrored to fill *S. */
PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S)
{
  Mat            St;
  Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
  PetscScalar    *array;
#if defined(PETSC_USE_COMPLEX)
  PetscScalar    im = PetscSqrtScalar((PetscScalar)-1.0); /* imaginary unit, to rebuild PetscScalar from MUMPS's (r,i) pairs */
#endif
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
  ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr);
  ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr);
  ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(St);CHKERRQ(ierr);
  ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr);
  if (!mumps->sym) { /* MUMPS always return a full matrix */
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
      /* transpose while copying: MUMPS row-major -> PETSc dense column-major */
      PetscInt i,j,N=mumps->id.size_schur;
      for (i=0;i<N;i++) {
        for (j=0;j<N;j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i*N+j];
#else
          PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
#endif
          array[j*N+i] = val;
        }
      }
    } else { /* stored by columns */
      /* layouts already match: straight copy */
      ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr);
    }
  } else { /* either full or lower-triangular (not packed) */
    if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
      /* mirror each (i,j), j>=i, into both triangles of the dense array */
      PetscInt i,j,N=mumps->id.size_schur;
      for (i=0;i<N;i++) {
        for (j=i;j<N;j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i*N+j];
#else
          PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
#endif
          array[i*N+j] = val;
          array[j*N+i] = val;
        }
      }
    } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
      ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr);
    } else { /* ICNTL(19) == 1 lower triangular stored by rows */
      /* mirror each (i,j), j<=i, into both triangles of the dense array */
      PetscInt i,j,N=mumps->id.size_schur;
      for (i=0;i<N;i++) {
        for (j=0;j<i+1;j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i*N+j];
#else
          PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
#endif
          array[i*N+j] = val;
          array[j*N+i] = val;
        }
      }
    }
  }
  ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr);
  *S   = St; /* caller owns the new matrix */
  PetscFunctionReturn(0);
}
2418 
2419 /* -------------------------------------------------------------------------------------------*/
2420 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival)
2421 {
2422   PetscErrorCode ierr;
2423   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2424 
2425   PetscFunctionBegin;
2426   ierr = PetscMUMPSIntCast(ival,&mumps->id.ICNTL(icntl));CHKERRQ(ierr);
2427   PetscFunctionReturn(0);
2428 }
2429 
2430 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival)
2431 {
2432   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2433 
2434   PetscFunctionBegin;
2435   *ival = mumps->id.ICNTL(icntl);
2436   PetscFunctionReturn(0);
2437 }
2438 
2439 /*@
2440   MatMumpsSetIcntl - Set MUMPS parameter ICNTL()
2441 
2442    Logically Collective on Mat
2443 
2444    Input Parameters:
2445 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2446 .  icntl - index of MUMPS parameter array ICNTL()
2447 -  ival - value of MUMPS ICNTL(icntl)
2448 
2449   Options Database:
2450 .   -mat_mumps_icntl_<icntl> <ival>
2451 
2452    Level: beginner
2453 
2454    References:
2455 .     MUMPS Users' Guide
2456 
2457 .seealso: MatGetFactor(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2458  @*/
2459 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival)
2460 {
2461   PetscErrorCode ierr;
2462 
2463   PetscFunctionBegin;
2464   PetscValidType(F,1);
2465   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2466   PetscValidLogicalCollectiveInt(F,icntl,2);
2467   PetscValidLogicalCollectiveInt(F,ival,3);
2468   ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr);
2469   PetscFunctionReturn(0);
2470 }
2471 
2472 /*@
2473   MatMumpsGetIcntl - Get MUMPS parameter ICNTL()
2474 
2475    Logically Collective on Mat
2476 
2477    Input Parameters:
2478 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2479 -  icntl - index of MUMPS parameter array ICNTL()
2480 
2481   Output Parameter:
2482 .  ival - value of MUMPS ICNTL(icntl)
2483 
2484    Level: beginner
2485 
2486    References:
2487 .     MUMPS Users' Guide
2488 
2489 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2490 @*/
2491 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival)
2492 {
2493   PetscErrorCode ierr;
2494 
2495   PetscFunctionBegin;
2496   PetscValidType(F,1);
2497   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2498   PetscValidLogicalCollectiveInt(F,icntl,2);
2499   PetscValidIntPointer(ival,3);
2500   ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
2501   PetscFunctionReturn(0);
2502 }
2503 
2504 /* -------------------------------------------------------------------------------------------*/
2505 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val)
2506 {
2507   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2508 
2509   PetscFunctionBegin;
2510   mumps->id.CNTL(icntl) = val;
2511   PetscFunctionReturn(0);
2512 }
2513 
2514 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val)
2515 {
2516   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2517 
2518   PetscFunctionBegin;
2519   *val = mumps->id.CNTL(icntl);
2520   PetscFunctionReturn(0);
2521 }
2522 
2523 /*@
2524   MatMumpsSetCntl - Set MUMPS parameter CNTL()
2525 
2526    Logically Collective on Mat
2527 
2528    Input Parameters:
2529 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2530 .  icntl - index of MUMPS parameter array CNTL()
2531 -  val - value of MUMPS CNTL(icntl)
2532 
2533   Options Database:
2534 .   -mat_mumps_cntl_<icntl> <val>
2535 
2536    Level: beginner
2537 
2538    References:
2539 .     MUMPS Users' Guide
2540 
2541 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2542 @*/
2543 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val)
2544 {
2545   PetscErrorCode ierr;
2546 
2547   PetscFunctionBegin;
2548   PetscValidType(F,1);
2549   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2550   PetscValidLogicalCollectiveInt(F,icntl,2);
2551   PetscValidLogicalCollectiveReal(F,val,3);
2552   ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr);
2553   PetscFunctionReturn(0);
2554 }
2555 
2556 /*@
2557   MatMumpsGetCntl - Get MUMPS parameter CNTL()
2558 
2559    Logically Collective on Mat
2560 
2561    Input Parameters:
2562 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2563 -  icntl - index of MUMPS parameter array CNTL()
2564 
2565   Output Parameter:
2566 .  val - value of MUMPS CNTL(icntl)
2567 
2568    Level: beginner
2569 
2570    References:
2571 .      MUMPS Users' Guide
2572 
2573 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2574 @*/
2575 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val)
2576 {
2577   PetscErrorCode ierr;
2578 
2579   PetscFunctionBegin;
2580   PetscValidType(F,1);
2581   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2582   PetscValidLogicalCollectiveInt(F,icntl,2);
2583   PetscValidRealPointer(val,3);
2584   ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
2585   PetscFunctionReturn(0);
2586 }
2587 
2588 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info)
2589 {
2590   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2591 
2592   PetscFunctionBegin;
2593   *info = mumps->id.INFO(icntl);
2594   PetscFunctionReturn(0);
2595 }
2596 
2597 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog)
2598 {
2599   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2600 
2601   PetscFunctionBegin;
2602   *infog = mumps->id.INFOG(icntl);
2603   PetscFunctionReturn(0);
2604 }
2605 
2606 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo)
2607 {
2608   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2609 
2610   PetscFunctionBegin;
2611   *rinfo = mumps->id.RINFO(icntl);
2612   PetscFunctionReturn(0);
2613 }
2614 
2615 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog)
2616 {
2617   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2618 
2619   PetscFunctionBegin;
2620   *rinfog = mumps->id.RINFOG(icntl);
2621   PetscFunctionReturn(0);
2622 }
2623 
/* Compute the user-specified entries of inv(A) by driving a MUMPS solve with a sparse,
   host-centralized right-hand side (ICNTL(30)). spRHS must be a MATTRANSPOSEMAT wrapper;
   the wrapped matrix Bt supplies, in CSR form, the pattern of the requested entries. */
PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS)
{
  PetscErrorCode ierr;
  Mat            Bt = NULL,Btseq = NULL;
  PetscBool      flg;
  Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
  PetscScalar    *aa;
  PetscInt       spnr,*ia,*ja,M,nrhs;

  PetscFunctionBegin;
  PetscValidPointer(spRHS,2);
  ierr = PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be type MATTRANSPOSEMAT matrix");

  /* ICNTL(30)=1: ask MUMPS to compute a user-specified set of entries of the inverse */
  ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr);

  if (mumps->petsc_size > 1) {
    /* NOTE(review): assumes Bt is an MPIAIJ matrix; only its local diagonal block is used -- confirm with callers */
    Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data;
    Btseq = b->A;
  } else {
    Btseq = Bt;
  }

  ierr = MatGetSize(spRHS,&M,&nrhs);CHKERRQ(ierr);
  mumps->id.nrhs = nrhs;
  mumps->id.lrhs = M;
  mumps->id.rhs  = NULL; /* no dense RHS: the RHS is passed in sparse form below */

  if (!mumps->myid) {
    /* rank 0 hands MUMPS the centralized sparse RHS (1-based indices, per MUMPS convention) */
    ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr);
    ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
    /* cast/copy PetscInt CSR arrays into PetscMUMPSInt arrays expected by MUMPS */
    ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr);
    mumps->id.rhs_sparse  = (MumpsScalar*)aa;
  } else {
    /* non-root ranks contribute no RHS entries */
    mumps->id.irhs_ptr    = NULL;
    mumps->id.irhs_sparse = NULL;
    mumps->id.nz_rhs      = 0;
    mumps->id.rhs_sparse  = NULL;
  }
  mumps->id.ICNTL(20)   = 1; /* rhs is sparse */
  mumps->id.ICNTL(21)   = 0; /* solution is in assembled centralized format */

  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0)
    SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));

  if (!mumps->myid) {
    /* release the value and CSR index arrays taken above */
    ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr);
    ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
  }
  PetscFunctionReturn(0);
}
2683 
2684 /*@
2685   MatMumpsGetInverse - Get user-specified set of entries in inverse of A
2686 
2687    Logically Collective on Mat
2688 
2689    Input Parameters:
2690 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2691 -  spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0]
2692 
2693   Output Parameter:
2694 . spRHS - requested entries of inverse of A
2695 
2696    Level: beginner
2697 
2698    References:
2699 .      MUMPS Users' Guide
2700 
2701 .seealso: MatGetFactor(), MatCreateTranspose()
2702 @*/
2703 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS)
2704 {
2705   PetscErrorCode ierr;
2706 
2707   PetscFunctionBegin;
2708   PetscValidType(F,1);
2709   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2710   ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr);
2711   PetscFunctionReturn(0);
2712 }
2713 
2714 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST)
2715 {
2716   PetscErrorCode ierr;
2717   Mat            spRHS;
2718 
2719   PetscFunctionBegin;
2720   ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr);
2721   ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr);
2722   ierr = MatDestroy(&spRHS);CHKERRQ(ierr);
2723   PetscFunctionReturn(0);
2724 }
2725 
2726 /*@
2727   MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T
2728 
2729    Logically Collective on Mat
2730 
2731    Input Parameters:
2732 +  F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface
2733 -  spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0]
2734 
2735   Output Parameter:
2736 . spRHST - requested entries of inverse of A^T
2737 
2738    Level: beginner
2739 
2740    References:
2741 .      MUMPS Users' Guide
2742 
2743 .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse()
2744 @*/
2745 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST)
2746 {
2747   PetscErrorCode ierr;
2748   PetscBool      flg;
2749 
2750   PetscFunctionBegin;
2751   PetscValidType(F,1);
2752   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2753   ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr);
2754   if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix");
2755 
2756   ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr);
2757   PetscFunctionReturn(0);
2758 }
2759 
2760 /*@
2761   MatMumpsGetInfo - Get MUMPS parameter INFO()
2762 
2763    Logically Collective on Mat
2764 
2765    Input Parameters:
2766 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2767 -  icntl - index of MUMPS parameter array INFO()
2768 
2769   Output Parameter:
2770 .  ival - value of MUMPS INFO(icntl)
2771 
2772    Level: beginner
2773 
2774    References:
2775 .      MUMPS Users' Guide
2776 
2777 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2778 @*/
2779 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival)
2780 {
2781   PetscErrorCode ierr;
2782 
2783   PetscFunctionBegin;
2784   PetscValidType(F,1);
2785   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2786   PetscValidIntPointer(ival,3);
2787   ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
2788   PetscFunctionReturn(0);
2789 }
2790 
2791 /*@
2792   MatMumpsGetInfog - Get MUMPS parameter INFOG()
2793 
2794    Logically Collective on Mat
2795 
2796    Input Parameters:
2797 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2798 -  icntl - index of MUMPS parameter array INFOG()
2799 
2800   Output Parameter:
2801 .  ival - value of MUMPS INFOG(icntl)
2802 
2803    Level: beginner
2804 
2805    References:
2806 .      MUMPS Users' Guide
2807 
2808 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2809 @*/
2810 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival)
2811 {
2812   PetscErrorCode ierr;
2813 
2814   PetscFunctionBegin;
2815   PetscValidType(F,1);
2816   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2817   PetscValidIntPointer(ival,3);
2818   ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
2819   PetscFunctionReturn(0);
2820 }
2821 
2822 /*@
2823   MatMumpsGetRinfo - Get MUMPS parameter RINFO()
2824 
2825    Logically Collective on Mat
2826 
2827    Input Parameters:
2828 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2829 -  icntl - index of MUMPS parameter array RINFO()
2830 
2831   Output Parameter:
2832 .  val - value of MUMPS RINFO(icntl)
2833 
2834    Level: beginner
2835 
2836    References:
2837 .       MUMPS Users' Guide
2838 
2839 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfog()
2840 @*/
2841 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val)
2842 {
2843   PetscErrorCode ierr;
2844 
2845   PetscFunctionBegin;
2846   PetscValidType(F,1);
2847   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2848   PetscValidRealPointer(val,3);
2849   ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
2850   PetscFunctionReturn(0);
2851 }
2852 
2853 /*@
2854   MatMumpsGetRinfog - Get MUMPS parameter RINFOG()
2855 
2856    Logically Collective on Mat
2857 
2858    Input Parameters:
2859 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2860 -  icntl - index of MUMPS parameter array RINFOG()
2861 
2862   Output Parameter:
2863 .  val - value of MUMPS RINFOG(icntl)
2864 
2865    Level: beginner
2866 
2867    References:
2868 .      MUMPS Users' Guide
2869 
2870 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo()
2871 @*/
2872 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val)
2873 {
2874   PetscErrorCode ierr;
2875 
2876   PetscFunctionBegin;
2877   PetscValidType(F,1);
2878   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2879   PetscValidRealPointer(val,3);
2880   ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
2881   PetscFunctionReturn(0);
2882 }
2883 
2884 /*MC
2885   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
2886   distributed and sequential matrices via the external package MUMPS.
2887 
2888   Works with MATAIJ and MATSBAIJ matrices
2889 
2890   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
2891 
2892   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below.
2893 
2894   Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver
2895 
2896   Options Database Keys:
2897 +  -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
2898 .  -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning
2899 .  -mat_mumps_icntl_3 -  ICNTL(3): output stream for global information, collected on the host
2900 .  -mat_mumps_icntl_4 -  ICNTL(4): level of printing (0 to 4)
2901 .  -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)
2902 .  -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
2903                         Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
2904 .  -mat_mumps_icntl_8  - ICNTL(8): scaling strategy (-2 to 8 or 77)
2905 .  -mat_mumps_icntl_10  - ICNTL(10): max num of refinements
2906 .  -mat_mumps_icntl_11  - ICNTL(11): statistics related to an error analysis (via -ksp_view)
2907 .  -mat_mumps_icntl_12  - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
2908 .  -mat_mumps_icntl_13  - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
2909 .  -mat_mumps_icntl_14  - ICNTL(14): percentage increase in the estimated working space
2910 .  -mat_mumps_icntl_19  - ICNTL(19): computes the Schur complement
2911 .  -mat_mumps_icntl_20  - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
2912 .  -mat_mumps_icntl_22  - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
.  -mat_mumps_icntl_23  - ICNTL(23): max size of the working memory (MB) that can be allocated per processor
2914 .  -mat_mumps_icntl_24  - ICNTL(24): detection of null pivot rows (0 or 1)
2915 .  -mat_mumps_icntl_25  - ICNTL(25): compute a solution of a deficient matrix and a null space basis
2916 .  -mat_mumps_icntl_26  - ICNTL(26): drives the solution phase if a Schur complement matrix
.  -mat_mumps_icntl_28  - ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering
2918 .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
2919 .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
2920 .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
2921 .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
2922 .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
2923 .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
2924 .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
2925 .  -mat_mumps_cntl_1  - CNTL(1): relative pivoting threshold
2926 .  -mat_mumps_cntl_2  -  CNTL(2): stopping criterion of refinement
2927 .  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
2928 .  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
2929 .  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
2930 .  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
2931 -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
2932                                    Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.
2933 
2934   Level: beginner
2935 
2936     Notes:
2937     MUMPS Cholesky does not handle (complex) Hermitian matrices http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf so using it will error if the matrix is Hermitian.
2938 
2939     When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling
2940 $          KSPGetPC(ksp,&pc);
2941 $          PCFactorGetMatrix(pc,&mat);
2942 $          MatMumpsGetInfo(mat,....);
2943 $          MatMumpsGetInfog(mat,....); etc.
2944            Or you can run with -ksp_error_if_not_converged and the program will be stopped and the information printed in the error message.
2945 
2946   Using MUMPS with 64-bit integers
2947     MUMPS provides 64-bit integer support in two build modes:
2948       full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
2949       requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
2950 
2951       selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
2952       MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
2953       columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
2954       integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
2955 
    With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.
2957 
2958   Two modes to run MUMPS/PETSc with OpenMP
2959 $     Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
2960 $     threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
2961 
2962 $     -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
2963 $     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
2964 
2965    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
2966    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
2967    (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
2968    libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
   (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).
2970 
2971    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
2972    processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
2973    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
2974    are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set
2975    by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs.
2976    In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets,
2977    if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind
2978    MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half
2979    cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the
2980    problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding.
2981    For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
2982    examine the mapping result.
2983 
2984    PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts,
2985    for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc
2986    calls omp_set_num_threads(m) internally before calling MUMPS.
2987 
2988    References:
2989 +   1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
2990 -   2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
2991 
2992 .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCFactorGetMatrix()
2993 
2994 M*/
2995 
/* Report the solver package name of this factor: always MATSOLVERMUMPS. */
static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERMUMPS;
  PetscFunctionReturn(0);
}
3002 
/* MatGetFactor for Seq and MPI AIJ matrices */
/* Builds the MUMPS factor object B for an AIJ matrix A: creates B, attaches the
   MUMPS-specific methods, selects the triple-conversion routine matching A's type
   and the requested factorization, and initializes the MUMPS instance. */
static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqAIJ;
  PetscMPIInt    size;

  PetscFunctionBegin;
 #if defined(PETSC_USE_COMPLEX)
  if (A->hermitian && !A->symmetric && ftype == MAT_FACTOR_CHOLESKY) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
 #endif
  /* Create the factorization matrix */
  ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  /* Compose the MUMPS-specific methods so MatMumpsXxx()/MatFactorXxx() calls dispatch to this factor */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  if (ftype == MAT_FACTOR_LU) {
    /* LU: convert AIJ triples as-is (MUMPS sym=0, unsymmetric) */
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
    ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]);CHKERRQ(ierr);
    mumps->sym = 0;
  } else {
    /* Cholesky: convert AIJ to symmetric (sbaij-style) triples */
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
    B->factortype                  = MAT_FACTOR_CHOLESKY;
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
    ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY]);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    mumps->sym = 2;
#else
    /* sym=1 (SPD) when the user declared A SPD; otherwise sym=2 (general symmetric) */
    if (A->spd_set && A->spd) mumps->sym = 1;
    else                      mumps->sym = 2;
#endif
  }

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}
3079 
/* MatGetFactor for Seq and MPI SBAIJ matrices */
/* Builds the MUMPS factor object B for an SBAIJ matrix A. Only Cholesky is set up
   here (ftype is not consulted: B->factortype is unconditionally MAT_FACTOR_CHOLESKY). */
static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqSBAIJ;
  PetscMPIInt    size;

  PetscFunctionBegin;
 #if defined(PETSC_USE_COMPLEX)
  if (A->hermitian && !A->symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
 #endif
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
  /* pick the triple-conversion routine matching the (Seq vs MPI) SBAIJ type of A */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr);
  if (isSeqSBAIJ) {
    mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
  } else {
    mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
  }

  B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
  B->ops->view                   = MatView_MUMPS;
  B->ops->getinfo                = MatGetInfo_MUMPS;

  /* Compose the MUMPS-specific methods so MatMumpsXxx()/MatFactorXxx() calls dispatch to this factor */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  B->factortype = MAT_FACTOR_CHOLESKY;
#if defined(PETSC_USE_COMPLEX)
  mumps->sym = 2;
#else
  /* sym=1 (SPD) when the user declared A SPD; otherwise sym=2 (general symmetric) */
  if (A->spd_set && A->spd) mumps->sym = 1;
  else                      mumps->sym = 2;
#endif

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }
  ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY]);CHKERRQ(ierr);
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}
3149 
3150 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F)
3151 {
3152   Mat            B;
3153   PetscErrorCode ierr;
3154   Mat_MUMPS      *mumps;
3155   PetscBool      isSeqBAIJ;
3156   PetscMPIInt    size;
3157 
3158   PetscFunctionBegin;
3159   /* Create the factorization matrix */
3160   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr);
3161   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3162   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3163   ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
3164   ierr = MatSetUp(B);CHKERRQ(ierr);
3165 
3166   ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
3167   if (ftype == MAT_FACTOR_LU) {
3168     B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
3169     B->factortype            = MAT_FACTOR_LU;
3170     if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
3171     else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
3172     mumps->sym = 0;
3173     ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]);CHKERRQ(ierr);
3174   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead\n");
3175 
3176   B->ops->view        = MatView_MUMPS;
3177   B->ops->getinfo     = MatGetInfo_MUMPS;
3178 
3179   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
3180   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
3181   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
3182   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
3183   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
3184   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
3185   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
3186   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
3187   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
3188   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
3189   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
3190   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
3191   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);
3192 
3193   /* set solvertype */
3194   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
3195   ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
3196   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
3197   if (size == 1) {
3198     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3199     B->canuseordering = PETSC_TRUE;
3200   }
3201   B->ops->destroy = MatDestroy_MUMPS;
3202   B->data         = (void*)mumps;
3203 
3204   ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);
3205 
3206   *F = B;
3207   PetscFunctionReturn(0);
3208 }
3209 
/* MatGetFactor for SELL matrices; currently only sequential (MATSEQSELL) LU is implemented, the MPI case errors with "To be implemented" */
3211 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F)
3212 {
3213   Mat            B;
3214   PetscErrorCode ierr;
3215   Mat_MUMPS      *mumps;
3216   PetscBool      isSeqSELL;
3217   PetscMPIInt    size;
3218 
3219   PetscFunctionBegin;
3220   /* Create the factorization matrix */
3221   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr);
3222   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3223   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3224   ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
3225   ierr = MatSetUp(B);CHKERRQ(ierr);
3226 
3227   ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
3228 
3229   B->ops->view        = MatView_MUMPS;
3230   B->ops->getinfo     = MatGetInfo_MUMPS;
3231 
3232   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
3233   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
3234   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
3235   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
3236   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
3237   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
3238   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
3239   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
3240   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
3241   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
3242   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
3243 
3244   if (ftype == MAT_FACTOR_LU) {
3245     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3246     B->factortype            = MAT_FACTOR_LU;
3247     if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
3248     else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
3249     mumps->sym = 0;
3250     ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]);CHKERRQ(ierr);
3251   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
3252 
3253   /* set solvertype */
3254   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
3255   ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
3256   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
3257   if (size == 1) {
3258     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization  */
3259     B->canuseordering = PETSC_TRUE;
3260   }
3261   B->ops->destroy = MatDestroy_MUMPS;
3262   B->data         = (void*)mumps;
3263 
3264   ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);
3265 
3266   *F = B;
3267   PetscFunctionReturn(0);
3268 }
3269 
3270 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
3271 {
3272   PetscErrorCode ierr;
3273 
3274   PetscFunctionBegin;
3275   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
3276   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
3277   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
3278   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
3279   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
3280   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
3281   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
3282   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
3283   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
3284   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
3285   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr);
3286   PetscFunctionReturn(0);
3287 }
3288 
3289