xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision 2ad7e442857a3cef22c06b0e94de84654ca4e109)
1 /*
2     Provides an interface to the MUMPS sparse solver
3 */
4 #include <petscpkg_version.h>
5 #include <petscsf.h>
6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
8 #include <../src/mat/impls/sell/mpi/mpisell.h>
9 
10 #define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")"
11 
12 EXTERN_C_BEGIN
13 #if defined(PETSC_USE_COMPLEX)
14   #if defined(PETSC_USE_REAL_SINGLE)
15     #include <cmumps_c.h>
16   #else
17     #include <zmumps_c.h>
18   #endif
19 #else
20   #if defined(PETSC_USE_REAL_SINGLE)
21     #include <smumps_c.h>
22   #else
23     #include <dmumps_c.h>
24   #endif
25 #endif
26 EXTERN_C_END
27 #define JOB_INIT         -1
28 #define JOB_NULL         0
29 #define JOB_FACTSYMBOLIC 1
30 #define JOB_FACTNUMERIC  2
31 #define JOB_SOLVE        3
32 #define JOB_END          -2
33 
34 /* calls to MUMPS */
35 #if defined(PETSC_USE_COMPLEX)
36   #if defined(PETSC_USE_REAL_SINGLE)
37     #define MUMPS_c cmumps_c
38   #else
39     #define MUMPS_c zmumps_c
40   #endif
41 #else
42   #if defined(PETSC_USE_REAL_SINGLE)
43     #define MUMPS_c smumps_c
44   #else
45     #define MUMPS_c dmumps_c
46   #endif
47 #endif
48 
49 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
50    number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
51    naming convention in PetscMPIInt, PetscBLASInt etc.
52 */
53 typedef MUMPS_INT PetscMUMPSInt;
54 
55 #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
56   #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
57     #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
58   #endif
59 #else
60   #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */
61     #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
62   #endif
63 #endif
64 
65 #define MPIU_MUMPSINT       MPI_INT
66 #define PETSC_MUMPS_INT_MAX 2147483647
67 #define PETSC_MUMPS_INT_MIN -2147483648
68 
69 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/
70 static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a, PetscMUMPSInt *b)
71 {
72   PetscFunctionBegin;
73 #if PetscDefined(USE_64BIT_INDICES)
74   PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
75 #endif
76   *b = (PetscMUMPSInt)(a);
77   PetscFunctionReturn(PETSC_SUCCESS);
78 }
79 
80 /* Put these utility routines here since they are only used in this file */
81 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
82 {
83   PetscInt  myval;
84   PetscBool myset;
85   PetscFunctionBegin;
86   /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
87   PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
88   if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
89   if (set) *set = myset;
90   PetscFunctionReturn(PETSC_SUCCESS);
91 }
92 #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)
93 
94 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */
95 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
96   #define PetscMUMPS_c(mumps) \
97     do { \
98       if (mumps->use_petsc_omp_support) { \
99         if (mumps->is_omp_master) { \
100           PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
101           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
102           PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
103           PetscCall(PetscFPTrapPop()); \
104           PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
105         } \
106         PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
107         /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific      \
108          to processes, so we only Bcast info[1], an error code and leave others (since they do not have   \
109          an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82.                   \
110          omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \
111       */ \
112         PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
113         PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfog), MPIU_REAL, 0, mumps->omp_comm)); \
114         PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(mumps->id.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
115         PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(mumps->id.rinfo), MPIU_REAL, 0, mumps->omp_comm)); \
116       } else { \
117         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
118         PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
119         PetscCall(PetscFPTrapPop()); \
120       } \
121     } while (0)
122 #else
123   #define PetscMUMPS_c(mumps) \
124     do { \
125       PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
126       PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(&mumps->id)); \
127       PetscCall(PetscFPTrapPop()); \
128     } while (0)
129 #endif
130 
131 /* declare MumpsScalar */
132 #if defined(PETSC_USE_COMPLEX)
133   #if defined(PETSC_USE_REAL_SINGLE)
134     #define MumpsScalar mumps_complex
135   #else
136     #define MumpsScalar mumps_double_complex
137   #endif
138 #else
139   #define MumpsScalar PetscScalar
140 #endif
141 
142 /* macros s.t. indices match MUMPS documentation */
143 #define ICNTL(I)  icntl[(I)-1]
144 #define CNTL(I)   cntl[(I)-1]
145 #define INFOG(I)  infog[(I)-1]
146 #define INFO(I)   info[(I)-1]
147 #define RINFOG(I) rinfog[(I)-1]
148 #define RINFO(I)  rinfo[(I)-1]
149 
150 typedef struct Mat_MUMPS Mat_MUMPS;
151 struct Mat_MUMPS {
152 #if defined(PETSC_USE_COMPLEX)
153   #if defined(PETSC_USE_REAL_SINGLE)
154   CMUMPS_STRUC_C id;
155   #else
156   ZMUMPS_STRUC_C id;
157   #endif
158 #else
159   #if defined(PETSC_USE_REAL_SINGLE)
160   SMUMPS_STRUC_C id;
161   #else
162   DMUMPS_STRUC_C id;
163   #endif
164 #endif
165 
166   MatStructure   matstruc;
167   PetscMPIInt    myid, petsc_size;
168   PetscMUMPSInt *irn, *jcn;       /* the (i,j,v) triplets passed to mumps. */
169   PetscScalar   *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
170   PetscInt64     nnz;             /* number of nonzeros. The type is called selective 64-bit in mumps */
171   PetscMUMPSInt  sym;
172   MPI_Comm       mumps_comm;
173   PetscMUMPSInt *ICNTL_pre;
174   PetscReal     *CNTL_pre;
175   PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
176   VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
177   PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
178   PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
179 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
180   PetscInt    *rhs_nrow, max_nrhs;
181   PetscMPIInt *rhs_recvcounts, *rhs_disps;
182   PetscScalar *rhs_loc, *rhs_recvbuf;
183 #endif
184   Vec            b_seq, x_seq;
185   PetscInt       ninfo, *info; /* which INFO to display */
186   PetscInt       sizeredrhs;
187   PetscScalar   *schur_sol;
188   PetscInt       schur_sizesol;
189   PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
190   PetscInt64     cur_ilen, cur_jlen;  /* current len of ia_alloc[], ja_alloc[] */
191   PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
192 
193   /* Support for MATNEST */
194   PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
195   PetscInt64  *nest_vals_start;
196   PetscScalar *nest_vals;
197 
198   /* stuff used by petsc/mumps OpenMP support*/
199   PetscBool    use_petsc_omp_support;
200   PetscOmpCtrl omp_ctrl;             /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */
201   MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is petsc matrix's comm */
202   PetscInt64  *recvcount;            /* a collection of nnz on omp_master */
203   PetscMPIInt  tag, omp_comm_size;
204   PetscBool    is_omp_master; /* is this rank the master of omp_comm */
205   MPI_Request *reqs;
206 };
207 
208 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
209    Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
210  */
211 static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
212 {
213   PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */
214 
215   PetscFunctionBegin;
216 #if defined(PETSC_USE_64BIT_INDICES)
217   {
218     PetscInt i;
219     if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
220       PetscCall(PetscFree(mumps->ia_alloc));
221       PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
222       mumps->cur_ilen = nrow + 1;
223     }
224     if (nnz > mumps->cur_jlen) {
225       PetscCall(PetscFree(mumps->ja_alloc));
226       PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
227       mumps->cur_jlen = nnz;
228     }
229     for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &(mumps->ia_alloc[i])));
230     for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &(mumps->ja_alloc[i])));
231     *ia_mumps = mumps->ia_alloc;
232     *ja_mumps = mumps->ja_alloc;
233   }
234 #else
235   *ia_mumps = ia;
236   *ja_mumps = ja;
237 #endif
238   PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
239   PetscFunctionReturn(PETSC_SUCCESS);
240 }
241 
242 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
243 {
244   PetscFunctionBegin;
245   PetscCall(PetscFree(mumps->id.listvar_schur));
246   PetscCall(PetscFree(mumps->id.redrhs));
247   PetscCall(PetscFree(mumps->schur_sol));
248   mumps->id.size_schur = 0;
249   mumps->id.schur_lld  = 0;
250   mumps->id.ICNTL(19)  = 0;
251   PetscFunctionReturn(PETSC_SUCCESS);
252 }
253 
254 /* solve with rhs in mumps->id.redrhs and return in the same location */
255 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
256 {
257   Mat_MUMPS           *mumps = (Mat_MUMPS *)F->data;
258   Mat                  S, B, X;
259   MatFactorSchurStatus schurstatus;
260   PetscInt             sizesol;
261 
262   PetscFunctionBegin;
263   PetscCall(MatFactorFactorizeSchurComplement(F));
264   PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
265   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &B));
266   PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
267 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
268   PetscCall(MatBindToCPU(B, S->boundtocpu));
269 #endif
270   switch (schurstatus) {
271   case MAT_FACTOR_SCHUR_FACTORED:
272     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, (PetscScalar *)mumps->id.redrhs, &X));
273     PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
274 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
275     PetscCall(MatBindToCPU(X, S->boundtocpu));
276 #endif
277     if (!mumps->id.ICNTL(9)) { /* transpose solve */
278       PetscCall(MatMatSolveTranspose(S, B, X));
279     } else {
280       PetscCall(MatMatSolve(S, B, X));
281     }
282     break;
283   case MAT_FACTOR_SCHUR_INVERTED:
284     sizesol = mumps->id.nrhs * mumps->id.size_schur;
285     if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
286       PetscCall(PetscFree(mumps->schur_sol));
287       PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
288       mumps->schur_sizesol = sizesol;
289     }
290     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
291     PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
292 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
293     PetscCall(MatBindToCPU(X, S->boundtocpu));
294 #endif
295     PetscCall(MatProductCreateWithMat(S, B, NULL, X));
296     if (!mumps->id.ICNTL(9)) { /* transpose solve */
297       PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
298     } else {
299       PetscCall(MatProductSetType(X, MATPRODUCT_AB));
300     }
301     PetscCall(MatProductSetFromOptions(X));
302     PetscCall(MatProductSymbolic(X));
303     PetscCall(MatProductNumeric(X));
304 
305     PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
306     break;
307   default:
308     SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
309   }
310   PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
311   PetscCall(MatDestroy(&B));
312   PetscCall(MatDestroy(&X));
313   PetscFunctionReturn(PETSC_SUCCESS);
314 }
315 
316 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
317 {
318   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
319 
320   PetscFunctionBegin;
321   if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
322     PetscFunctionReturn(PETSC_SUCCESS);
323   }
324   if (!expansion) { /* prepare for the condensation step */
325     PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
326     /* allocate MUMPS internal array to store reduced right-hand sides */
327     if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
328       PetscCall(PetscFree(mumps->id.redrhs));
329       mumps->id.lredrhs = mumps->id.size_schur;
330       PetscCall(PetscMalloc1(mumps->id.nrhs * mumps->id.lredrhs, &mumps->id.redrhs));
331       mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
332     }
333   } else { /* prepare for the expansion step */
334     /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
335     PetscCall(MatMumpsSolveSchur_Private(F));
336     mumps->id.ICNTL(26) = 2; /* expansion phase */
337     PetscMUMPS_c(mumps);
338     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
339     /* restore defaults */
340     mumps->id.ICNTL(26) = -1;
341     /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
342     if (mumps->id.nrhs > 1) {
343       PetscCall(PetscFree(mumps->id.redrhs));
344       mumps->id.lredrhs = 0;
345       mumps->sizeredrhs = 0;
346     }
347   }
348   PetscFunctionReturn(PETSC_SUCCESS);
349 }
350 
351 /*
352   MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz]
353 
354   input:
355     A       - matrix in aij,baij or sbaij format
356     shift   - 0: C style output triple; 1: Fortran style output triple.
357     reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
358               MAT_REUSE_MATRIX:   only the values in v array are updated
359   output:
360     nnz     - dim of r, c, and v (number of local nonzero entries of A)
361     r, c, v - row and col index, matrix values (matrix triples)
362 
363   The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
364   freed with PetscFree(mumps->irn);  This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means
365   that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
366 
367  */
368 
369 static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
370 {
371   const PetscScalar *av;
372   const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
373   PetscInt64         nz, rnz, i, j, k;
374   PetscMUMPSInt     *row, *col;
375   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
376 
377   PetscFunctionBegin;
378   PetscCall(MatSeqAIJGetArrayRead(A, &av));
379   if (reuse == MAT_INITIAL_MATRIX) {
380     nz = aa->nz;
381     ai = aa->i;
382     aj = aa->j;
383     PetscCall(PetscMalloc2(nz, &row, nz, &col));
384     for (i = k = 0; i < M; i++) {
385       rnz = ai[i + 1] - ai[i];
386       ajj = aj + ai[i];
387       for (j = 0; j < rnz; j++) {
388         PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
389         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
390         k++;
391       }
392     }
393     mumps->val = (PetscScalar *)av;
394     mumps->irn = row;
395     mumps->jcn = col;
396     mumps->nnz = nz;
397   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqaij_seqaij(), so one needs to copy the memory */
398   else mumps->val = (PetscScalar *)av;                                           /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
399   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
400   PetscFunctionReturn(PETSC_SUCCESS);
401 }
402 
403 static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
404 {
405   PetscInt64     nz, i, j, k, r;
406   Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
407   PetscMUMPSInt *row, *col;
408 
409   PetscFunctionBegin;
410   nz = a->sliidx[a->totalslices];
411   if (reuse == MAT_INITIAL_MATRIX) {
412     PetscCall(PetscMalloc2(nz, &row, nz, &col));
413     for (i = k = 0; i < a->totalslices; i++) {
414       for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
415     }
416     for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
417     mumps->irn = row;
418     mumps->jcn = col;
419     mumps->nnz = nz;
420     mumps->val = a->val;
421   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, a->val, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsell_seqaij(), so one needs to copy the memory */
422   else mumps->val = a->val;                                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
423   PetscFunctionReturn(PETSC_SUCCESS);
424 }
425 
426 static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
427 {
428   Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
429   const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
430   PetscInt64      M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m;
431   PetscInt        bs;
432   PetscMUMPSInt  *row, *col;
433 
434   PetscFunctionBegin;
435   if (reuse == MAT_INITIAL_MATRIX) {
436     PetscCall(MatGetBlockSize(A, &bs));
437     M  = A->rmap->N / bs;
438     ai = aa->i;
439     aj = aa->j;
440     PetscCall(PetscMalloc2(nz, &row, nz, &col));
441     for (i = 0; i < M; i++) {
442       ajj = aj + ai[i];
443       rnz = ai[i + 1] - ai[i];
444       for (k = 0; k < rnz; k++) {
445         for (j = 0; j < bs; j++) {
446           for (m = 0; m < bs; m++) {
447             PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
448             PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
449             idx++;
450           }
451         }
452       }
453     }
454     mumps->irn = row;
455     mumps->jcn = col;
456     mumps->nnz = nz;
457     mumps->val = aa->a;
458   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqbaij_seqaij(), so one needs to copy the memory */
459   else mumps->val = aa->a;                                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
460   PetscFunctionReturn(PETSC_SUCCESS);
461 }
462 
463 static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
464 {
465   const PetscInt *ai, *aj, *ajj;
466   PetscInt        bs;
467   PetscInt64      nz, rnz, i, j, k, m;
468   PetscMUMPSInt  *row, *col;
469   PetscScalar    *val;
470   Mat_SeqSBAIJ   *aa  = (Mat_SeqSBAIJ *)A->data;
471   const PetscInt  bs2 = aa->bs2, mbs = aa->mbs;
472 #if defined(PETSC_USE_COMPLEX)
473   PetscBool isset, hermitian;
474 #endif
475 
476   PetscFunctionBegin;
477 #if defined(PETSC_USE_COMPLEX)
478   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
479   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
480 #endif
481   ai = aa->i;
482   aj = aa->j;
483   PetscCall(MatGetBlockSize(A, &bs));
484   if (reuse == MAT_INITIAL_MATRIX) {
485     const PetscInt64 alloc_size = aa->nz * bs2;
486 
487     PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col));
488     if (bs > 1) {
489       PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc));
490       mumps->val = mumps->val_alloc;
491     } else {
492       mumps->val = aa->a;
493     }
494     mumps->irn = row;
495     mumps->jcn = col;
496   } else {
497     row = mumps->irn;
498     col = mumps->jcn;
499   }
500   val = mumps->val;
501 
502   nz = 0;
503   if (bs > 1) {
504     for (i = 0; i < mbs; i++) {
505       rnz = ai[i + 1] - ai[i];
506       ajj = aj + ai[i];
507       for (j = 0; j < rnz; j++) {
508         for (k = 0; k < bs; k++) {
509           for (m = 0; m < bs; m++) {
510             if (ajj[j] > i || k >= m) {
511               if (reuse == MAT_INITIAL_MATRIX) {
512                 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
513                 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
514               }
515               val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
516             }
517           }
518         }
519       }
520     }
521   } else if (reuse == MAT_INITIAL_MATRIX) {
522     for (i = 0; i < mbs; i++) {
523       rnz = ai[i + 1] - ai[i];
524       ajj = aj + ai[i];
525       for (j = 0; j < rnz; j++) {
526         PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
527         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
528         nz++;
529       }
530     }
531     PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT, nz, aa->nz);
532   } else if (mumps->nest_vals)
533     PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX, MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsbaij_seqsbaij(), so one needs to copy the memory */
534   else mumps->val = aa->a;                               /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
535   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
536   PetscFunctionReturn(PETSC_SUCCESS);
537 }
538 
539 static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
540 {
541   const PetscInt    *ai, *aj, *ajj, *adiag, M = A->rmap->n;
542   PetscInt64         nz, rnz, i, j;
543   const PetscScalar *av, *v1;
544   PetscScalar       *val;
545   PetscMUMPSInt     *row, *col;
546   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
547   PetscBool          missing;
548 #if defined(PETSC_USE_COMPLEX)
549   PetscBool hermitian, isset;
550 #endif
551 
552   PetscFunctionBegin;
553 #if defined(PETSC_USE_COMPLEX)
554   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
555   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
556 #endif
557   PetscCall(MatSeqAIJGetArrayRead(A, &av));
558   ai    = aa->i;
559   aj    = aa->j;
560   adiag = aa->diag;
561   PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL));
562   if (reuse == MAT_INITIAL_MATRIX) {
563     /* count nz in the upper triangular part of A */
564     nz = 0;
565     if (missing) {
566       for (i = 0; i < M; i++) {
567         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
568           for (j = ai[i]; j < ai[i + 1]; j++) {
569             if (aj[j] < i) continue;
570             nz++;
571           }
572         } else {
573           nz += ai[i + 1] - adiag[i];
574         }
575       }
576     } else {
577       for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
578     }
579     PetscCall(PetscMalloc2(nz, &row, nz, &col));
580     PetscCall(PetscMalloc1(nz, &val));
581     mumps->nnz = nz;
582     mumps->irn = row;
583     mumps->jcn = col;
584     mumps->val = mumps->val_alloc = val;
585 
586     nz = 0;
587     if (missing) {
588       for (i = 0; i < M; i++) {
589         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
590           for (j = ai[i]; j < ai[i + 1]; j++) {
591             if (aj[j] < i) continue;
592             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
593             PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
594             val[nz] = av[j];
595             nz++;
596           }
597         } else {
598           rnz = ai[i + 1] - adiag[i];
599           ajj = aj + adiag[i];
600           v1  = av + adiag[i];
601           for (j = 0; j < rnz; j++) {
602             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
603             PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
604             val[nz++] = v1[j];
605           }
606         }
607       }
608     } else {
609       for (i = 0; i < M; i++) {
610         rnz = ai[i + 1] - adiag[i];
611         ajj = aj + adiag[i];
612         v1  = av + adiag[i];
613         for (j = 0; j < rnz; j++) {
614           PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
615           PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
616           val[nz++] = v1[j];
617         }
618       }
619     }
620   } else {
621     nz  = 0;
622     val = mumps->val;
623     if (missing) {
624       for (i = 0; i < M; i++) {
625         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
626           for (j = ai[i]; j < ai[i + 1]; j++) {
627             if (aj[j] < i) continue;
628             val[nz++] = av[j];
629           }
630         } else {
631           rnz = ai[i + 1] - adiag[i];
632           v1  = av + adiag[i];
633           for (j = 0; j < rnz; j++) val[nz++] = v1[j];
634         }
635       }
636     } else {
637       for (i = 0; i < M; i++) {
638         rnz = ai[i + 1] - adiag[i];
639         v1  = av + adiag[i];
640         for (j = 0; j < rnz; j++) val[nz++] = v1[j];
641       }
642     }
643   }
644   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
645   PetscFunctionReturn(PETSC_SUCCESS);
646 }
647 
648 static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
649 {
650   const PetscInt    *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
651   PetscInt           bs;
652   PetscInt64         rstart, nz, i, j, k, m, jj, irow, countA, countB;
653   PetscMUMPSInt     *row, *col;
654   const PetscScalar *av, *bv, *v1, *v2;
655   PetscScalar       *val;
656   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)A->data;
657   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ *)(mat->A)->data;
658   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
659   const PetscInt     bs2 = aa->bs2, mbs = aa->mbs;
660 #if defined(PETSC_USE_COMPLEX)
661   PetscBool hermitian, isset;
662 #endif
663 
664   PetscFunctionBegin;
665 #if defined(PETSC_USE_COMPLEX)
666   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
667   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
668 #endif
669   PetscCall(MatGetBlockSize(A, &bs));
670   rstart = A->rmap->rstart;
671   ai     = aa->i;
672   aj     = aa->j;
673   bi     = bb->i;
674   bj     = bb->j;
675   av     = aa->a;
676   bv     = bb->a;
677 
678   garray = mat->garray;
679 
680   if (reuse == MAT_INITIAL_MATRIX) {
681     nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
682     PetscCall(PetscMalloc2(nz, &row, nz, &col));
683     PetscCall(PetscMalloc1(nz, &val));
684     /* can not decide the exact mumps->nnz now because of the SBAIJ */
685     mumps->irn = row;
686     mumps->jcn = col;
687     mumps->val = mumps->val_alloc = val;
688   } else {
689     val = mumps->val;
690   }
691 
692   jj   = 0;
693   irow = rstart;
694   for (i = 0; i < mbs; i++) {
695     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
696     countA = ai[i + 1] - ai[i];
697     countB = bi[i + 1] - bi[i];
698     bjj    = bj + bi[i];
699     v1     = av + ai[i] * bs2;
700     v2     = bv + bi[i] * bs2;
701 
702     if (bs > 1) {
703       /* A-part */
704       for (j = 0; j < countA; j++) {
705         for (k = 0; k < bs; k++) {
706           for (m = 0; m < bs; m++) {
707             if (rstart + ajj[j] * bs > irow || k >= m) {
708               if (reuse == MAT_INITIAL_MATRIX) {
709                 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
710                 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
711               }
712               val[jj++] = v1[j * bs2 + m + k * bs];
713             }
714           }
715         }
716       }
717 
718       /* B-part */
719       for (j = 0; j < countB; j++) {
720         for (k = 0; k < bs; k++) {
721           for (m = 0; m < bs; m++) {
722             if (reuse == MAT_INITIAL_MATRIX) {
723               PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
724               PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
725             }
726             val[jj++] = v2[j * bs2 + m + k * bs];
727           }
728         }
729       }
730     } else {
731       /* A-part */
732       for (j = 0; j < countA; j++) {
733         if (reuse == MAT_INITIAL_MATRIX) {
734           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
735           PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
736         }
737         val[jj++] = v1[j];
738       }
739 
740       /* B-part */
741       for (j = 0; j < countB; j++) {
742         if (reuse == MAT_INITIAL_MATRIX) {
743           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
744           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
745         }
746         val[jj++] = v2[j];
747       }
748     }
749     irow += bs;
750   }
751   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj;
752   PetscFunctionReturn(PETSC_SUCCESS);
753 }
754 
755 static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
756 {
757   const PetscInt    *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
758   PetscInt64         rstart, cstart, nz, i, j, jj, irow, countA, countB;
759   PetscMUMPSInt     *row, *col;
760   const PetscScalar *av, *bv, *v1, *v2;
761   PetscScalar       *val;
762   Mat                Ad, Ao;
763   Mat_SeqAIJ        *aa;
764   Mat_SeqAIJ        *bb;
765 
766   PetscFunctionBegin;
767   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
768   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
769   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
770 
771   aa = (Mat_SeqAIJ *)(Ad)->data;
772   bb = (Mat_SeqAIJ *)(Ao)->data;
773   ai = aa->i;
774   aj = aa->j;
775   bi = bb->i;
776   bj = bb->j;
777 
778   rstart = A->rmap->rstart;
779   cstart = A->cmap->rstart;
780 
781   if (reuse == MAT_INITIAL_MATRIX) {
782     nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
783     PetscCall(PetscMalloc2(nz, &row, nz, &col));
784     PetscCall(PetscMalloc1(nz, &val));
785     mumps->nnz = nz;
786     mumps->irn = row;
787     mumps->jcn = col;
788     mumps->val = mumps->val_alloc = val;
789   } else {
790     val = mumps->val;
791   }
792 
793   jj   = 0;
794   irow = rstart;
795   for (i = 0; i < m; i++) {
796     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
797     countA = ai[i + 1] - ai[i];
798     countB = bi[i + 1] - bi[i];
799     bjj    = bj + bi[i];
800     v1     = av + ai[i];
801     v2     = bv + bi[i];
802 
803     /* A-part */
804     for (j = 0; j < countA; j++) {
805       if (reuse == MAT_INITIAL_MATRIX) {
806         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
807         PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj]));
808       }
809       val[jj++] = v1[j];
810     }
811 
812     /* B-part */
813     for (j = 0; j < countB; j++) {
814       if (reuse == MAT_INITIAL_MATRIX) {
815         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
816         PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
817       }
818       val[jj++] = v2[j];
819     }
820     irow++;
821   }
822   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
823   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
824   PetscFunctionReturn(PETSC_SUCCESS);
825 }
826 
827 static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
828 {
829   Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ *)A->data;
830   Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ *)(mat->A)->data;
831   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)(mat->B)->data;
832   const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
833   const PetscInt    *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart;
834   const PetscInt     bs2 = mat->bs2;
835   PetscInt           bs;
836   PetscInt64         nz, i, j, k, n, jj, irow, countA, countB, idx;
837   PetscMUMPSInt     *row, *col;
838   const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
839   PetscScalar       *val;
840 
841   PetscFunctionBegin;
842   PetscCall(MatGetBlockSize(A, &bs));
843   if (reuse == MAT_INITIAL_MATRIX) {
844     nz = bs2 * (aa->nz + bb->nz);
845     PetscCall(PetscMalloc2(nz, &row, nz, &col));
846     PetscCall(PetscMalloc1(nz, &val));
847     mumps->nnz = nz;
848     mumps->irn = row;
849     mumps->jcn = col;
850     mumps->val = mumps->val_alloc = val;
851   } else {
852     val = mumps->val;
853   }
854 
855   jj   = 0;
856   irow = rstart;
857   for (i = 0; i < mbs; i++) {
858     countA = ai[i + 1] - ai[i];
859     countB = bi[i + 1] - bi[i];
860     ajj    = aj + ai[i];
861     bjj    = bj + bi[i];
862     v1     = av + bs2 * ai[i];
863     v2     = bv + bs2 * bi[i];
864 
865     idx = 0;
866     /* A-part */
867     for (k = 0; k < countA; k++) {
868       for (j = 0; j < bs; j++) {
869         for (n = 0; n < bs; n++) {
870           if (reuse == MAT_INITIAL_MATRIX) {
871             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
872             PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj]));
873           }
874           val[jj++] = v1[idx++];
875         }
876       }
877     }
878 
879     idx = 0;
880     /* B-part */
881     for (k = 0; k < countB; k++) {
882       for (j = 0; j < bs; j++) {
883         for (n = 0; n < bs; n++) {
884           if (reuse == MAT_INITIAL_MATRIX) {
885             PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
886             PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
887           }
888           val[jj++] = v2[idx++];
889         }
890       }
891     }
892     irow += bs;
893   }
894   PetscFunctionReturn(PETSC_SUCCESS);
895 }
896 
897 static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
898 {
899   const PetscInt    *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
900   PetscInt64         rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
901   PetscMUMPSInt     *row, *col;
902   const PetscScalar *av, *bv, *v1, *v2;
903   PetscScalar       *val;
904   Mat                Ad, Ao;
905   Mat_SeqAIJ        *aa;
906   Mat_SeqAIJ        *bb;
907 #if defined(PETSC_USE_COMPLEX)
908   PetscBool hermitian, isset;
909 #endif
910 
911   PetscFunctionBegin;
912 #if defined(PETSC_USE_COMPLEX)
913   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
914   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
915 #endif
916   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
917   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
918   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
919 
920   aa    = (Mat_SeqAIJ *)(Ad)->data;
921   bb    = (Mat_SeqAIJ *)(Ao)->data;
922   ai    = aa->i;
923   aj    = aa->j;
924   adiag = aa->diag;
925   bi    = bb->i;
926   bj    = bb->j;
927 
928   rstart = A->rmap->rstart;
929 
930   if (reuse == MAT_INITIAL_MATRIX) {
931     nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
932     nzb = 0; /* num of upper triangular entries in mat->B */
933     for (i = 0; i < m; i++) {
934       nza += (ai[i + 1] - adiag[i]);
935       countB = bi[i + 1] - bi[i];
936       bjj    = bj + bi[i];
937       for (j = 0; j < countB; j++) {
938         if (garray[bjj[j]] > rstart) nzb++;
939       }
940     }
941 
942     nz = nza + nzb; /* total nz of upper triangular part of mat */
943     PetscCall(PetscMalloc2(nz, &row, nz, &col));
944     PetscCall(PetscMalloc1(nz, &val));
945     mumps->nnz = nz;
946     mumps->irn = row;
947     mumps->jcn = col;
948     mumps->val = mumps->val_alloc = val;
949   } else {
950     val = mumps->val;
951   }
952 
953   jj   = 0;
954   irow = rstart;
955   for (i = 0; i < m; i++) {
956     ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
957     v1     = av + adiag[i];
958     countA = ai[i + 1] - adiag[i];
959     countB = bi[i + 1] - bi[i];
960     bjj    = bj + bi[i];
961     v2     = bv + bi[i];
962 
963     /* A-part */
964     for (j = 0; j < countA; j++) {
965       if (reuse == MAT_INITIAL_MATRIX) {
966         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
967         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
968       }
969       val[jj++] = v1[j];
970     }
971 
972     /* B-part */
973     for (j = 0; j < countB; j++) {
974       if (garray[bjj[j]] > rstart) {
975         if (reuse == MAT_INITIAL_MATRIX) {
976           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
977           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
978         }
979         val[jj++] = v2[j];
980       }
981     }
982     irow++;
983   }
984   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
985   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
986   PetscFunctionReturn(PETSC_SUCCESS);
987 }
988 
989 static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
990 {
991   const PetscScalar *av;
992   const PetscInt     M = A->rmap->n;
993   PetscInt64         i;
994   PetscMUMPSInt     *row, *col;
995   Vec                v;
996 
997   PetscFunctionBegin;
998   PetscCall(MatDiagonalGetDiagonal(A, &v));
999   PetscCall(VecGetArrayRead(v, &av));
1000   if (reuse == MAT_INITIAL_MATRIX) {
1001     PetscCall(PetscMalloc2(M, &row, M, &col));
1002     for (i = 0; i < M; i++) {
1003       PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
1004       col[i] = row[i];
1005     }
1006     mumps->val = (PetscScalar *)av;
1007     mumps->irn = row;
1008     mumps->jcn = col;
1009     mumps->nnz = M;
1010   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, M)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_diagonal_xaij(), so one needs to copy the memory */
1011   else mumps->val = (PetscScalar *)av;                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
1012   PetscCall(VecRestoreArrayRead(v, &av));
1013   PetscFunctionReturn(PETSC_SUCCESS);
1014 }
1015 
1016 static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1017 {
1018   Mat     **mats;
1019   PetscInt  nr, nc;
1020   PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE;
1021 
1022   PetscFunctionBegin;
1023   PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
1024   if (reuse == MAT_INITIAL_MATRIX) {
1025     PetscMUMPSInt *irns, *jcns;
1026     PetscScalar   *vals;
1027     PetscInt64     totnnz, cumnnz, maxnnz;
1028     PetscInt      *pjcns_w;
1029     IS            *rows, *cols;
1030     PetscInt     **rows_idx, **cols_idx;
1031 
1032     cumnnz = 0;
1033     maxnnz = 0;
1034     PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples));
1035     for (PetscInt r = 0; r < nr; r++) {
1036       for (PetscInt c = 0; c < nc; c++) {
1037         Mat sub = mats[r][c];
1038 
1039         mumps->nest_convert_to_triples[r * nc + c] = NULL;
1040         if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */
1041         if (sub) {
1042           PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL;
1043           PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isHTrans = PETSC_FALSE, isDiag;
1044           MatInfo   info;
1045 
1046           PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
1047           if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
1048           else {
1049             PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
1050             if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
1051           }
1052           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
1053           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
1054           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
1055           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
1056           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
1057           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
1058           PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
1059 
1060           if (chol) {
1061             if (r == c) {
1062               if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij;
1063               else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij;
1064               else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij;
1065               else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij;
1066               else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
1067             } else {
1068               if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
1069               else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
1070               else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
1071               else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
1072               else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
1073             }
1074           } else {
1075             if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
1076             else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
1077             else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
1078             else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
1079             else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
1080           }
1081           PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name);
1082           mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples;
1083           PetscCall(MatGetInfo(sub, MAT_LOCAL, &info));
1084           cumnnz += (PetscInt64)info.nz_used; /* can be overestimated for Cholesky */
1085           maxnnz = PetscMax(maxnnz, info.nz_used);
1086         }
1087       }
1088     }
1089 
1090     /* Allocate total COO */
1091     totnnz = cumnnz;
1092     PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns));
1093     PetscCall(PetscMalloc1(totnnz, &vals));
1094 
1095     /* Handle rows and column maps
1096        We directly map rows and use an SF for the columns */
1097     PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx));
1098     PetscCall(MatNestGetISs(A, rows, cols));
1099     for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r]));
1100     for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c]));
1101     if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w));
1102     else (void)maxnnz;
1103 
1104     cumnnz = 0;
1105     for (PetscInt r = 0; r < nr; r++) {
1106       for (PetscInt c = 0; c < nc; c++) {
1107         Mat             sub  = mats[r][c];
1108         const PetscInt *ridx = rows_idx[r];
1109         const PetscInt *cidx = cols_idx[c];
1110         PetscInt        rst;
1111         PetscSF         csf;
1112         PetscBool       isTrans, isHTrans = PETSC_FALSE, swap;
1113         PetscLayout     cmap;
1114 
1115         mumps->nest_vals_start[r * nc + c] = cumnnz;
1116         if (!mumps->nest_convert_to_triples[r * nc + c]) continue;
1117 
1118         /* Extract inner blocks if needed */
1119         PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
1120         if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
1121         else {
1122           PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
1123           if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
1124         }
1125         swap = (PetscBool)(isTrans || isHTrans);
1126 
1127         /* Get column layout to map off-process columns */
1128         PetscCall(MatGetLayouts(sub, NULL, &cmap));
1129 
1130         /* Get row start to map on-process rows */
1131         PetscCall(MatGetOwnershipRange(sub, &rst, NULL));
1132 
1133         /* Directly use the mumps datastructure and use C ordering for now */
1134         PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps));
1135 
1136         /* Swap the role of rows and columns indices for transposed blocks
1137            since we need values with global final ordering */
1138         if (swap) {
1139           cidx = rows_idx[r];
1140           ridx = cols_idx[c];
1141         }
1142 
1143         /* Communicate column indices
1144            This could have been done with a single SF but it would have complicated the code a lot.
1145            But since we do it only once, we pay the price of setting up an SF for each block */
1146         if (PetscDefined(USE_64BIT_INDICES)) {
1147           for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k];
1148         } else pjcns_w = (PetscInt *)(mumps->jcn); /* This cast is needed only to silence warnings for 64bit integers builds */
1149         PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf));
1150         PetscCall(PetscSFSetGraphLayout(csf, cmap, mumps->nnz, NULL, PETSC_OWN_POINTER, pjcns_w));
1151         PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
1152         PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
1153         PetscCall(PetscSFDestroy(&csf));
1154 
1155         /* Import indices: use direct map for rows and mapped indices for columns */
1156         if (swap) {
1157           for (PetscInt k = 0; k < mumps->nnz; k++) {
1158             PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k]));
1159             PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k]));
1160           }
1161         } else {
1162           for (PetscInt k = 0; k < mumps->nnz; k++) {
1163             PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k]));
1164             PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k]));
1165           }
1166         }
1167 
1168         /* Import values to full COO */
1169         PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz));
1170         if (isHTrans) { /* conjugate the entries */
1171           PetscScalar *v = vals + cumnnz;
1172           for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = PetscConj(v[k]);
1173         }
1174 
1175         /* Shift new starting point and sanity check */
1176         cumnnz += mumps->nnz;
1177         PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz);
1178 
1179         /* Free scratch memory */
1180         PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1181         PetscCall(PetscFree(mumps->val_alloc));
1182         mumps->val = NULL;
1183         mumps->nnz = 0;
1184       }
1185     }
1186     if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w));
1187     for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r]));
1188     for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c]));
1189     PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx));
1190     if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscInt64_FMT " != %" PetscInt64_FMT, cumnnz, totnnz);
1191     mumps->nest_vals_start[nr * nc] = cumnnz;
1192 
1193     /* Set pointers for final MUMPS data structure */
1194     mumps->nest_vals = vals;
1195     mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */
1196     mumps->val       = vals;
1197     mumps->irn       = irns;
1198     mumps->jcn       = jcns;
1199     mumps->nnz       = cumnnz;
1200   } else {
1201     PetscScalar *oval = mumps->nest_vals;
1202     for (PetscInt r = 0; r < nr; r++) {
1203       for (PetscInt c = 0; c < nc; c++) {
1204         PetscBool isTrans, isHTrans = PETSC_FALSE;
1205         Mat       sub  = mats[r][c];
1206         PetscInt  midx = r * nc + c;
1207 
1208         if (!mumps->nest_convert_to_triples[midx]) continue;
1209         PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
1210         if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
1211         else {
1212           PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
1213           if (isHTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
1214         }
1215         mumps->val = oval + mumps->nest_vals_start[midx];
1216         PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps));
1217         if (isHTrans) {
1218           PetscInt nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
1219           for (PetscInt k = 0; k < nnz; k++) mumps->val[k] = PetscConj(mumps->val[k]);
1220         }
1221       }
1222     }
1223     mumps->val = oval;
1224   }
1225   PetscFunctionReturn(PETSC_SUCCESS);
1226 }
1227 
1228 static PetscErrorCode MatDestroy_MUMPS(Mat A)
1229 {
1230   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
1231 
1232   PetscFunctionBegin;
1233   PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
1234   PetscCall(VecScatterDestroy(&mumps->scat_rhs));
1235   PetscCall(VecScatterDestroy(&mumps->scat_sol));
1236   PetscCall(VecDestroy(&mumps->b_seq));
1237   PetscCall(VecDestroy(&mumps->x_seq));
1238   PetscCall(PetscFree(mumps->id.perm_in));
1239   PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1240   PetscCall(PetscFree(mumps->val_alloc));
1241   PetscCall(PetscFree(mumps->info));
1242   PetscCall(PetscFree(mumps->ICNTL_pre));
1243   PetscCall(PetscFree(mumps->CNTL_pre));
1244   PetscCall(MatMumpsResetSchur_Private(mumps));
1245   if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
1246     mumps->id.job = JOB_END;
1247     PetscMUMPS_c(mumps);
1248     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
1249     if (mumps->mumps_comm != MPI_COMM_NULL) {
1250       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
1251       else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
1252     }
1253   }
1254 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1255   if (mumps->use_petsc_omp_support) {
1256     PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
1257     PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1258     PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
1259   }
1260 #endif
1261   PetscCall(PetscFree(mumps->ia_alloc));
1262   PetscCall(PetscFree(mumps->ja_alloc));
1263   PetscCall(PetscFree(mumps->recvcount));
1264   PetscCall(PetscFree(mumps->reqs));
1265   PetscCall(PetscFree(mumps->irhs_loc));
1266   PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples));
1267   PetscCall(PetscFree(mumps->nest_vals));
1268   PetscCall(PetscFree(A->data));
1269 
1270   /* clear composed functions */
1271   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1272   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
1273   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
1274   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
1275   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
1276   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
1277   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
1278   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
1279   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
1280   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
1281   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
1282   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL));
1283   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
1284   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
1285   PetscFunctionReturn(PETSC_SUCCESS);
1286 }
1287 
1288 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */
1289 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
1290 {
1291   Mat_MUMPS        *mumps   = (Mat_MUMPS *)A->data;
1292   const PetscMPIInt ompsize = mumps->omp_comm_size;
1293   PetscInt          i, m, M, rstart;
1294 
1295   PetscFunctionBegin;
1296   PetscCall(MatGetSize(A, &M, NULL));
1297   PetscCall(MatGetLocalSize(A, &m, NULL));
1298   PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
1299   if (ompsize == 1) {
1300     if (!mumps->irhs_loc) {
1301       mumps->nloc_rhs = m;
1302       PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
1303       PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1304       for (i = 0; i < m; i++) mumps->irhs_loc[i] = rstart + i + 1; /* use 1-based indices */
1305     }
1306     mumps->id.rhs_loc = (MumpsScalar *)array;
1307   } else {
1308 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1309     const PetscInt *ranges;
1310     PetscMPIInt     j, k, sendcount, *petsc_ranks, *omp_ranks;
1311     MPI_Group       petsc_group, omp_group;
1312     PetscScalar    *recvbuf = NULL;
1313 
1314     if (mumps->is_omp_master) {
1315       /* Lazily initialize the omp stuff for distributed rhs */
1316       if (!mumps->irhs_loc) {
1317         PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
1318         PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
1319         PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
1320         PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
1321         for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
1322         PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));
1323 
1324         /* Populate mumps->irhs_loc[], rhs_nrow[] */
1325         mumps->nloc_rhs = 0;
1326         PetscCall(MatGetOwnershipRanges(A, &ranges));
1327         for (j = 0; j < ompsize; j++) {
1328           mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
1329           mumps->nloc_rhs += mumps->rhs_nrow[j];
1330         }
1331         PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
1332         for (j = k = 0; j < ompsize; j++) {
1333           for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) mumps->irhs_loc[k] = i + 1; /* uses 1-based indices */
1334         }
1335 
1336         PetscCall(PetscFree2(omp_ranks, petsc_ranks));
1337         PetscCallMPI(MPI_Group_free(&petsc_group));
1338         PetscCallMPI(MPI_Group_free(&omp_group));
1339       }
1340 
1341       /* Realloc buffers when current nrhs is bigger than what we have met */
1342       if (nrhs > mumps->max_nrhs) {
1343         PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1344         PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
1345         mumps->max_nrhs = nrhs;
1346       }
1347 
1348       /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
1349       for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
1350       mumps->rhs_disps[0] = 0;
1351       for (j = 1; j < ompsize; j++) {
1352         mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
1353         PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
1354       }
1355       recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
1356     }
1357 
1358     PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
1359     PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));
1360 
1361     if (mumps->is_omp_master) {
1362       if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
1363         PetscScalar *dst, *dstbase = mumps->rhs_loc;
1364         for (j = 0; j < ompsize; j++) {
1365           const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
1366           dst                    = dstbase;
1367           for (i = 0; i < nrhs; i++) {
1368             PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
1369             src += mumps->rhs_nrow[j];
1370             dst += mumps->nloc_rhs;
1371           }
1372           dstbase += mumps->rhs_nrow[j];
1373         }
1374       }
1375       mumps->id.rhs_loc = (MumpsScalar *)mumps->rhs_loc;
1376     }
1377 #endif /* PETSC_HAVE_OPENMP_SUPPORT */
1378   }
1379   mumps->id.nrhs     = nrhs;
1380   mumps->id.nloc_rhs = mumps->nloc_rhs;
1381   mumps->id.lrhs_loc = mumps->nloc_rhs;
1382   mumps->id.irhs_loc = mumps->irhs_loc;
1383   PetscFunctionReturn(PETSC_SUCCESS);
1384 }
1385 
1386 static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
1387 {
1388   Mat_MUMPS         *mumps  = (Mat_MUMPS *)A->data;
1389   const PetscScalar *rarray = NULL;
1390   PetscScalar       *array;
1391   IS                 is_iden, is_petsc;
1392   PetscInt           i;
1393   PetscBool          second_solve = PETSC_FALSE;
1394   static PetscBool   cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;
1395 
1396   PetscFunctionBegin;
1397   PetscCall(PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM "
1398                                    "Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",
1399                                    &cite1));
1400   PetscCall(PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel "
1401                                    "Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",
1402                                    &cite2));
1403 
1404   if (A->factorerrortype) {
1405     PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1406     PetscCall(VecSetInf(x));
1407     PetscFunctionReturn(PETSC_SUCCESS);
1408   }
1409 
1410   mumps->id.nrhs = 1;
1411   if (mumps->petsc_size > 1) {
1412     if (mumps->ICNTL20 == 10) {
1413       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1414       PetscCall(VecGetArrayRead(b, &rarray));
1415       PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, rarray));
1416     } else {
1417       mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/
1418       PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1419       PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1420       if (!mumps->myid) {
1421         PetscCall(VecGetArray(mumps->b_seq, &array));
1422         mumps->id.rhs = (MumpsScalar *)array;
1423       }
1424     }
1425   } else {                   /* petsc_size == 1 */
1426     mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1427     PetscCall(VecCopy(b, x));
1428     PetscCall(VecGetArray(x, &array));
1429     mumps->id.rhs = (MumpsScalar *)array;
1430   }
1431 
1432   /*
1433      handle condensation step of Schur complement (if any)
1434      We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
1435      According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase
1436      Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
1437      This requires an extra call to PetscMUMPS_c and the computation of the factors for S
1438   */
1439   if (mumps->id.size_schur > 0) {
1440     PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1441     if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
1442       second_solve = PETSC_TRUE;
1443       PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1444       mumps->id.ICNTL(26) = 1; /* condensation phase */
1445     } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1446   }
1447   /* solve phase */
1448   mumps->id.job = JOB_SOLVE;
1449   PetscMUMPS_c(mumps);
1450   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
1451 
1452   /* handle expansion step of Schur complement (if any) */
1453   if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1454   else if (mumps->id.ICNTL(26) == 1) {
1455     PetscCall(MatMumpsSolveSchur_Private(A));
1456     for (i = 0; i < mumps->id.size_schur; ++i) {
1457 #if !defined(PETSC_USE_COMPLEX)
1458       PetscScalar val = mumps->id.redrhs[i];
1459 #else
1460       PetscScalar val = mumps->id.redrhs[i].r + PETSC_i * mumps->id.redrhs[i].i;
1461 #endif
1462       array[mumps->id.listvar_schur[i] - 1] = val;
1463     }
1464   }
1465 
1466   if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
1467     if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
1468       /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */
1469       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1470     }
1471     if (!mumps->scat_sol) { /* create scatter scat_sol */
1472       PetscInt *isol2_loc = NULL;
1473       PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */
1474       PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc));
1475       for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1;                        /* change Fortran style to C style */
1476       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */
1477       PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol));
1478       PetscCall(ISDestroy(&is_iden));
1479       PetscCall(ISDestroy(&is_petsc));
1480       mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
1481     }
1482 
1483     PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
1484     PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
1485   }
1486 
1487   if (mumps->petsc_size > 1) {
1488     if (mumps->ICNTL20 == 10) {
1489       PetscCall(VecRestoreArrayRead(b, &rarray));
1490     } else if (!mumps->myid) {
1491       PetscCall(VecRestoreArray(mumps->b_seq, &array));
1492     }
1493   } else PetscCall(VecRestoreArray(x, &array));
1494 
1495   PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
1496   PetscFunctionReturn(PETSC_SUCCESS);
1497 }
1498 
1499 static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x)
1500 {
1501   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
1502   const PetscMUMPSInt value = mumps->id.ICNTL(9);
1503 
1504   PetscFunctionBegin;
1505   mumps->id.ICNTL(9) = 0;
1506   PetscCall(MatSolve_MUMPS(A, b, x));
1507   mumps->id.ICNTL(9) = value;
1508   PetscFunctionReturn(PETSC_SUCCESS);
1509 }
1510 
1511 static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X)
1512 {
1513   Mat                Bt = NULL;
1514   PetscBool          denseX, denseB, flg, flgT;
1515   Mat_MUMPS         *mumps = (Mat_MUMPS *)A->data;
1516   PetscInt           i, nrhs, M;
1517   PetscScalar       *array;
1518   const PetscScalar *rbray;
1519   PetscInt           lsol_loc, nlsol_loc, *idxx, iidx = 0;
1520   PetscMUMPSInt     *isol_loc, *isol_loc_save;
1521   PetscScalar       *bray, *sol_loc, *sol_loc_save;
1522   IS                 is_to, is_from;
1523   PetscInt           k, proc, j, m, myrstart;
1524   const PetscInt    *rstart;
1525   Vec                v_mpi, msol_loc;
1526   VecScatter         scat_sol;
1527   Vec                b_seq;
1528   VecScatter         scat_rhs;
1529   PetscScalar       *aa;
1530   PetscInt           spnr, *ia, *ja;
1531   Mat_MPIAIJ        *b = NULL;
1532 
1533   PetscFunctionBegin;
1534   PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL));
1535   PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");
1536 
1537   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL));
1538   if (denseB) {
1539     PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution");
1540     mumps->id.ICNTL(20) = 0; /* dense RHS */
1541   } else {                   /* sparse B */
1542     PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices");
1543     PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT));
1544     if (flgT) { /* input B is transpose of actual RHS matrix,
1545                  because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
1546       PetscCall(MatTransposeGetMat(B, &Bt));
1547     } else SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix");
1548     mumps->id.ICNTL(20) = 1; /* sparse RHS */
1549   }
1550 
1551   PetscCall(MatGetSize(B, &M, &nrhs));
1552   mumps->id.nrhs = nrhs;
1553   mumps->id.lrhs = M;
1554   mumps->id.rhs  = NULL;
1555 
1556   if (mumps->petsc_size == 1) {
1557     PetscScalar *aa;
1558     PetscInt     spnr, *ia, *ja;
1559     PetscBool    second_solve = PETSC_FALSE;
1560 
1561     PetscCall(MatDenseGetArray(X, &array));
1562     mumps->id.rhs = (MumpsScalar *)array;
1563 
1564     if (denseB) {
1565       /* copy B to X */
1566       PetscCall(MatDenseGetArrayRead(B, &rbray));
1567       PetscCall(PetscArraycpy(array, rbray, M * nrhs));
1568       PetscCall(MatDenseRestoreArrayRead(B, &rbray));
1569     } else { /* sparse B */
1570       PetscCall(MatSeqAIJGetArray(Bt, &aa));
1571       PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1572       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
1573       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
1574       mumps->id.rhs_sparse = (MumpsScalar *)aa;
1575     }
1576     /* handle condensation step of Schur complement (if any) */
1577     if (mumps->id.size_schur > 0) {
1578       PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1579       if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
1580         second_solve = PETSC_TRUE;
1581         PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1582         mumps->id.ICNTL(26) = 1; /* condensation phase */
1583       } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1584     }
1585     /* solve phase */
1586     mumps->id.job = JOB_SOLVE;
1587     PetscMUMPS_c(mumps);
1588     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
1589 
1590     /* handle expansion step of Schur complement (if any) */
1591     if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1592     else if (mumps->id.ICNTL(26) == 1) {
1593       PetscCall(MatMumpsSolveSchur_Private(A));
1594       for (j = 0; j < nrhs; ++j)
1595         for (i = 0; i < mumps->id.size_schur; ++i) {
1596 #if !defined(PETSC_USE_COMPLEX)
1597           PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs];
1598 #else
1599           PetscScalar val = mumps->id.redrhs[i + j * mumps->id.lredrhs].r + PETSC_i * mumps->id.redrhs[i + j * mumps->id.lredrhs].i;
1600 #endif
1601           array[mumps->id.listvar_schur[i] - 1 + j * M] = val;
1602         }
1603     }
1604     if (!denseB) { /* sparse B */
1605       PetscCall(MatSeqAIJRestoreArray(Bt, &aa));
1606       PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1607       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
1608     }
1609     PetscCall(MatDenseRestoreArray(X, &array));
1610     PetscFunctionReturn(PETSC_SUCCESS);
1611   }
1612 
1613   /* parallel case: MUMPS requires rhs B to be centralized on the host! */
1614   PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1615 
1616   /* create msol_loc to hold mumps local solution */
1617   isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
1618   sol_loc_save  = (PetscScalar *)mumps->id.sol_loc;
1619 
1620   lsol_loc  = mumps->id.lsol_loc;
1621   nlsol_loc = nrhs * lsol_loc; /* length of sol_loc */
1622   PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc));
1623   mumps->id.sol_loc  = (MumpsScalar *)sol_loc;
1624   mumps->id.isol_loc = isol_loc;
1625 
1626   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc));
1627 
1628   if (denseB) {
1629     if (mumps->ICNTL20 == 10) {
1630       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1631       PetscCall(MatDenseGetArrayRead(B, &rbray));
1632       PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, rbray));
1633       PetscCall(MatDenseRestoreArrayRead(B, &rbray));
1634       PetscCall(MatGetLocalSize(B, &m, NULL));
1635       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, NULL, &v_mpi));
1636     } else {
1637       mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1638       /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in
1639         very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank
1640         0, re-arrange B into desired order, which is a local operation.
1641       */
1642 
1643       /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
1644       /* wrap dense rhs matrix B into a vector v_mpi */
1645       PetscCall(MatGetLocalSize(B, &m, NULL));
1646       PetscCall(MatDenseGetArray(B, &bray));
1647       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi));
1648       PetscCall(MatDenseRestoreArray(B, &bray));
1649 
1650       /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
1651       if (!mumps->myid) {
1652         PetscInt *idx;
1653         /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
1654         PetscCall(PetscMalloc1(nrhs * M, &idx));
1655         PetscCall(MatGetOwnershipRanges(B, &rstart));
1656         k = 0;
1657         for (proc = 0; proc < mumps->petsc_size; proc++) {
1658           for (j = 0; j < nrhs; j++) {
1659             for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i;
1660           }
1661         }
1662 
1663         PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhs * M, &b_seq));
1664         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhs * M, idx, PETSC_OWN_POINTER, &is_to));
1665         PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhs * M, 0, 1, &is_from));
1666       } else {
1667         PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq));
1668         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to));
1669         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from));
1670       }
1671       PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs));
1672       PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
1673       PetscCall(ISDestroy(&is_to));
1674       PetscCall(ISDestroy(&is_from));
1675       PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
1676 
1677       if (!mumps->myid) { /* define rhs on the host */
1678         PetscCall(VecGetArray(b_seq, &bray));
1679         mumps->id.rhs = (MumpsScalar *)bray;
1680         PetscCall(VecRestoreArray(b_seq, &bray));
1681       }
1682     }
1683   } else { /* sparse B */
1684     b = (Mat_MPIAIJ *)Bt->data;
1685 
1686     /* wrap dense X into a vector v_mpi */
1687     PetscCall(MatGetLocalSize(X, &m, NULL));
1688     PetscCall(MatDenseGetArray(X, &bray));
1689     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhs * M, (const PetscScalar *)bray, &v_mpi));
1690     PetscCall(MatDenseRestoreArray(X, &bray));
1691 
1692     if (!mumps->myid) {
1693       PetscCall(MatSeqAIJGetArray(b->A, &aa));
1694       PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1695       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
1696       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
1697       mumps->id.rhs_sparse = (MumpsScalar *)aa;
1698     } else {
1699       mumps->id.irhs_ptr    = NULL;
1700       mumps->id.irhs_sparse = NULL;
1701       mumps->id.nz_rhs      = 0;
1702       mumps->id.rhs_sparse  = NULL;
1703     }
1704   }
1705 
1706   /* solve phase */
1707   mumps->id.job = JOB_SOLVE;
1708   PetscMUMPS_c(mumps);
1709   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
1710 
1711   /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
1712   PetscCall(MatDenseGetArray(X, &array));
1713   PetscCall(VecPlaceArray(v_mpi, array));
1714 
1715   /* create scatter scat_sol */
1716   PetscCall(MatGetOwnershipRanges(X, &rstart));
1717   /* iidx: index for scatter mumps solution to petsc X */
1718 
1719   PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from));
1720   PetscCall(PetscMalloc1(nlsol_loc, &idxx));
1721   for (i = 0; i < lsol_loc; i++) {
1722     isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */
1723 
1724     for (proc = 0; proc < mumps->petsc_size; proc++) {
1725       if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) {
1726         myrstart = rstart[proc];
1727         k        = isol_loc[i] - myrstart;          /* local index on 1st column of petsc vector X */
1728         iidx     = k + myrstart * nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
1729         m        = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */
1730         break;
1731       }
1732     }
1733 
1734     for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m;
1735   }
1736   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to));
1737   PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol));
1738   PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
1739   PetscCall(ISDestroy(&is_from));
1740   PetscCall(ISDestroy(&is_to));
1741   PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
1742   PetscCall(MatDenseRestoreArray(X, &array));
1743 
1744   /* free spaces */
1745   mumps->id.sol_loc  = (MumpsScalar *)sol_loc_save;
1746   mumps->id.isol_loc = isol_loc_save;
1747 
1748   PetscCall(PetscFree2(sol_loc, isol_loc));
1749   PetscCall(PetscFree(idxx));
1750   PetscCall(VecDestroy(&msol_loc));
1751   PetscCall(VecDestroy(&v_mpi));
1752   if (!denseB) {
1753     if (!mumps->myid) {
1754       b = (Mat_MPIAIJ *)Bt->data;
1755       PetscCall(MatSeqAIJRestoreArray(b->A, &aa));
1756       PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
1757       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
1758     }
1759   } else {
1760     if (mumps->ICNTL20 == 0) {
1761       PetscCall(VecDestroy(&b_seq));
1762       PetscCall(VecScatterDestroy(&scat_rhs));
1763     }
1764   }
1765   PetscCall(VecScatterDestroy(&scat_sol));
1766   PetscCall(PetscLogFlops(nrhs * PetscMax(0, (2.0 * (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))));
1767   PetscFunctionReturn(PETSC_SUCCESS);
1768 }
1769 
1770 static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X)
1771 {
1772   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
1773   const PetscMUMPSInt value = mumps->id.ICNTL(9);
1774 
1775   PetscFunctionBegin;
1776   mumps->id.ICNTL(9) = 0;
1777   PetscCall(MatMatSolve_MUMPS(A, B, X));
1778   mumps->id.ICNTL(9) = value;
1779   PetscFunctionReturn(PETSC_SUCCESS);
1780 }
1781 
1782 static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X)
1783 {
1784   PetscBool flg;
1785   Mat       B;
1786 
1787   PetscFunctionBegin;
1788   PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
1789   PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix");
1790 
1791   /* Create B=Bt^T that uses Bt's data structure */
1792   PetscCall(MatCreateTranspose(Bt, &B));
1793 
1794   PetscCall(MatMatSolve_MUMPS(A, B, X));
1795   PetscCall(MatDestroy(&B));
1796   PetscFunctionReturn(PETSC_SUCCESS);
1797 }
1798 
1799 #if !defined(PETSC_USE_COMPLEX)
1800 /*
1801   input:
1802    F:        numeric factor
1803   output:
1804    nneg:     total number of negative pivots
1805    nzero:    total number of zero pivots
1806    npos:     (global dimension of F) - nneg - nzero
1807 */
1808 static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
1809 {
1810   Mat_MUMPS  *mumps = (Mat_MUMPS *)F->data;
1811   PetscMPIInt size;
1812 
1813   PetscFunctionBegin;
1814   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size));
1815   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
1816   PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13));
1817 
1818   if (nneg) *nneg = mumps->id.INFOG(12);
1819   if (nzero || npos) {
1820     PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
1821     if (nzero) *nzero = mumps->id.INFOG(28);
1822     if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1823   }
1824   PetscFunctionReturn(PETSC_SUCCESS);
1825 }
1826 #endif
1827 
1828 static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps)
1829 {
1830   PetscInt       i, nreqs;
1831   PetscMUMPSInt *irn, *jcn;
1832   PetscMPIInt    count;
1833   PetscInt64     totnnz, remain;
1834   const PetscInt osize = mumps->omp_comm_size;
1835   PetscScalar   *val;
1836 
1837   PetscFunctionBegin;
1838   if (osize > 1) {
1839     if (reuse == MAT_INITIAL_MATRIX) {
1840       /* master first gathers counts of nonzeros to receive */
1841       if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount));
1842       PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm));
1843 
1844       /* Then each computes number of send/recvs */
1845       if (mumps->is_omp_master) {
1846         /* Start from 1 since self communication is not done in MPI */
1847         nreqs = 0;
1848         for (i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
1849       } else {
1850         nreqs = (mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
1851       }
1852       PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */
1853 
1854       /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others.
1855          MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz
1856          might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size
1857          is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
1858        */
1859       nreqs = 0; /* counter for actual send/recvs */
1860       if (mumps->is_omp_master) {
1861         for (i = 0, totnnz = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
1862         PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn));
1863         PetscCall(PetscMalloc1(totnnz, &val));
1864 
1865         /* Self communication */
1866         PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz));
1867         PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz));
1868         PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz));
1869 
1870         /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
1871         PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1872         PetscCall(PetscFree(mumps->val_alloc));
1873         mumps->nnz = totnnz;
1874         mumps->irn = irn;
1875         mumps->jcn = jcn;
1876         mumps->val = mumps->val_alloc = val;
1877 
1878         irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
1879         jcn += mumps->recvcount[0];
1880         val += mumps->recvcount[0];
1881 
1882         /* Remote communication */
1883         for (i = 1; i < osize; i++) {
1884           count  = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX);
1885           remain = mumps->recvcount[i] - count;
1886           while (count > 0) {
1887             PetscCallMPI(MPI_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1888             PetscCallMPI(MPI_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1889             PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1890             irn += count;
1891             jcn += count;
1892             val += count;
1893             count = PetscMin(remain, PETSC_MPI_INT_MAX);
1894             remain -= count;
1895           }
1896         }
1897       } else {
1898         irn    = mumps->irn;
1899         jcn    = mumps->jcn;
1900         val    = mumps->val;
1901         count  = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX);
1902         remain = mumps->nnz - count;
1903         while (count > 0) {
1904           PetscCallMPI(MPI_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1905           PetscCallMPI(MPI_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1906           PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1907           irn += count;
1908           jcn += count;
1909           val += count;
1910           count = PetscMin(remain, PETSC_MPI_INT_MAX);
1911           remain -= count;
1912         }
1913       }
1914     } else {
1915       nreqs = 0;
1916       if (mumps->is_omp_master) {
1917         val = mumps->val + mumps->recvcount[0];
1918         for (i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */
1919           count  = PetscMin(mumps->recvcount[i], PETSC_MPI_INT_MAX);
1920           remain = mumps->recvcount[i] - count;
1921           while (count > 0) {
1922             PetscCallMPI(MPI_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1923             val += count;
1924             count = PetscMin(remain, PETSC_MPI_INT_MAX);
1925             remain -= count;
1926           }
1927         }
1928       } else {
1929         val    = mumps->val;
1930         count  = PetscMin(mumps->nnz, PETSC_MPI_INT_MAX);
1931         remain = mumps->nnz - count;
1932         while (count > 0) {
1933           PetscCallMPI(MPI_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
1934           val += count;
1935           count = PetscMin(remain, PETSC_MPI_INT_MAX);
1936           remain -= count;
1937         }
1938       }
1939     }
1940     PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE));
1941     mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */
1942   }
1943   PetscFunctionReturn(PETSC_SUCCESS);
1944 }
1945 
1946 static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, const MatFactorInfo *info)
1947 {
1948   Mat_MUMPS *mumps = (Mat_MUMPS *)(F)->data;
1949   PetscBool  isMPIAIJ;
1950 
1951   PetscFunctionBegin;
1952   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) {
1953     if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1954     PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1955     PetscFunctionReturn(PETSC_SUCCESS);
1956   }
1957 
1958   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
1959   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));
1960 
1961   /* numerical factorization phase */
1962   mumps->id.job = JOB_FACTNUMERIC;
1963   if (!mumps->id.ICNTL(18)) { /* A is centralized */
1964     if (!mumps->myid) mumps->id.a = (MumpsScalar *)mumps->val;
1965   } else {
1966     mumps->id.a_loc = (MumpsScalar *)mumps->val;
1967   }
1968   PetscMUMPS_c(mumps);
1969   if (mumps->id.INFOG(1) < 0) {
1970     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
1971     if (mumps->id.INFOG(1) == -10) {
1972       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1973       F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1974     } else if (mumps->id.INFOG(1) == -13) {
1975       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1976       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1977     } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
1978       PetscCall(PetscInfo(F, "MUMPS error in numerical factorizatione: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1979       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1980     } else {
1981       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1982       F->factorerrortype = MAT_FACTOR_OTHER;
1983     }
1984   }
1985   PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.INFOG(16));
1986 
1987   F->assembled = PETSC_TRUE;
1988 
1989   if (F->schur) { /* reset Schur status to unfactored */
1990 #if defined(PETSC_HAVE_CUDA)
1991     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
1992 #endif
1993     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1994       mumps->id.ICNTL(19) = 2;
1995       PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
1996     }
1997     PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
1998   }
1999 
2000   /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
2001   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
2002 
2003   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
2004   if (mumps->petsc_size > 1) {
2005     PetscInt     lsol_loc;
2006     PetscScalar *sol_loc;
2007 
2008     PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &isMPIAIJ));
2009 
2010     /* distributed solution; Create x_seq=sol_loc for repeated use */
2011     if (mumps->x_seq) {
2012       PetscCall(VecScatterDestroy(&mumps->scat_sol));
2013       PetscCall(PetscFree2(mumps->id.sol_loc, mumps->id.isol_loc));
2014       PetscCall(VecDestroy(&mumps->x_seq));
2015     }
2016     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
2017     PetscCall(PetscMalloc2(lsol_loc, &sol_loc, lsol_loc, &mumps->id.isol_loc));
2018     mumps->id.lsol_loc = lsol_loc;
2019     mumps->id.sol_loc  = (MumpsScalar *)sol_loc;
2020     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, lsol_loc, sol_loc, &mumps->x_seq));
2021   }
2022   PetscCall(PetscLogFlops(mumps->id.RINFO(2)));
2023   PetscFunctionReturn(PETSC_SUCCESS);
2024 }
2025 
2026 /* Sets MUMPS options from the options database */
2027 static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A)
2028 {
2029   Mat_MUMPS    *mumps = (Mat_MUMPS *)F->data;
2030   PetscMUMPSInt icntl = 0, size, *listvar_schur;
2031   PetscInt      info[80], i, ninfo = 80, rbs, cbs;
2032   PetscBool     flg = PETSC_FALSE, schur = (PetscBool)(mumps->id.ICNTL(26) == -1);
2033   MumpsScalar  *arr;
2034 
2035   PetscFunctionBegin;
2036   PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat");
2037   if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */
2038     PetscInt nthreads   = 0;
2039     PetscInt nCNTL_pre  = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2040     PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
2041 
2042     mumps->petsc_comm = PetscObjectComm((PetscObject)A);
2043     PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size));
2044     PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */
2045 
2046     PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support));
2047     if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
2048     /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */
2049     PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL));
2050     if (mumps->use_petsc_omp_support) {
2051       PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT), PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",
2052                  ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2053       PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2054 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
2055       PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl));
2056       PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master));
2057 #endif
2058     } else {
2059       mumps->omp_comm      = PETSC_COMM_SELF;
2060       mumps->mumps_comm    = mumps->petsc_comm;
2061       mumps->is_omp_master = PETSC_TRUE;
2062     }
2063     PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size));
2064     mumps->reqs = NULL;
2065     mumps->tag  = 0;
2066 
2067     if (mumps->mumps_comm != MPI_COMM_NULL) {
2068       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) {
2069         /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
2070         MPI_Comm comm;
2071         PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm));
2072         mumps->mumps_comm = comm;
2073       } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm));
2074     }
2075 
2076     mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
2077     mumps->id.job          = JOB_INIT;
2078     mumps->id.par          = 1; /* host participates factorizaton and solve */
2079     mumps->id.sym          = mumps->sym;
2080 
2081     size          = mumps->id.size_schur;
2082     arr           = mumps->id.schur;
2083     listvar_schur = mumps->id.listvar_schur;
2084     PetscMUMPS_c(mumps);
2085     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2086     /* restore cached ICNTL and CNTL values */
2087     for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl];
2088     for (icntl = 0; icntl < nCNTL_pre; ++icntl) mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1 + 2 * icntl]) = mumps->CNTL_pre[2 + 2 * icntl];
2089     PetscCall(PetscFree(mumps->ICNTL_pre));
2090     PetscCall(PetscFree(mumps->CNTL_pre));
2091 
2092     if (schur) {
2093       mumps->id.size_schur    = size;
2094       mumps->id.schur_lld     = size;
2095       mumps->id.schur         = arr;
2096       mumps->id.listvar_schur = listvar_schur;
2097       if (mumps->petsc_size > 1) {
2098         PetscBool gs; /* gs is false if any rank other than root has non-empty IS */
2099 
2100         mumps->id.ICNTL(19) = 1;                                                                            /* MUMPS returns Schur centralized on the host */
2101         gs                  = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
2102         PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm));
2103         PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc");
2104       } else {
2105         if (F->factortype == MAT_FACTOR_LU) {
2106           mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
2107         } else {
2108           mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
2109         }
2110       }
2111       mumps->id.ICNTL(26) = -1;
2112     }
2113 
2114     /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
2115        For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
2116      */
2117     PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm));
2118     PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_REAL, 0, mumps->omp_comm));
2119 
2120     mumps->scat_rhs = NULL;
2121     mumps->scat_sol = NULL;
2122 
2123     /* set PETSc-MUMPS default options - override MUMPS default */
2124     mumps->id.ICNTL(3) = 0;
2125     mumps->id.ICNTL(4) = 0;
2126     if (mumps->petsc_size == 1) {
2127       mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
2128       mumps->id.ICNTL(7)  = 7; /* automatic choice of ordering done by the package */
2129     } else {
2130       mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
2131       mumps->id.ICNTL(21) = 1; /* distributed solution */
2132     }
2133   }
2134   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg));
2135   if (flg) mumps->id.ICNTL(1) = icntl;
2136   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg));
2137   if (flg) mumps->id.ICNTL(2) = icntl;
2138   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg));
2139   if (flg) mumps->id.ICNTL(3) = icntl;
2140 
2141   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg));
2142   if (flg) mumps->id.ICNTL(4) = icntl;
2143   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */
2144 
2145   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg));
2146   if (flg) mumps->id.ICNTL(6) = icntl;
2147 
2148   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg));
2149   if (flg) {
2150     PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
2151     mumps->id.ICNTL(7) = icntl;
2152   }
2153 
2154   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL));
2155   /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */
2156   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL));
2157   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL));
2158   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL));
2159   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL));
2160   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL));
2161   PetscCall(MatGetBlockSizes(A, &rbs, &cbs));
2162   if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs;
2163   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg));
2164   if (flg) {
2165     PetscCheck(mumps->id.ICNTL(15) <= 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Positive -mat_mumps_icntl_15 not handled");
2166     PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes");
2167   }
2168   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL));
2169   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */
2170     PetscCall(MatDestroy(&F->schur));
2171     PetscCall(MatMumpsResetSchur_Private(mumps));
2172   }
2173 
2174   /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
2175      and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
2176      and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
2177      This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
2178      see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
2179      In short, we could not use distributed RHS with MPICH until v4.0b1.
2180    */
2181 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101))
2182   mumps->ICNTL20 = 0; /* Centralized dense RHS*/
2183 #else
2184   mumps->ICNTL20     = 10; /* Distributed dense RHS*/
2185 #endif
2186   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg));
2187   PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20);
2188 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
2189   PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0");
2190 #endif
2191   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */
2192 
2193   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL));
2194   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL));
2195   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL));
2196   if (mumps->id.ICNTL(24)) { mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ }
2197 
2198   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL));
2199   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL));
2200   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL));
2201   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL));
2202   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL));
2203   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
2204   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL));
2205   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL));  -- not supported by PETSc API */
2206   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
2207   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
2208   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
2209   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));
2210   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL));
2211 
2212   PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", mumps->id.CNTL(1), &mumps->id.CNTL(1), NULL));
2213   PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", mumps->id.CNTL(2), &mumps->id.CNTL(2), NULL));
2214   PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", mumps->id.CNTL(3), &mumps->id.CNTL(3), NULL));
2215   PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", mumps->id.CNTL(4), &mumps->id.CNTL(4), NULL));
2216   PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", mumps->id.CNTL(5), &mumps->id.CNTL(5), NULL));
2217   PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", mumps->id.CNTL(7), &mumps->id.CNTL(7), NULL));
2218 
2219   PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));
2220 
2221   PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL));
2222   if (ninfo) {
2223     PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must <= 80", ninfo);
2224     PetscCall(PetscMalloc1(ninfo, &mumps->info));
2225     mumps->ninfo = ninfo;
2226     for (i = 0; i < ninfo; i++) {
2227       PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must between 1 and 80", ninfo);
2228       mumps->info[i] = info[i];
2229     }
2230   }
2231   PetscOptionsEnd();
2232   PetscFunctionReturn(PETSC_SUCCESS);
2233 }
2234 
2235 static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, const MatFactorInfo *info, Mat_MUMPS *mumps)
2236 {
2237   PetscFunctionBegin;
2238   if (mumps->id.INFOG(1) < 0) {
2239     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2240     if (mumps->id.INFOG(1) == -6) {
2241       PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2242       F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
2243     } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
2244       PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2245       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2246     } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) {
2247       PetscCall(PetscInfo(F, "MUMPS error in analysis: empty matrix\n"));
2248     } else {
2249       PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2250       F->factorerrortype = MAT_FACTOR_OTHER;
2251     }
2252   }
2253   PetscFunctionReturn(PETSC_SUCCESS);
2254 }
2255 
2256 static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
2257 {
2258   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2259   Vec            b;
2260   const PetscInt M = A->rmap->N;
2261 
2262   PetscFunctionBegin;
2263   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2264     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
2265     PetscFunctionReturn(PETSC_SUCCESS);
2266   }
2267 
2268   /* Set MUMPS options from the options database */
2269   PetscCall(MatSetFromOptions_MUMPS(F, A));
2270 
2271   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2272   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2273 
2274   /* analysis phase */
2275   mumps->id.job = JOB_FACTSYMBOLIC;
2276   mumps->id.n   = M;
2277   switch (mumps->id.ICNTL(18)) {
2278   case 0: /* centralized assembled matrix input */
2279     if (!mumps->myid) {
2280       mumps->id.nnz = mumps->nnz;
2281       mumps->id.irn = mumps->irn;
2282       mumps->id.jcn = mumps->jcn;
2283       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2284       if (r) {
2285         mumps->id.ICNTL(7) = 1;
2286         if (!mumps->myid) {
2287           const PetscInt *idx;
2288           PetscInt        i;
2289 
2290           PetscCall(PetscMalloc1(M, &mumps->id.perm_in));
2291           PetscCall(ISGetIndices(r, &idx));
2292           for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */
2293           PetscCall(ISRestoreIndices(r, &idx));
2294         }
2295       }
2296     }
2297     break;
2298   case 3: /* distributed assembled matrix input (size>1) */
2299     mumps->id.nnz_loc = mumps->nnz;
2300     mumps->id.irn_loc = mumps->irn;
2301     mumps->id.jcn_loc = mumps->jcn;
2302     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2303     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2304       PetscCall(MatCreateVecs(A, NULL, &b));
2305       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2306       PetscCall(VecDestroy(&b));
2307     }
2308     break;
2309   }
2310   PetscMUMPS_c(mumps);
2311   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2312 
2313   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2314   F->ops->solve             = MatSolve_MUMPS;
2315   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2316   F->ops->matsolve          = MatMatSolve_MUMPS;
2317   F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2318   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2319 
2320   mumps->matstruc = SAME_NONZERO_PATTERN;
2321   PetscFunctionReturn(PETSC_SUCCESS);
2322 }
2323 
2324 /* Note the Petsc r and c permutations are ignored */
2325 static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
2326 {
2327   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2328   Vec            b;
2329   const PetscInt M = A->rmap->N;
2330 
2331   PetscFunctionBegin;
2332   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2333     /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
2334     PetscFunctionReturn(PETSC_SUCCESS);
2335   }
2336 
2337   /* Set MUMPS options from the options database */
2338   PetscCall(MatSetFromOptions_MUMPS(F, A));
2339 
2340   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2341   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2342 
2343   /* analysis phase */
2344   mumps->id.job = JOB_FACTSYMBOLIC;
2345   mumps->id.n   = M;
2346   switch (mumps->id.ICNTL(18)) {
2347   case 0: /* centralized assembled matrix input */
2348     if (!mumps->myid) {
2349       mumps->id.nnz = mumps->nnz;
2350       mumps->id.irn = mumps->irn;
2351       mumps->id.jcn = mumps->jcn;
2352       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2353     }
2354     break;
2355   case 3: /* distributed assembled matrix input (size>1) */
2356     mumps->id.nnz_loc = mumps->nnz;
2357     mumps->id.irn_loc = mumps->irn;
2358     mumps->id.jcn_loc = mumps->jcn;
2359     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2360     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2361       PetscCall(MatCreateVecs(A, NULL, &b));
2362       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2363       PetscCall(VecDestroy(&b));
2364     }
2365     break;
2366   }
2367   PetscMUMPS_c(mumps);
2368   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2369 
2370   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2371   F->ops->solve             = MatSolve_MUMPS;
2372   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2373   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2374 
2375   mumps->matstruc = SAME_NONZERO_PATTERN;
2376   PetscFunctionReturn(PETSC_SUCCESS);
2377 }
2378 
2379 /* Note the Petsc r permutation and factor info are ignored */
2380 static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, IS r, const MatFactorInfo *info)
2381 {
2382   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2383   Vec            b;
2384   const PetscInt M = A->rmap->N;
2385 
2386   PetscFunctionBegin;
2387   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2388     /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
2389     PetscFunctionReturn(PETSC_SUCCESS);
2390   }
2391 
2392   /* Set MUMPS options from the options database */
2393   PetscCall(MatSetFromOptions_MUMPS(F, A));
2394 
2395   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2396   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2397 
2398   /* analysis phase */
2399   mumps->id.job = JOB_FACTSYMBOLIC;
2400   mumps->id.n   = M;
2401   switch (mumps->id.ICNTL(18)) {
2402   case 0: /* centralized assembled matrix input */
2403     if (!mumps->myid) {
2404       mumps->id.nnz = mumps->nnz;
2405       mumps->id.irn = mumps->irn;
2406       mumps->id.jcn = mumps->jcn;
2407       if (mumps->id.ICNTL(6) > 1) mumps->id.a = (MumpsScalar *)mumps->val;
2408     }
2409     break;
2410   case 3: /* distributed assembled matrix input (size>1) */
2411     mumps->id.nnz_loc = mumps->nnz;
2412     mumps->id.irn_loc = mumps->irn;
2413     mumps->id.jcn_loc = mumps->jcn;
2414     if (mumps->id.ICNTL(6) > 1) mumps->id.a_loc = (MumpsScalar *)mumps->val;
2415     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2416       PetscCall(MatCreateVecs(A, NULL, &b));
2417       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2418       PetscCall(VecDestroy(&b));
2419     }
2420     break;
2421   }
2422   PetscMUMPS_c(mumps);
2423   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2424 
2425   F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
2426   F->ops->solve                 = MatSolve_MUMPS;
2427   F->ops->solvetranspose        = MatSolve_MUMPS;
2428   F->ops->matsolve              = MatMatSolve_MUMPS;
2429   F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
2430   F->ops->matsolvetranspose     = MatMatSolveTranspose_MUMPS;
2431 #if defined(PETSC_USE_COMPLEX)
2432   F->ops->getinertia = NULL;
2433 #else
2434   F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
2435 #endif
2436 
2437   mumps->matstruc = SAME_NONZERO_PATTERN;
2438   PetscFunctionReturn(PETSC_SUCCESS);
2439 }
2440 
2441 static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer)
2442 {
2443   PetscBool         iascii;
2444   PetscViewerFormat format;
2445   Mat_MUMPS        *mumps = (Mat_MUMPS *)A->data;
2446 
2447   PetscFunctionBegin;
2448   /* check if matrix is mumps type */
2449   if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS);
2450 
2451   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
2452   if (iascii) {
2453     PetscCall(PetscViewerGetFormat(viewer, &format));
2454     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
2455       PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n"));
2456       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
2457         PetscCall(PetscViewerASCIIPrintf(viewer, "  SYM (matrix type):                   %d\n", mumps->id.sym));
2458         PetscCall(PetscViewerASCIIPrintf(viewer, "  PAR (host participation):            %d\n", mumps->id.par));
2459         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(1) (output for error):         %d\n", mumps->id.ICNTL(1)));
2460         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2)));
2461         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(3) (output for global info):   %d\n", mumps->id.ICNTL(3)));
2462         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(4) (level of printing):        %d\n", mumps->id.ICNTL(4)));
2463         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(5) (input mat struct):         %d\n", mumps->id.ICNTL(5)));
2464         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(6) (matrix prescaling):        %d\n", mumps->id.ICNTL(6)));
2465         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7)));
2466         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(8) (scaling strategy):         %d\n", mumps->id.ICNTL(8)));
2467         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(10) (max num of refinements):  %d\n", mumps->id.ICNTL(10)));
2468         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(11) (error analysis):          %d\n", mumps->id.ICNTL(11)));
2469         if (mumps->id.ICNTL(11) > 0) {
2470           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(4) (inf norm of input mat):        %g\n", mumps->id.RINFOG(4)));
2471           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(5) (inf norm of solution):         %g\n", mumps->id.RINFOG(5)));
2472           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(6) (inf norm of residual):         %g\n", mumps->id.RINFOG(6)));
2473           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", mumps->id.RINFOG(7), mumps->id.RINFOG(8)));
2474           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(9) (error estimate):               %g\n", mumps->id.RINFOG(9)));
2475           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", mumps->id.RINFOG(10), mumps->id.RINFOG(11)));
2476         }
2477         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(12) (efficiency control):                         %d\n", mumps->id.ICNTL(12)));
2478         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(13) (sequential factorization of the root node):  %d\n", mumps->id.ICNTL(13)));
2479         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14)));
2480         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(15) (compression of the input matrix):            %d\n", mumps->id.ICNTL(15)));
2481         /* ICNTL(15-17) not used */
2482         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(18) (input mat struct):                           %d\n", mumps->id.ICNTL(18)));
2483         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(19) (Schur complement info):                      %d\n", mumps->id.ICNTL(19)));
2484         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(20) (RHS sparse pattern):                         %d\n", mumps->id.ICNTL(20)));
2485         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(21) (solution struct):                            %d\n", mumps->id.ICNTL(21)));
2486         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(22) (in-core/out-of-core facility):               %d\n", mumps->id.ICNTL(22)));
2487         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23)));
2488 
2489         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(24) (detection of null pivot rows):               %d\n", mumps->id.ICNTL(24)));
2490         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(25) (computation of a null space basis):          %d\n", mumps->id.ICNTL(25)));
2491         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(26) (Schur options for RHS or solution):          %d\n", mumps->id.ICNTL(26)));
2492         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(27) (blocking size for multiple RHS):             %d\n", mumps->id.ICNTL(27)));
2493         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(28) (use parallel or sequential ordering):        %d\n", mumps->id.ICNTL(28)));
2494         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(29) (parallel ordering):                          %d\n", mumps->id.ICNTL(29)));
2495 
2496         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(30) (user-specified set of entries in inv(A)):    %d\n", mumps->id.ICNTL(30)));
2497         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(31) (factors is discarded in the solve phase):    %d\n", mumps->id.ICNTL(31)));
2498         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(33) (compute determinant):                        %d\n", mumps->id.ICNTL(33)));
2499         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(35) (activate BLR based factorization):           %d\n", mumps->id.ICNTL(35)));
2500         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(36) (choice of BLR factorization variant):        %d\n", mumps->id.ICNTL(36)));
2501         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(38) (estimated compression rate of LU factors):   %d\n", mumps->id.ICNTL(38)));
2502         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(58) (options for symbolic factorization):         %d\n", mumps->id.ICNTL(58)));
2503 
2504         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(1) (relative pivoting threshold):      %g\n", mumps->id.CNTL(1)));
2505         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(2) (stopping criterion of refinement): %g\n", mumps->id.CNTL(2)));
2506         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(3) (absolute pivoting threshold):      %g\n", mumps->id.CNTL(3)));
2507         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(4) (value of static pivoting):         %g\n", mumps->id.CNTL(4)));
2508         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(5) (fixation for null pivots):         %g\n", mumps->id.CNTL(5)));
2509         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(7) (dropping parameter for BLR):       %g\n", mumps->id.CNTL(7)));
2510 
2511         /* information local to each processor */
2512         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis):\n"));
2513         PetscCall(PetscViewerASCIIPushSynchronized(viewer));
2514         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, mumps->id.RINFO(1)));
2515         PetscCall(PetscViewerFlush(viewer));
2516         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization):\n"));
2517         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, mumps->id.RINFO(2)));
2518         PetscCall(PetscViewerFlush(viewer));
2519         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization):\n"));
2520         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, mumps->id.RINFO(3)));
2521         PetscCall(PetscViewerFlush(viewer));
2522 
2523         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n"));
2524         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(15)));
2525         PetscCall(PetscViewerFlush(viewer));
2526 
2527         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n"));
2528         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(16)));
2529         PetscCall(PetscViewerFlush(viewer));
2530 
2531         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization):\n"));
2532         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(23)));
2533         PetscCall(PetscViewerFlush(viewer));
2534 
2535         if (mumps->ninfo && mumps->ninfo <= 80) {
2536           PetscInt i;
2537           for (i = 0; i < mumps->ninfo; i++) {
2538             PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(%" PetscInt_FMT "):\n", mumps->info[i]));
2539             PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i])));
2540             PetscCall(PetscViewerFlush(viewer));
2541           }
2542         }
2543         PetscCall(PetscViewerASCIIPopSynchronized(viewer));
2544       } else PetscCall(PetscViewerASCIIPrintf(viewer, "  Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : ""));
2545 
2546       if (mumps->myid == 0) { /* information from the host */
2547         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", mumps->id.RINFOG(1)));
2548         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", mumps->id.RINFOG(2)));
2549         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", mumps->id.RINFOG(3)));
2550         PetscCall(PetscViewerASCIIPrintf(viewer, "  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", mumps->id.RINFOG(12), mumps->id.RINFOG(13), mumps->id.INFOG(34)));
2551 
2552         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3)));
2553         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4)));
2554         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5)));
2555         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6)));
2556         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7)));
2557         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8)));
2558         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9)));
2559         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10)));
2560         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11)));
2561         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12)));
2562         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13)));
2563         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14)));
2564         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15)));
2565         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16)));
2566         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17)));
2567         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18)));
2568         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19)));
2569         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20)));
2570         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21)));
2571         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22)));
2572         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23)));
2573         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24)));
2574         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25)));
2575         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28)));
2576         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29)));
2577         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31)));
2578         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32)));
2579         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33)));
2580         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34)));
2581         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35)));
2582         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36)));
2583         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37)));
2584         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38)));
2585         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39)));
2586       }
2587     }
2588   }
2589   PetscFunctionReturn(PETSC_SUCCESS);
2590 }
2591 
2592 static PetscErrorCode MatGetInfo_MUMPS(Mat A, MatInfoType flag, MatInfo *info)
2593 {
2594   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
2595 
2596   PetscFunctionBegin;
2597   info->block_size        = 1.0;
2598   info->nz_allocated      = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
2599   info->nz_used           = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
2600   info->nz_unneeded       = 0.0;
2601   info->assemblies        = 0.0;
2602   info->mallocs           = 0.0;
2603   info->memory            = 0.0;
2604   info->fill_ratio_given  = 0;
2605   info->fill_ratio_needed = 0;
2606   info->factor_mallocs    = 0;
2607   PetscFunctionReturn(PETSC_SUCCESS);
2608 }
2609 
2610 static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
2611 {
2612   Mat_MUMPS         *mumps = (Mat_MUMPS *)F->data;
2613   const PetscScalar *arr;
2614   const PetscInt    *idxs;
2615   PetscInt           size, i;
2616 
2617   PetscFunctionBegin;
2618   PetscCall(ISGetLocalSize(is, &size));
2619   /* Schur complement matrix */
2620   PetscCall(MatDestroy(&F->schur));
2621   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
2622   PetscCall(MatDenseGetArrayRead(F->schur, &arr));
2623   mumps->id.schur      = (MumpsScalar *)arr;
2624   mumps->id.size_schur = size;
2625   mumps->id.schur_lld  = size;
2626   PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
2627   if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE));
2628 
2629   /* MUMPS expects Fortran style indices */
2630   PetscCall(PetscFree(mumps->id.listvar_schur));
2631   PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
2632   PetscCall(ISGetIndices(is, &idxs));
2633   for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &(mumps->id.listvar_schur[i])));
2634   PetscCall(ISRestoreIndices(is, &idxs));
2635   /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */
2636   mumps->id.ICNTL(26) = -1;
2637   PetscFunctionReturn(PETSC_SUCCESS);
2638 }
2639 
2640 static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
2641 {
2642   Mat          St;
2643   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
2644   PetscScalar *array;
2645 
2646   PetscFunctionBegin;
2647   PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! Call MatFactorSetSchurIS() to enable it");
2648   PetscCall(MatCreate(PETSC_COMM_SELF, &St));
2649   PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
2650   PetscCall(MatSetType(St, MATDENSE));
2651   PetscCall(MatSetUp(St));
2652   PetscCall(MatDenseGetArray(St, &array));
2653   if (!mumps->sym) {                /* MUMPS always return a full matrix */
2654     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2655       PetscInt i, j, N = mumps->id.size_schur;
2656       for (i = 0; i < N; i++) {
2657         for (j = 0; j < N; j++) {
2658 #if !defined(PETSC_USE_COMPLEX)
2659           PetscScalar val = mumps->id.schur[i * N + j];
2660 #else
2661           PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i;
2662 #endif
2663           array[j * N + i] = val;
2664         }
2665       }
2666     } else { /* stored by columns */
2667       PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
2668     }
2669   } else {                          /* either full or lower-triangular (not packed) */
2670     if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
2671       PetscInt i, j, N = mumps->id.size_schur;
2672       for (i = 0; i < N; i++) {
2673         for (j = i; j < N; j++) {
2674 #if !defined(PETSC_USE_COMPLEX)
2675           PetscScalar val = mumps->id.schur[i * N + j];
2676 #else
2677           PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i;
2678 #endif
2679           array[i * N + j] = array[j * N + i] = val;
2680         }
2681       }
2682     } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
2683       PetscCall(PetscArraycpy(array, mumps->id.schur, mumps->id.size_schur * mumps->id.size_schur));
2684     } else { /* ICNTL(19) == 1 lower triangular stored by rows */
2685       PetscInt i, j, N = mumps->id.size_schur;
2686       for (i = 0; i < N; i++) {
2687         for (j = 0; j < i + 1; j++) {
2688 #if !defined(PETSC_USE_COMPLEX)
2689           PetscScalar val = mumps->id.schur[i * N + j];
2690 #else
2691           PetscScalar val = mumps->id.schur[i * N + j].r + PETSC_i * mumps->id.schur[i * N + j].i;
2692 #endif
2693           array[i * N + j] = array[j * N + i] = val;
2694         }
2695       }
2696     }
2697   }
2698   PetscCall(MatDenseRestoreArray(St, &array));
2699   *S = St;
2700   PetscFunctionReturn(PETSC_SUCCESS);
2701 }
2702 
2703 static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
2704 {
2705   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2706 
2707   PetscFunctionBegin;
2708   if (mumps->id.job == JOB_NULL) {                                       /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
2709     PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
2710     for (i = 0; i < nICNTL_pre; ++i)
2711       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
2712     if (i == nICNTL_pre) {                             /* not already cached */
2713       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
2714       else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
2715       mumps->ICNTL_pre[0]++;
2716     }
2717     mumps->ICNTL_pre[1 + 2 * i] = icntl;
2718     PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i));
2719   } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl)));
2720   PetscFunctionReturn(PETSC_SUCCESS);
2721 }
2722 
2723 static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
2724 {
2725   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2726 
2727   PetscFunctionBegin;
2728   if (mumps->id.job == JOB_NULL) {
2729     PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
2730     *ival = 0;
2731     for (i = 0; i < nICNTL_pre; ++i) {
2732       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i];
2733     }
2734   } else *ival = mumps->id.ICNTL(icntl);
2735   PetscFunctionReturn(PETSC_SUCCESS);
2736 }
2737 
2738 /*@
2739   MatMumpsSetIcntl - Set MUMPS parameter ICNTL()
2740 
2741   Logically Collective
2742 
2743   Input Parameters:
2744 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2745 . icntl - index of MUMPS parameter array ICNTL()
2746 - ival  - value of MUMPS ICNTL(icntl)
2747 
2748   Options Database Key:
2749 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival
2750 
2751   Level: beginner
2752 
2753   References:
2754 .  * - MUMPS Users' Guide
2755 
2756 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2757 @*/
2758 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival)
2759 {
2760   PetscFunctionBegin;
2761   PetscValidType(F, 1);
2762   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2763   PetscValidLogicalCollectiveInt(F, icntl, 2);
2764   PetscValidLogicalCollectiveInt(F, ival, 3);
2765   PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
2766   PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
2767   PetscFunctionReturn(PETSC_SUCCESS);
2768 }
2769 
2770 /*@
2771   MatMumpsGetIcntl - Get MUMPS parameter ICNTL()
2772 
2773   Logically Collective
2774 
2775   Input Parameters:
2776 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2777 - icntl - index of MUMPS parameter array ICNTL()
2778 
2779   Output Parameter:
2780 . ival - value of MUMPS ICNTL(icntl)
2781 
2782   Level: beginner
2783 
2784   References:
2785 .  * - MUMPS Users' Guide
2786 
2787 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2788 @*/
2789 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival)
2790 {
2791   PetscFunctionBegin;
2792   PetscValidType(F, 1);
2793   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2794   PetscValidLogicalCollectiveInt(F, icntl, 2);
2795   PetscAssertPointer(ival, 3);
2796   PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
2797   PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
2798   PetscFunctionReturn(PETSC_SUCCESS);
2799 }
2800 
2801 static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val)
2802 {
2803   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2804 
2805   PetscFunctionBegin;
2806   if (mumps->id.job == JOB_NULL) {
2807     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2808     for (i = 0; i < nCNTL_pre; ++i)
2809       if (mumps->CNTL_pre[1 + 2 * i] == icntl) break;
2810     if (i == nCNTL_pre) {
2811       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre));
2812       else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre));
2813       mumps->CNTL_pre[0]++;
2814     }
2815     mumps->CNTL_pre[1 + 2 * i] = icntl;
2816     mumps->CNTL_pre[2 + 2 * i] = val;
2817   } else mumps->id.CNTL(icntl) = val;
2818   PetscFunctionReturn(PETSC_SUCCESS);
2819 }
2820 
2821 static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val)
2822 {
2823   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2824 
2825   PetscFunctionBegin;
2826   if (mumps->id.job == JOB_NULL) {
2827     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2828     *val = 0.0;
2829     for (i = 0; i < nCNTL_pre; ++i) {
2830       if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i];
2831     }
2832   } else *val = mumps->id.CNTL(icntl);
2833   PetscFunctionReturn(PETSC_SUCCESS);
2834 }
2835 
2836 /*@
2837   MatMumpsSetCntl - Set MUMPS parameter CNTL()
2838 
2839   Logically Collective
2840 
2841   Input Parameters:
2842 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2843 . icntl - index of MUMPS parameter array CNTL()
2844 - val   - value of MUMPS CNTL(icntl)
2845 
2846   Options Database Key:
2847 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival
2848 
2849   Level: beginner
2850 
2851   References:
2852 .  * - MUMPS Users' Guide
2853 
2854 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2855 @*/
2856 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val)
2857 {
2858   PetscFunctionBegin;
2859   PetscValidType(F, 1);
2860   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2861   PetscValidLogicalCollectiveInt(F, icntl, 2);
2862   PetscValidLogicalCollectiveReal(F, val, 3);
2863   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
2864   PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val));
2865   PetscFunctionReturn(PETSC_SUCCESS);
2866 }
2867 
2868 /*@
2869   MatMumpsGetCntl - Get MUMPS parameter CNTL()
2870 
2871   Logically Collective
2872 
2873   Input Parameters:
2874 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
2875 - icntl - index of MUMPS parameter array CNTL()
2876 
2877   Output Parameter:
2878 . val - value of MUMPS CNTL(icntl)
2879 
2880   Level: beginner
2881 
2882   References:
2883 .  * - MUMPS Users' Guide
2884 
2885 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2886 @*/
2887 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val)
2888 {
2889   PetscFunctionBegin;
2890   PetscValidType(F, 1);
2891   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
2892   PetscValidLogicalCollectiveInt(F, icntl, 2);
2893   PetscAssertPointer(val, 3);
2894   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
2895   PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
2896   PetscFunctionReturn(PETSC_SUCCESS);
2897 }
2898 
2899 static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info)
2900 {
2901   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2902 
2903   PetscFunctionBegin;
2904   *info = mumps->id.INFO(icntl);
2905   PetscFunctionReturn(PETSC_SUCCESS);
2906 }
2907 
2908 static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog)
2909 {
2910   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2911 
2912   PetscFunctionBegin;
2913   *infog = mumps->id.INFOG(icntl);
2914   PetscFunctionReturn(PETSC_SUCCESS);
2915 }
2916 
2917 static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo)
2918 {
2919   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2920 
2921   PetscFunctionBegin;
2922   *rinfo = mumps->id.RINFO(icntl);
2923   PetscFunctionReturn(PETSC_SUCCESS);
2924 }
2925 
2926 static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog)
2927 {
2928   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2929 
2930   PetscFunctionBegin;
2931   *rinfog = mumps->id.RINFOG(icntl);
2932   PetscFunctionReturn(PETSC_SUCCESS);
2933 }
2934 
2935 static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array)
2936 {
2937   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2938 
2939   PetscFunctionBegin;
2940   PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
2941   *size  = 0;
2942   *array = NULL;
2943   if (!mumps->myid) {
2944     *size = mumps->id.INFOG(28);
2945     PetscCall(PetscMalloc1(*size, array));
2946     for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1;
2947   }
2948   PetscFunctionReturn(PETSC_SUCCESS);
2949 }
2950 
2951 static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS)
2952 {
2953   Mat          Bt = NULL, Btseq = NULL;
2954   PetscBool    flg;
2955   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
2956   PetscScalar *aa;
2957   PetscInt     spnr, *ia, *ja, M, nrhs;
2958 
2959   PetscFunctionBegin;
2960   PetscAssertPointer(spRHS, 2);
2961   PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg));
2962   if (flg) {
2963     PetscCall(MatTransposeGetMat(spRHS, &Bt));
2964   } else SETERRQ(PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix");
2965 
2966   PetscCall(MatMumpsSetIcntl(F, 30, 1));
2967 
2968   if (mumps->petsc_size > 1) {
2969     Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data;
2970     Btseq         = b->A;
2971   } else {
2972     Btseq = Bt;
2973   }
2974 
2975   PetscCall(MatGetSize(spRHS, &M, &nrhs));
2976   mumps->id.nrhs = nrhs;
2977   mumps->id.lrhs = M;
2978   mumps->id.rhs  = NULL;
2979 
2980   if (!mumps->myid) {
2981     PetscCall(MatSeqAIJGetArray(Btseq, &aa));
2982     PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2983     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
2984     PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
2985     mumps->id.rhs_sparse = (MumpsScalar *)aa;
2986   } else {
2987     mumps->id.irhs_ptr    = NULL;
2988     mumps->id.irhs_sparse = NULL;
2989     mumps->id.nz_rhs      = 0;
2990     mumps->id.rhs_sparse  = NULL;
2991   }
2992   mumps->id.ICNTL(20) = 1; /* rhs is sparse */
2993   mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */
2994 
2995   /* solve phase */
2996   mumps->id.job = JOB_SOLVE;
2997   PetscMUMPS_c(mumps);
2998   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
2999 
3000   if (!mumps->myid) {
3001     PetscCall(MatSeqAIJRestoreArray(Btseq, &aa));
3002     PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
3003     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
3004   }
3005   PetscFunctionReturn(PETSC_SUCCESS);
3006 }
3007 
3008 /*@
3009   MatMumpsGetInverse - Get user-specified set of entries in inverse of `A`
3010 
3011   Logically Collective
3012 
3013   Input Parameter:
3014 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3015 
3016   Output Parameter:
3017 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A`
3018 
3019   Level: beginner
3020 
3021   References:
3022 .  * - MUMPS Users' Guide
3023 
3024 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`
3025 @*/
3026 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS)
3027 {
3028   PetscFunctionBegin;
3029   PetscValidType(F, 1);
3030   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3031   PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS));
3032   PetscFunctionReturn(PETSC_SUCCESS);
3033 }
3034 
3035 static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST)
3036 {
3037   Mat spRHS;
3038 
3039   PetscFunctionBegin;
3040   PetscCall(MatCreateTranspose(spRHST, &spRHS));
3041   PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS));
3042   PetscCall(MatDestroy(&spRHS));
3043   PetscFunctionReturn(PETSC_SUCCESS);
3044 }
3045 
3046 /*@
3047   MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix `A`^T
3048 
3049   Logically Collective
3050 
3051   Input Parameter:
3052 . F - the factored matrix of A obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3053 
3054   Output Parameter:
3055 . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T
3056 
3057   Level: beginner
3058 
3059   References:
3060 .  * - MUMPS Users' Guide
3061 
3062 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()`
3063 @*/
3064 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST)
3065 {
3066   PetscBool flg;
3067 
3068   PetscFunctionBegin;
3069   PetscValidType(F, 1);
3070   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3071   PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
3072   PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix");
3073 
3074   PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST));
3075   PetscFunctionReturn(PETSC_SUCCESS);
3076 }
3077 
3078 /*@
3079   MatMumpsGetInfo - Get MUMPS parameter INFO()
3080 
3081   Logically Collective
3082 
3083   Input Parameters:
3084 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3085 - icntl - index of MUMPS parameter array INFO()
3086 
3087   Output Parameter:
3088 . ival - value of MUMPS INFO(icntl)
3089 
3090   Level: beginner
3091 
3092   References:
3093 .  * - MUMPS Users' Guide
3094 
3095 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3096 @*/
3097 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival)
3098 {
3099   PetscFunctionBegin;
3100   PetscValidType(F, 1);
3101   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3102   PetscAssertPointer(ival, 3);
3103   PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
3104   PetscFunctionReturn(PETSC_SUCCESS);
3105 }
3106 
3107 /*@
3108   MatMumpsGetInfog - Get MUMPS parameter INFOG()
3109 
3110   Logically Collective
3111 
3112   Input Parameters:
3113 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3114 - icntl - index of MUMPS parameter array INFOG()
3115 
3116   Output Parameter:
3117 . ival - value of MUMPS INFOG(icntl)
3118 
3119   Level: beginner
3120 
3121   References:
3122 .  * - MUMPS Users' Guide
3123 
3124 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3125 @*/
3126 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival)
3127 {
3128   PetscFunctionBegin;
3129   PetscValidType(F, 1);
3130   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3131   PetscAssertPointer(ival, 3);
3132   PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
3133   PetscFunctionReturn(PETSC_SUCCESS);
3134 }
3135 
3136 /*@
3137   MatMumpsGetRinfo - Get MUMPS parameter RINFO()
3138 
3139   Logically Collective
3140 
3141   Input Parameters:
3142 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3143 - icntl - index of MUMPS parameter array RINFO()
3144 
3145   Output Parameter:
3146 . val - value of MUMPS RINFO(icntl)
3147 
3148   Level: beginner
3149 
3150   References:
3151 .  * - MUMPS Users' Guide
3152 
3153 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
3154 @*/
3155 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val)
3156 {
3157   PetscFunctionBegin;
3158   PetscValidType(F, 1);
3159   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3160   PetscAssertPointer(val, 3);
3161   PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
3162   PetscFunctionReturn(PETSC_SUCCESS);
3163 }
3164 
3165 /*@
3166   MatMumpsGetRinfog - Get MUMPS parameter RINFOG()
3167 
3168   Logically Collective
3169 
3170   Input Parameters:
3171 + F     - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3172 - icntl - index of MUMPS parameter array RINFOG()
3173 
3174   Output Parameter:
3175 . val - value of MUMPS RINFOG(icntl)
3176 
3177   Level: beginner
3178 
3179   References:
3180 .  * - MUMPS Users' Guide
3181 
3182 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
3183 @*/
3184 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val)
3185 {
3186   PetscFunctionBegin;
3187   PetscValidType(F, 1);
3188   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3189   PetscAssertPointer(val, 3);
3190   PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
3191   PetscFunctionReturn(PETSC_SUCCESS);
3192 }
3193 
3194 /*@
3195   MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST()
3196 
3197   Logically Collective
3198 
3199   Input Parameter:
3200 . F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-MUMPS interface
3201 
3202   Output Parameters:
3203 + size  - local size of the array. The size of the array is non-zero only on the host.
3204 - array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible
3205            for freeing this array.
3206 
3207   Level: beginner
3208 
3209   References:
3210 .  * - MUMPS Users' Guide
3211 
3212 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
3213 @*/
3214 PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array)
3215 {
3216   PetscFunctionBegin;
3217   PetscValidType(F, 1);
3218   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3219   PetscAssertPointer(size, 2);
3220   PetscAssertPointer(array, 3);
3221   PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array));
3222   PetscFunctionReturn(PETSC_SUCCESS);
3223 }
3224 
3225 /*MC
3226   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
3227   distributed and sequential matrices via the external package MUMPS.
3228 
3229   Works with `MATAIJ` and `MATSBAIJ` matrices
3230 
3231   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
3232 
3233   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode.
3234   See details below.
3235 
3236   Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver
3237 
3238   Options Database Keys:
3239 +  -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
3240 .  -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning
3241 .  -mat_mumps_icntl_3 -  ICNTL(3): output stream for global information, collected on the host
3242 .  -mat_mumps_icntl_4 -  ICNTL(4): level of printing (0 to 4)
3243 .  -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)
3244 .  -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
3245                         Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
3246 .  -mat_mumps_icntl_8  - ICNTL(8): scaling strategy (-2 to 8 or 77)
3247 .  -mat_mumps_icntl_10  - ICNTL(10): max num of refinements
3248 .  -mat_mumps_icntl_11  - ICNTL(11): statistics related to an error analysis (via -ksp_view)
3249 .  -mat_mumps_icntl_12  - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
3250 .  -mat_mumps_icntl_13  - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
3251 .  -mat_mumps_icntl_14  - ICNTL(14): percentage increase in the estimated working space
3252 .  -mat_mumps_icntl_15  - ICNTL(15): compression of the input matrix resulting from a block format
3253 .  -mat_mumps_icntl_19  - ICNTL(19): computes the Schur complement
3254 .  -mat_mumps_icntl_20  - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
3255 .  -mat_mumps_icntl_22  - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
3256 .  -mat_mumps_icntl_23  - ICNTL(23): max size of the working memory (MB) that can allocate per processor
3257 .  -mat_mumps_icntl_24  - ICNTL(24): detection of null pivot rows (0 or 1)
3258 .  -mat_mumps_icntl_25  - ICNTL(25): compute a solution of a deficient matrix and a null space basis
3259 .  -mat_mumps_icntl_26  - ICNTL(26): drives the solution phase if a Schur complement matrix
3260 .  -mat_mumps_icntl_28  - ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering
3261 .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
3262 .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
3263 .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
3264 .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
3265 .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
3266 .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
3267 .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
3268 .  -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization
3269 .  -mat_mumps_cntl_1  - CNTL(1): relative pivoting threshold
3270 .  -mat_mumps_cntl_2  -  CNTL(2): stopping criterion of refinement
3271 .  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
3272 .  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
3273 .  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
3274 .  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
3275 -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
3276                                    Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.
3277 
3278   Level: beginner
3279 
3280     Notes:
3281     MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at https://mumps-solver.org/index.php?page=doc) so using it will
3282     error if the matrix is Hermitian.
3283 
3284     When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
3285     `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.
3286 
3287     When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about
3288     the failure with
3289 .vb
3290           KSPGetPC(ksp,&pc);
3291           PCFactorGetMatrix(pc,&mat);
3292           MatMumpsGetInfo(mat,....);
3293           MatMumpsGetInfog(mat,....); etc.
3294 .ve
3295     Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message.
3296 
3297     MUMPS provides 64-bit integer support in two build modes:
3298       full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
3299       requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
3300 
3301       selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
3302       MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
3303       columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
3304       integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
3305 
3306     With --download-mumps=1, PETSc always build MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.
3307 
3308   Two modes to run MUMPS/PETSc with OpenMP
3309 .vb
3310      Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
3311      threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
3312 .ve
3313 
3314 .vb
3315      -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
3316     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
3317 .ve
3318 
3319    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
3320    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc`
3321    (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
3322    libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
3323    (PETSc will automatically try to utilized a threaded BLAS if --with-openmp is provided).
3324 
3325    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
3326    processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
3327    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
3328    are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set
3329    by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs.
3330    In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets,
3331    if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind
3332    MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half
3333    cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the
3334    problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding.
3335    For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
3336    examine the mapping result.
3337 
3338    PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts,
3339    for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc
3340    calls `omp_set_num_threads`(m) internally before calling MUMPS.
3341 
3342    References:
3343 +  * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
3344 -  * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
3345 
3346 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()`
3347 M*/
3348 
3349 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A, MatSolverType *type)
3350 {
3351   PetscFunctionBegin;
3352   *type = MATSOLVERMUMPS;
3353   PetscFunctionReturn(PETSC_SUCCESS);
3354 }
3355 
3356 /* MatGetFactor for Seq and MPI AIJ matrices */
3357 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F)
3358 {
3359   Mat         B;
3360   Mat_MUMPS  *mumps;
3361   PetscBool   isSeqAIJ, isDiag;
3362   PetscMPIInt size;
3363 
3364   PetscFunctionBegin;
3365 #if defined(PETSC_USE_COMPLEX)
3366   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
3367     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
3368     *F = NULL;
3369     PetscFunctionReturn(PETSC_SUCCESS);
3370   }
3371 #endif
3372   /* Create the factorization matrix */
3373   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ));
3374   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag));
3375   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3376   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3377   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3378   PetscCall(MatSetUp(B));
3379 
3380   PetscCall(PetscNew(&mumps));
3381 
3382   B->ops->view    = MatView_MUMPS;
3383   B->ops->getinfo = MatGetInfo_MUMPS;
3384 
3385   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3386   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3387   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3388   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3389   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3390   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3391   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3392   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3393   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3394   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3395   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3396   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3397   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3398   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3399 
3400   if (ftype == MAT_FACTOR_LU) {
3401     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3402     B->factortype            = MAT_FACTOR_LU;
3403     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
3404     else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
3405     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
3406     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3407     mumps->sym = 0;
3408   } else {
3409     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3410     B->factortype                  = MAT_FACTOR_CHOLESKY;
3411     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
3412     else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
3413     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
3414     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3415 #if defined(PETSC_USE_COMPLEX)
3416     mumps->sym = 2;
3417 #else
3418     if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3419     else mumps->sym = 2;
3420 #endif
3421   }
3422 
3423   /* set solvertype */
3424   PetscCall(PetscFree(B->solvertype));
3425   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3426   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3427   if (size == 1) {
3428     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3429     B->canuseordering = PETSC_TRUE;
3430   }
3431   B->ops->destroy = MatDestroy_MUMPS;
3432   B->data         = (void *)mumps;
3433 
3434   *F               = B;
3435   mumps->id.job    = JOB_NULL;
3436   mumps->ICNTL_pre = NULL;
3437   mumps->CNTL_pre  = NULL;
3438   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3439   PetscFunctionReturn(PETSC_SUCCESS);
3440 }
3441 
3442 /* MatGetFactor for Seq and MPI SBAIJ matrices */
3443 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, MatFactorType ftype, Mat *F)
3444 {
3445   Mat         B;
3446   Mat_MUMPS  *mumps;
3447   PetscBool   isSeqSBAIJ;
3448   PetscMPIInt size;
3449 
3450   PetscFunctionBegin;
3451 #if defined(PETSC_USE_COMPLEX)
3452   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
3453     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
3454     *F = NULL;
3455     PetscFunctionReturn(PETSC_SUCCESS);
3456   }
3457 #endif
3458   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3459   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3460   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3461   PetscCall(MatSetUp(B));
3462 
3463   PetscCall(PetscNew(&mumps));
3464   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ));
3465   if (isSeqSBAIJ) {
3466     mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
3467   } else {
3468     mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
3469   }
3470 
3471   B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3472   B->ops->view                   = MatView_MUMPS;
3473   B->ops->getinfo                = MatGetInfo_MUMPS;
3474 
3475   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3476   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3477   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3478   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3479   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3480   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3481   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3482   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3483   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3484   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3485   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3486   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3487   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3488   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3489 
3490   B->factortype = MAT_FACTOR_CHOLESKY;
3491 #if defined(PETSC_USE_COMPLEX)
3492   mumps->sym = 2;
3493 #else
3494   if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3495   else mumps->sym = 2;
3496 #endif
3497 
3498   /* set solvertype */
3499   PetscCall(PetscFree(B->solvertype));
3500   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3501   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3502   if (size == 1) {
3503     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3504     B->canuseordering = PETSC_TRUE;
3505   }
3506   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3507   B->ops->destroy = MatDestroy_MUMPS;
3508   B->data         = (void *)mumps;
3509 
3510   *F               = B;
3511   mumps->id.job    = JOB_NULL;
3512   mumps->ICNTL_pre = NULL;
3513   mumps->CNTL_pre  = NULL;
3514   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3515   PetscFunctionReturn(PETSC_SUCCESS);
3516 }
3517 
3518 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F)
3519 {
3520   Mat         B;
3521   Mat_MUMPS  *mumps;
3522   PetscBool   isSeqBAIJ;
3523   PetscMPIInt size;
3524 
3525   PetscFunctionBegin;
3526   /* Create the factorization matrix */
3527   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ));
3528   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3529   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3530   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3531   PetscCall(MatSetUp(B));
3532 
3533   PetscCall(PetscNew(&mumps));
3534   if (ftype == MAT_FACTOR_LU) {
3535     B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
3536     B->factortype            = MAT_FACTOR_LU;
3537     if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
3538     else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
3539     mumps->sym = 0;
3540     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3541   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
3542 
3543   B->ops->view    = MatView_MUMPS;
3544   B->ops->getinfo = MatGetInfo_MUMPS;
3545 
3546   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3547   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3548   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3549   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3550   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3551   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3552   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3553   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3554   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3555   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3556   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3557   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3558   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3559   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3560 
3561   /* set solvertype */
3562   PetscCall(PetscFree(B->solvertype));
3563   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3564   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3565   if (size == 1) {
3566     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3567     B->canuseordering = PETSC_TRUE;
3568   }
3569   B->ops->destroy = MatDestroy_MUMPS;
3570   B->data         = (void *)mumps;
3571 
3572   *F               = B;
3573   mumps->id.job    = JOB_NULL;
3574   mumps->ICNTL_pre = NULL;
3575   mumps->CNTL_pre  = NULL;
3576   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3577   PetscFunctionReturn(PETSC_SUCCESS);
3578 }
3579 
3580 /* MatGetFactor for Seq and MPI SELL matrices */
3581 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F)
3582 {
3583   Mat         B;
3584   Mat_MUMPS  *mumps;
3585   PetscBool   isSeqSELL;
3586   PetscMPIInt size;
3587 
3588   PetscFunctionBegin;
3589   /* Create the factorization matrix */
3590   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL));
3591   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3592   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3593   PetscCall(PetscStrallocpy("mumps", &((PetscObject)B)->type_name));
3594   PetscCall(MatSetUp(B));
3595 
3596   PetscCall(PetscNew(&mumps));
3597 
3598   B->ops->view    = MatView_MUMPS;
3599   B->ops->getinfo = MatGetInfo_MUMPS;
3600 
3601   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3602   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3603   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3604   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3605   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3606   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3607   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3608   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3609   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3610   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3611   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3612   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3613 
3614   if (ftype == MAT_FACTOR_LU) {
3615     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3616     B->factortype            = MAT_FACTOR_LU;
3617     if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
3618     else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
3619     mumps->sym = 0;
3620     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
3621   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
3622 
3623   /* set solvertype */
3624   PetscCall(PetscFree(B->solvertype));
3625   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3626   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3627   if (size == 1) {
3628     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization  */
3629     B->canuseordering = PETSC_TRUE;
3630   }
3631   B->ops->destroy = MatDestroy_MUMPS;
3632   B->data         = (void *)mumps;
3633 
3634   *F               = B;
3635   mumps->id.job    = JOB_NULL;
3636   mumps->ICNTL_pre = NULL;
3637   mumps->CNTL_pre  = NULL;
3638   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3639   PetscFunctionReturn(PETSC_SUCCESS);
3640 }
3641 
3642 /* MatGetFactor for MATNEST matrices */
3643 static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F)
3644 {
3645   Mat         B, **mats;
3646   Mat_MUMPS  *mumps;
3647   PetscInt    nr, nc;
3648   PetscMPIInt size;
3649   PetscBool   flg = PETSC_TRUE;
3650 
3651   PetscFunctionBegin;
3652 #if defined(PETSC_USE_COMPLEX)
3653   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
3654     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
3655     *F = NULL;
3656     PetscFunctionReturn(PETSC_SUCCESS);
3657   }
3658 #endif
3659 
3660   /* Return if some condition is not satisfied */
3661   *F = NULL;
3662   PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
3663   if (ftype == MAT_FACTOR_CHOLESKY) {
3664     IS       *rows, *cols;
3665     PetscInt *m, *M;
3666 
3667     PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc);
3668     PetscCall(PetscMalloc2(nr, &rows, nc, &cols));
3669     PetscCall(MatNestGetISs(A, rows, cols));
3670     for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg));
3671     if (!flg) {
3672       PetscCall(PetscFree2(rows, cols));
3673       PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n"));
3674       PetscFunctionReturn(PETSC_SUCCESS);
3675     }
3676     PetscCall(PetscMalloc2(nr, &m, nr, &M));
3677     for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r]));
3678     for (PetscInt r = 0; flg && r < nr; r++)
3679       for (PetscInt k = r + 1; flg && k < nr; k++)
3680         if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE;
3681     PetscCall(PetscFree2(m, M));
3682     PetscCall(PetscFree2(rows, cols));
3683     if (!flg) {
3684       PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n"));
3685       PetscFunctionReturn(PETSC_SUCCESS);
3686     }
3687   }
3688 
3689   for (PetscInt r = 0; r < nr; r++) {
3690     for (PetscInt c = 0; c < nc; c++) {
3691       Mat       sub = mats[r][c];
3692       PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isTrans, isDiag;
3693 
3694       if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue;
3695       PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATTRANSPOSEVIRTUAL, &isTrans));
3696       if (isTrans) PetscCall(MatTransposeGetMat(sub, &sub));
3697       else {
3698         PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATHERMITIANTRANSPOSEVIRTUAL, &isTrans));
3699         if (isTrans) PetscCall(MatHermitianTransposeGetMat(sub, &sub));
3700       }
3701       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
3702       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
3703       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
3704       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
3705       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
3706       PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
3707       PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
3708       if (ftype == MAT_FACTOR_CHOLESKY) {
3709         if (r == c) {
3710           if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag) {
3711             PetscCall(PetscInfo(sub, "MAT_CHOLESKY_FACTOR not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
3712             flg = PETSC_FALSE;
3713           }
3714         } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag) {
3715           PetscCall(PetscInfo(sub, "MAT_CHOLESKY_FACTOR not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
3716           flg = PETSC_FALSE;
3717         }
3718       } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag) {
3719         PetscCall(PetscInfo(sub, "MAT_LU_FACTOR not supported for block of type %s.\n", ((PetscObject)sub)->type_name));
3720         flg = PETSC_FALSE;
3721       }
3722     }
3723   }
3724   if (!flg) PetscFunctionReturn(PETSC_SUCCESS);
3725 
3726   /* Create the factorization matrix */
3727   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
3728   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
3729   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
3730   PetscCall(MatSetUp(B));
3731 
3732   PetscCall(PetscNew(&mumps));
3733 
3734   B->ops->view    = MatView_MUMPS;
3735   B->ops->getinfo = MatGetInfo_MUMPS;
3736 
3737   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
3738   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
3739   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
3740   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
3741   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
3742   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
3743   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
3744   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
3745   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
3746   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
3747   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
3748   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
3749   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
3750   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
3751 
3752   if (ftype == MAT_FACTOR_LU) {
3753     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3754     B->factortype            = MAT_FACTOR_LU;
3755     mumps->sym               = 0;
3756   } else {
3757     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3758     B->factortype                  = MAT_FACTOR_CHOLESKY;
3759 #if defined(PETSC_USE_COMPLEX)
3760     mumps->sym = 2;
3761 #else
3762     if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3763     else mumps->sym = 2;
3764 #endif
3765   }
3766   mumps->ConvertToTriples = MatConvertToTriples_nest_xaij;
3767   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype]));
3768 
3769   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
3770   if (size == 1) {
3771     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3772     B->canuseordering = PETSC_TRUE;
3773   }
3774 
3775   /* set solvertype */
3776   PetscCall(PetscFree(B->solvertype));
3777   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
3778   B->ops->destroy = MatDestroy_MUMPS;
3779   B->data         = (void *)mumps;
3780 
3781   *F               = B;
3782   mumps->id.job    = JOB_NULL;
3783   mumps->ICNTL_pre = NULL;
3784   mumps->CNTL_pre  = NULL;
3785   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
3786   PetscFunctionReturn(PETSC_SUCCESS);
3787 }
3788 
3789 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
3790 {
3791   PetscFunctionBegin;
3792   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3793   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3794   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
3795   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
3796   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
3797   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3798   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3799   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
3800   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
3801   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
3802   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps));
3803   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
3804   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
3805   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps));
3806   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps));
3807   PetscFunctionReturn(PETSC_SUCCESS);
3808 }
3809