/*
    Provides an interface to the MUMPS sparse solver
*/
#include <petscpkg_version.h>
#include <petscsf.h>
#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sell/mpi/mpisell.h>
#include <petsc/private/vecimpl.h>

#define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")"

EXTERN_C_BEGIN
#if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
  #include <cmumps_c.h>
  #include <zmumps_c.h>
  #include <smumps_c.h>
  #include <dmumps_c.h>
#else
  #if defined(PETSC_USE_COMPLEX)
    #if defined(PETSC_USE_REAL_SINGLE)
      #include <cmumps_c.h>
      #define MUMPS_c       cmumps_c
      #define MUMPS_STRUC_C CMUMPS_STRUC_C
      #define MumpsScalar   CMUMPS_COMPLEX
    #else
      #include <zmumps_c.h>
      #define MUMPS_c       zmumps_c
      #define MUMPS_STRUC_C ZMUMPS_STRUC_C
      #define MumpsScalar   ZMUMPS_COMPLEX
    #endif
  #else
    #if defined(PETSC_USE_REAL_SINGLE)
      #include <smumps_c.h>
      #define MUMPS_c       smumps_c
      #define MUMPS_STRUC_C SMUMPS_STRUC_C
      #define MumpsScalar   SMUMPS_REAL
    #else
      #include <dmumps_c.h>
      #define MUMPS_c       dmumps_c
      #define MUMPS_STRUC_C DMUMPS_STRUC_C
      #define MumpsScalar   DMUMPS_REAL
    #endif
  #endif
#endif
EXTERN_C_END

#define JOB_INIT         -1
#define JOB_NULL         0
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC  2
#define JOB_SOLVE        3
#define JOB_END          -2
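
/* A sketch of the standard MUMPS job sequence (from the MUMPS users manual, not code
   in this file): initialize once, then analyze, factor and solve as needed, and
   finally terminate. Each assignment below is followed by a call into MUMPS, which
   this file makes via PetscMUMPS_c().

     id.job = JOB_INIT;         // -1: initialize the MUMPS instance
     id.job = JOB_FACTSYMBOLIC; //  1: analysis / symbolic factorization
     id.job = JOB_FACTNUMERIC;  //  2: numeric factorization
     id.job = JOB_SOLVE;        //  3: forward/backward solve
     id.job = JOB_END;          // -2: release MUMPS internal data
*/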

/* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
   the number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
   naming convention of PetscMPIInt, PetscBLASInt etc.
*/
typedef MUMPS_INT PetscMUMPSInt;

#if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
  #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is defined in the MUMPS headers when MUMPS is built in full 64-bit mode, so the macro is more reliable than the version number */
    #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
  #endif
#else
  #if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used to build MUMPS in full 64-bit mode */
    #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
  #endif
#endif

#define MPIU_MUMPSINT       MPI_INT
#define PETSC_MUMPS_INT_MAX 2147483647
#define PETSC_MUMPS_INT_MIN -2147483648

/* Cast PetscCount to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers */
static inline PetscErrorCode PetscMUMPSIntCast(PetscCount a, PetscMUMPSInt *b)
{
  PetscFunctionBegin;
#if PetscDefined(USE_64BIT_INDICES)
  PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
#endif
  *b = (PetscMUMPSInt)a;
  PetscFunctionReturn(PETSC_SUCCESS);
}
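
/* A minimal usage sketch (hypothetical caller, not code from this file): with 64-bit
   PetscInt, the cast errors out instead of silently truncating.

     PetscCount    nrows = ...; // may exceed 2^31-1 when PETSc uses 64-bit indices
     PetscMUMPSInt n;
     PetscCall(PetscMUMPSIntCast(nrows, &n)); // raises PETSC_ERR_ARG_OUTOFRANGE on overflow
*/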

/* Put these utility routines here since they are only used in this file */
static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
{
  PetscInt  myval;
  PetscBool myset;

  PetscFunctionBegin;
  /* PetscInt's size should always be >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
  PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
  if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
  if (set) *set = myset;
  PetscFunctionReturn(PETSC_SUCCESS);
}
#define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)
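
/* A sketch of how the wrapper is used inside a PetscOptionsBegin()/PetscOptionsEnd()
   block elsewhere in this file (the option name and ICNTL index are illustrative only;
   ICNTL() is the 1-based accessor macro defined further below):

     PetscMUMPSInt icntl;
     PetscBool     flg;
     PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): matrix ordering", "None", mumps->id.ICNTL(7), &icntl, &flg));
     if (flg) mumps->id.ICNTL(7) = icntl;

   The wrapper reads the value as a PetscInt and range-checks it into a PetscMUMPSInt. */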

// An abstract type for the specific MUMPS types {S,D,C,Z}MUMPS_STRUC_C.
//
// With the abstract (outer) type, we can write shared code. We call MUMPS through a type-to-be-determined inner field within the abstract type.
// Before/after calling MUMPS, we need to copy in/out fields between the outer and the inner, which seems expensive. But note that the large fixed-size
// arrays within the types are directly linked. In the end, we only need to copy ~20 integers/pointers, which is doable. See PreMumpsCall()/PostMumpsCall().
//
// Not all fields in the specific types are exposed in the abstract type. We only need those used by the PETSc/MUMPS interface.
// Notably, DMUMPS_COMPLEX* and DMUMPS_REAL* fields are now declared as void *. Their type will be determined by the actual precision to be used.
// Also note that we added some *_len fields not in the specific types to track sizes of those MumpsScalar buffers.
typedef struct {
  PetscPrecision precision;   // precision used by MUMPS
  void          *internal_id; // the data structure passed to MUMPS, whose actual type {S,D,C,Z}MUMPS_STRUC_C is to be decided by precision and PETSc's use of complex

  // aliased fields from internal_id, so that we can use XMUMPS_STRUC_C to write shared code across different precisions.
  MUMPS_INT  sym, par, job;
  MUMPS_INT  comm_fortran; /* Fortran communicator */
  MUMPS_INT *icntl;
  void      *cntl; // MumpsReal, fixed size array
  MUMPS_INT  n;
  MUMPS_INT  nblk;

  /* Assembled entry */
  MUMPS_INT8 nnz;
  MUMPS_INT *irn;
  MUMPS_INT *jcn;
  void      *a; // MumpsScalar, centralized input
  PetscCount a_len;

  /* Distributed entry */
  MUMPS_INT8 nnz_loc;
  MUMPS_INT *irn_loc;
  MUMPS_INT *jcn_loc;
  void      *a_loc; // MumpsScalar, distributed input
  PetscCount a_loc_len;

  /* Matrix by blocks */
  MUMPS_INT *blkptr;
  MUMPS_INT *blkvar;

  /* Ordering, if given by user */
  MUMPS_INT *perm_in;

  /* RHS, solution, output data and statistics */
  void      *rhs, *redrhs, *rhs_sparse, *sol_loc, *rhs_loc;                 // MumpsScalar buffers
  PetscCount rhs_len, redrhs_len, rhs_sparse_len, sol_loc_len, rhs_loc_len; // length of buffers (in MumpsScalar) IF allocated in a different precision than PetscScalar

  MUMPS_INT *irhs_sparse, *irhs_ptr, *isol_loc, *irhs_loc;
  MUMPS_INT  nrhs, lrhs, lredrhs, nz_rhs, lsol_loc, nloc_rhs, lrhs_loc;
  // MUMPS_INT  nsol_loc; // introduced in MUMPS-5.7, but PETSc doesn't use it; it would cause compile errors with the widely used 5.6. If you add it, you must also update PreMumpsCall() and guard this with #if PETSC_PKG_MUMPS_VERSION_GE(5, 7, 0)
  MUMPS_INT  schur_lld;
  MUMPS_INT *info, *infog;   // fixed size array
  void      *rinfo, *rinfog; // MumpsReal, fixed size array

  /* Null space */
  MUMPS_INT *pivnul_list; // allocated by MUMPS!
  MUMPS_INT *mapping;     // allocated by MUMPS!

  /* Schur */
  MUMPS_INT  size_schur;
  MUMPS_INT *listvar_schur;
  void      *schur; // MumpsScalar
  PetscCount schur_len;

  /* For out-of-core */
  char *ooc_tmpdir; // fixed size array
  char *ooc_prefix; // fixed size array
} XMUMPS_STRUC_C;
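
/* A conceptual sketch of the outer/inner pattern (hypothetical caller code): shared
   PETSc code manipulates only the abstract XMUMPS_STRUC_C; the precision-specific
   struct is touched only when actually calling MUMPS.

     XMUMPS_STRUC_C id;
     id.comm_fortran = ...; // must be set before init; the allocator copies it to the inner struct
     PetscCall(MatMumpsAllocateInternalID(&id, PETSC_PRECISION_SINGLE)); // pick the MUMPS precision at runtime
     id.job = JOB_FACTSYMBOLIC;              // set fields on the outer type only
     PetscCall(PetscCallMumps_Private(&id)); // copies outer -> inner, calls [sdcz]mumps_c(), copies back

   MatMumpsAllocateInternalID() and PetscCallMumps_Private() are defined below. */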

// Note: fixed-size arrays are allocated by MUMPS; redirect them to the outer struct
#define AllocateInternalID(MUMPS_STRUC_T, outer) \
  do { \
    MUMPS_STRUC_T *inner; \
    PetscCall(PetscNew(&inner)); \
    outer->icntl      = inner->icntl; \
    outer->cntl       = inner->cntl; \
    outer->info       = inner->info; \
    outer->infog      = inner->infog; \
    outer->rinfo      = inner->rinfo; \
    outer->rinfog     = inner->rinfog; \
    outer->ooc_tmpdir = inner->ooc_tmpdir; \
    outer->ooc_prefix = inner->ooc_prefix; \
    /* these three fields should never change after init */ \
    inner->comm_fortran = outer->comm_fortran; \
    inner->par          = outer->par; \
    inner->sym          = outer->sym; \
    outer->internal_id  = inner; \
  } while (0)

// Allocate the internal [SDCZ]MUMPS_STRUC_C ID data structure in the given <precision>, and link fields of the outer and the inner
static inline PetscErrorCode MatMumpsAllocateInternalID(XMUMPS_STRUC_C *outer, PetscPrecision precision)
{
  PetscFunctionBegin;
  outer->precision = precision;
#if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
  #if defined(PETSC_USE_COMPLEX)
  if (precision == PETSC_PRECISION_SINGLE) AllocateInternalID(CMUMPS_STRUC_C, outer);
  else AllocateInternalID(ZMUMPS_STRUC_C, outer);
  #else
  if (precision == PETSC_PRECISION_SINGLE) AllocateInternalID(SMUMPS_STRUC_C, outer);
  else AllocateInternalID(DMUMPS_STRUC_C, outer);
  #endif
#else
  AllocateInternalID(MUMPS_STRUC_C, outer);
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

#define FreeInternalIDFields(MUMPS_STRUC_T, outer) \
  do { \
    MUMPS_STRUC_T *inner = (MUMPS_STRUC_T *)(outer)->internal_id; \
    PetscCall(PetscFree(inner->a)); \
    PetscCall(PetscFree(inner->a_loc)); \
    PetscCall(PetscFree(inner->redrhs)); \
    PetscCall(PetscFree(inner->rhs)); \
    PetscCall(PetscFree(inner->rhs_sparse)); \
    PetscCall(PetscFree(inner->rhs_loc)); \
    PetscCall(PetscFree(inner->sol_loc)); \
    PetscCall(PetscFree(inner->schur)); \
  } while (0)

static inline PetscErrorCode MatMumpsFreeInternalID(XMUMPS_STRUC_C *outer)
{
  PetscFunctionBegin;
  if (outer->internal_id) { // sometimes, the inner is never created before we destroy the outer
#if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
    const PetscPrecision mumps_precision = outer->precision;
    if (mumps_precision != PETSC_SCALAR_PRECISION) { // Free internal buffers if we used mixed precision
  #if defined(PETSC_USE_COMPLEX)
      if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(CMUMPS_STRUC_C, outer);
      else FreeInternalIDFields(ZMUMPS_STRUC_C, outer);
  #else
      if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(SMUMPS_STRUC_C, outer);
      else FreeInternalIDFields(DMUMPS_STRUC_C, outer);
  #endif
    }
#endif
    PetscCall(PetscFree(outer->internal_id));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

// Make a companion MumpsScalar array (with a given PetscScalar array), to hold at least <n> MumpsScalars in the given <precision> and return the address at <ma>.
// <convert> indicates if we need to convert PetscScalars to MumpsScalars after allocating the MumpsScalar array.
// (For brevity, we use <ma> for the array address and <m> for its length in MumpsScalar, though in code they should be <*ma> and <*m>)
// If <ma> already points to a buffer/array, on input <m> should be its length. Note the buffer might be freed if it is not big enough for this request.
//
// The returned array is a companion, so how it is created depends on whether PetscScalar and MumpsScalar are the same.
// 1) If they are different, a separate array will be made and its length and address will be provided at <m> and <ma> on output.
// 2) Otherwise, <pa> will be returned in <ma>, and <m> will be zero on output.
//
//   Input parameters:
// + convert   - whether to do PetscScalar to MumpsScalar conversion
// . n         - length of the PetscScalar array
// . pa        - [n], points to the PetscScalar array
// . precision - precision of MumpsScalar
// . m         - on input, length of an existing MumpsScalar array <ma> if any, otherwise *m is just zero.
// - ma        - on input, an existing MumpsScalar array if any.
//
//   Output parameters:
// + m  - length of the MumpsScalar buffer at <ma> if MumpsScalar is different from PetscScalar, otherwise 0
// . ma - the MumpsScalar array, which could be an alias of <pa> when the two types are the same.
//
//   Note:
//    New memory, if allocated, is done via PetscMalloc1(), and is owned by the caller.
static PetscErrorCode MatMumpsMakeMumpsScalarArray(PetscBool convert, PetscCount n, const PetscScalar *pa, PetscPrecision precision, PetscCount *m, void **ma)
{
  PetscFunctionBegin;
#if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
  const PetscPrecision mumps_precision = precision;
  PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unsupported precision (%d). Must be single or double", (int)precision);
  #if defined(PETSC_USE_COMPLEX)
  if (mumps_precision != PETSC_SCALAR_PRECISION) {
    if (mumps_precision == PETSC_PRECISION_SINGLE) {
      if (*m < n) {
        PetscCall(PetscFree(*ma));
        PetscCall(PetscMalloc1(n, (CMUMPS_COMPLEX **)ma));
        *m = n;
      }
      if (convert) {
        CMUMPS_COMPLEX *b = *(CMUMPS_COMPLEX **)ma;
        for (PetscCount i = 0; i < n; i++) {
          b[i].r = PetscRealPart(pa[i]);
          b[i].i = PetscImaginaryPart(pa[i]);
        }
      }
    } else {
      if (*m < n) {
        PetscCall(PetscFree(*ma));
        PetscCall(PetscMalloc1(n, (ZMUMPS_COMPLEX **)ma));
        *m = n;
      }
      if (convert) {
        ZMUMPS_COMPLEX *b = *(ZMUMPS_COMPLEX **)ma;
        for (PetscCount i = 0; i < n; i++) {
          b[i].r = PetscRealPart(pa[i]);
          b[i].i = PetscImaginaryPart(pa[i]);
        }
      }
    }
  }
  #else
  if (mumps_precision != PETSC_SCALAR_PRECISION) {
    if (mumps_precision == PETSC_PRECISION_SINGLE) {
      if (*m < n) {
        PetscCall(PetscFree(*ma));
        PetscCall(PetscMalloc1(n, (SMUMPS_REAL **)ma));
        *m = n;
      }
      if (convert) {
        SMUMPS_REAL *b = *(SMUMPS_REAL **)ma;
        for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
      }
    } else {
      if (*m < n) {
        PetscCall(PetscFree(*ma));
        PetscCall(PetscMalloc1(n, (DMUMPS_REAL **)ma));
        *m = n;
      }
      if (convert) {
        DMUMPS_REAL *b = *(DMUMPS_REAL **)ma;
        for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
      }
    }
  }
  #endif
  else
#endif
  {
    if (*m != 0) PetscCall(PetscFree(*ma)); // free existing buffer if any
    *ma = (void *)pa;                       // same precision, make them alias
    *m  = 0;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
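
/* A minimal usage sketch (hypothetical values): suppose PetscScalar is double precision
   but MUMPS runs in single. The helper then allocates a float shadow buffer and, when
   <convert> is true, fills it; with matching precisions it would simply alias <pa>.

     PetscScalar *pa;          // n PetscScalars owned by PETSc
     void        *ma  = NULL;  // companion MumpsScalar buffer
     PetscCount   len = 0;     // current length of <ma>; 0 means none yet
     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, n, pa, PETSC_PRECISION_SINGLE, &len, &ma));
     // pass <ma> to MUMPS; on output len == n since a separate buffer was made
*/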

// Cast a MumpsScalar array <ma[n]> in <mumps_precision> to a PetscScalar array at address <pa>.
//
// 1) If the two types are different, cast array elements.
// 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op.
static PetscErrorCode MatMumpsCastMumpsScalarArray(PetscCount n, PetscPrecision mumps_precision, const void *ma, PetscScalar *pa)
{
  PetscFunctionBegin;
#if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
  if (mumps_precision != PETSC_SCALAR_PRECISION) {
  #if defined(PETSC_USE_COMPLEX)
    if (mumps_precision == PETSC_PRECISION_SINGLE) {
      PetscReal         *a = (PetscReal *)pa;
      const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma;
      for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i];
    } else {
      PetscReal         *a = (PetscReal *)pa;
      const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma;
      for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i];
    }
  #else
    if (mumps_precision == PETSC_PRECISION_SINGLE) {
      const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma;
      for (PetscCount i = 0; i < n; i++) pa[i] = b[i];
    } else {
      const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma;
      for (PetscCount i = 0; i < n; i++) pa[i] = b[i];
    }
  #endif
  } else
#endif
    PetscCall(PetscArraycpy((PetscScalar *)pa, (PetscScalar *)ma, n));
  PetscFunctionReturn(PETSC_SUCCESS);
}

// Cast a PetscScalar array <pa[n]> to a MumpsScalar array in the given <mumps_precision> at address <ma>.
//
// 1) If the two types are different, cast array elements.
// 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op.
static PetscErrorCode MatMumpsCastPetscScalarArray(PetscCount n, const PetscScalar *pa, PetscPrecision mumps_precision, const void *ma)
{
  PetscFunctionBegin;
#if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
  if (mumps_precision != PETSC_SCALAR_PRECISION) {
  #if defined(PETSC_USE_COMPLEX)
    if (mumps_precision == PETSC_PRECISION_SINGLE) {
      CMUMPS_COMPLEX *b = (CMUMPS_COMPLEX *)ma;
      for (PetscCount i = 0; i < n; i++) {
        b[i].r = PetscRealPart(pa[i]);
        b[i].i = PetscImaginaryPart(pa[i]);
      }
    } else {
      ZMUMPS_COMPLEX *b = (ZMUMPS_COMPLEX *)ma;
      for (PetscCount i = 0; i < n; i++) {
        b[i].r = PetscRealPart(pa[i]);
        b[i].i = PetscImaginaryPart(pa[i]);
      }
    }
  #else
    if (mumps_precision == PETSC_PRECISION_SINGLE) {
      SMUMPS_REAL *b = (SMUMPS_REAL *)ma;
      for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
    } else {
      DMUMPS_REAL *b = (DMUMPS_REAL *)ma;
      for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
    }
  #endif
  } else
#endif
    PetscCall(PetscArraycpy((PetscScalar *)ma, (PetscScalar *)pa, n));
  PetscFunctionReturn(PETSC_SUCCESS);
}
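
/* A round-trip sketch of the two cast helpers above (hypothetical buffers): values go
   PetscScalar -> MumpsScalar before a MUMPS call and back after it.

     PetscCall(MatMumpsCastPetscScalarArray(n, pa, mumps_precision, ma)); // fill the MUMPS input
     // ... call MUMPS, which writes its output into <ma> ...
     PetscCall(MatMumpsCastMumpsScalarArray(n, mumps_precision, ma, pa)); // read the MUMPS output

   When mumps_precision equals the PetscScalar precision, both calls degenerate to a memcpy. */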

static inline MPI_Datatype MPIU_MUMPSREAL(const XMUMPS_STRUC_C *id)
{
  return id->precision == PETSC_PRECISION_DOUBLE ? MPI_DOUBLE : MPI_FLOAT;
}

#define PreMumpsCall(inner, outer, mumpsscalar) \
  do { \
    inner->job           = outer->job; \
    inner->n             = outer->n; \
    inner->nblk          = outer->nblk; \
    inner->nnz           = outer->nnz; \
    inner->irn           = outer->irn; \
    inner->jcn           = outer->jcn; \
    inner->a             = (mumpsscalar *)outer->a; \
    inner->nnz_loc       = outer->nnz_loc; \
    inner->irn_loc       = outer->irn_loc; \
    inner->jcn_loc       = outer->jcn_loc; \
    inner->a_loc         = (mumpsscalar *)outer->a_loc; \
    inner->blkptr        = outer->blkptr; \
    inner->blkvar        = outer->blkvar; \
    inner->perm_in       = outer->perm_in; \
    inner->rhs           = (mumpsscalar *)outer->rhs; \
    inner->redrhs        = (mumpsscalar *)outer->redrhs; \
    inner->rhs_sparse    = (mumpsscalar *)outer->rhs_sparse; \
    inner->sol_loc       = (mumpsscalar *)outer->sol_loc; \
    inner->rhs_loc       = (mumpsscalar *)outer->rhs_loc; \
    inner->irhs_sparse   = outer->irhs_sparse; \
    inner->irhs_ptr      = outer->irhs_ptr; \
    inner->isol_loc      = outer->isol_loc; \
    inner->irhs_loc      = outer->irhs_loc; \
    inner->nrhs          = outer->nrhs; \
    inner->lrhs          = outer->lrhs; \
    inner->lredrhs       = outer->lredrhs; \
    inner->nz_rhs        = outer->nz_rhs; \
    inner->lsol_loc      = outer->lsol_loc; \
    inner->nloc_rhs      = outer->nloc_rhs; \
    inner->lrhs_loc      = outer->lrhs_loc; \
    inner->schur_lld     = outer->schur_lld; \
    inner->size_schur    = outer->size_schur; \
    inner->listvar_schur = outer->listvar_schur; \
    inner->schur         = (mumpsscalar *)outer->schur; \
  } while (0)

#define PostMumpsCall(inner, outer) \
  do { \
    outer->pivnul_list = inner->pivnul_list; \
    outer->mapping     = inner->mapping; \
  } while (0)

// Entry point for PETSc to call MUMPS
static inline PetscErrorCode PetscCallMumps_Private(XMUMPS_STRUC_C *outer)
{
  PetscFunctionBegin;
#if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
  #if defined(PETSC_USE_COMPLEX)
  if (outer->precision == PETSC_PRECISION_SINGLE) {
    CMUMPS_STRUC_C *inner = (CMUMPS_STRUC_C *)outer->internal_id;
    PreMumpsCall(inner, outer, CMUMPS_COMPLEX);
    PetscStackCallExternalVoid("cmumps_c", cmumps_c(inner));
    PostMumpsCall(inner, outer);
  } else {
    ZMUMPS_STRUC_C *inner = (ZMUMPS_STRUC_C *)outer->internal_id;
    PreMumpsCall(inner, outer, ZMUMPS_COMPLEX);
    PetscStackCallExternalVoid("zmumps_c", zmumps_c(inner));
    PostMumpsCall(inner, outer);
  }
  #else
  if (outer->precision == PETSC_PRECISION_SINGLE) {
    SMUMPS_STRUC_C *inner = (SMUMPS_STRUC_C *)outer->internal_id;
    PreMumpsCall(inner, outer, SMUMPS_REAL);
    PetscStackCallExternalVoid("smumps_c", smumps_c(inner));
    PostMumpsCall(inner, outer);
  } else {
    DMUMPS_STRUC_C *inner = (DMUMPS_STRUC_C *)outer->internal_id;
    PreMumpsCall(inner, outer, DMUMPS_REAL);
    PetscStackCallExternalVoid("dmumps_c", dmumps_c(inner));
    PostMumpsCall(inner, outer);
  }
  #endif
#else
  MUMPS_STRUC_C *inner = (MUMPS_STRUC_C *)outer->internal_id;
  PreMumpsCall(inner, outer, MumpsScalar);
  PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(inner));
  PostMumpsCall(inner, outer);
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* macros s.t. indices match the MUMPS documentation */
#define ICNTL(I) icntl[(I) - 1]
#define INFOG(I) infog[(I) - 1]
#define INFO(I)  info[(I) - 1]

// Get a value from a MumpsScalar array, which is the <F> field in the MUMPS_STRUC_C struct. The value is convertible to PetscScalar. Note no minus 1 on I!
#if defined(PETSC_USE_COMPLEX)
  #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((CMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((CMUMPS_COMPLEX *)(ID).F)[I].i : ((ZMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((ZMUMPS_COMPLEX *)(ID).F)[I].i)
#else
  #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).F)[I] : ((double *)(ID).F)[I])
#endif

// Get a value from MumpsReal arrays. The value is convertible to PetscReal.
#define ID_CNTL_GET(ID, I)   ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).cntl)[(I) - 1] : ((double *)(ID).cntl)[(I) - 1])
#define ID_RINFOG_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfog)[(I) - 1] : ((double *)(ID).rinfog)[(I) - 1])
#define ID_RINFO_GET(ID, I)  ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfo)[(I) - 1] : ((double *)(ID).rinfo)[(I) - 1])

// Set the I-th entry of the MumpsReal array id.cntl[] with a PetscReal <VAL>
#define ID_CNTL_SET(ID, I, VAL) \
  do { \
    if ((ID).precision == PETSC_PRECISION_SINGLE) ((float *)(ID).cntl)[(I) - 1] = (VAL); \
    else ((double *)(ID).cntl)[(I) - 1] = (VAL); \
  } while (0)
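
/* A usage sketch of the accessor macros (indices are illustrative; see the MUMPS users
   manual for the meaning of each entry): they hide the precision of the underlying
   MumpsReal arrays and follow MUMPS' 1-based numbering.

     ID_CNTL_SET(mumps->id, 3, 1e-6);           // set CNTL(3)
     PetscReal r = ID_RINFOG_GET(mumps->id, 1); // read RINFOG(1)
*/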

/* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore the CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  #define PetscMUMPS_c(mumps) \
    do { \
      if (mumps->use_petsc_omp_support) { \
        if (mumps->is_omp_master) { \
          PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
          PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
          PetscCall(PetscCallMumps_Private(&mumps->id)); \
          PetscCall(PetscFPTrapPop()); \
          PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
        } \
        PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
        /* Global info is the same on all processes, so we Bcast it within omp_comm. Local info is specific \
           to each process, so we only Bcast info[1], an error code, and leave the others (since they do    \
           not have an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82.       \
           omp_comm is a small shared-memory communicator, hence doing multiple Bcasts as shown below is OK. \
        */ \
        SMUMPS_STRUC_C tmp; /* All MUMPS_STRUC_C types have the same lengths for these info arrays */ \
        PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(tmp.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(tmp.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfog), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfo), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \
      } else { \
        PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
        PetscCall(PetscCallMumps_Private(&mumps->id)); \
        PetscCall(PetscFPTrapPop()); \
      } \
    } while (0)
#else
  #define PetscMUMPS_c(mumps) \
    do { \
      PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
      PetscCall(PetscCallMumps_Private(&mumps->id)); \
      PetscCall(PetscFPTrapPop()); \
    } while (0)
#endif

typedef struct Mat_MUMPS Mat_MUMPS;
struct Mat_MUMPS {
  XMUMPS_STRUC_C id;

  MatStructure   matstruc;
  PetscMPIInt    myid, petsc_size;
  PetscMUMPSInt *irn, *jcn;       /* the (i,j,v) triplets passed to MUMPS */
  PetscScalar   *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. Hence val_alloc. */
  PetscCount     nnz;             /* number of nonzeros. The type is called selective 64-bit in MUMPS */
  PetscMUMPSInt  sym;
  MPI_Comm       mumps_comm;
  PetscMUMPSInt *ICNTL_pre;
  PetscReal     *CNTL_pre;
  PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
  PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
  PetscMUMPSInt  ICNTL26;
  PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt    *rhs_nrow, max_nrhs;
  PetscMPIInt *rhs_recvcounts, *rhs_disps;
  PetscScalar *rhs_loc, *rhs_recvbuf;
#endif
  Vec            b_seq, x_seq;
  PetscInt       ninfo, *info; /* which INFO to display */
  PetscInt       sizeredrhs;
  PetscScalar   *schur_sol;
  PetscInt       schur_sizesol;
  PetscScalar   *redrhs;              // buffer in PetscScalar in case MumpsScalar is in a different precision
  PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
  PetscCount     cur_ilen, cur_jlen;  /* current len of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);

  /* Support for MATNEST */
  PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
  PetscCount  *nest_vals_start;
  PetscScalar *nest_vals;

  /* stuff used by PETSc/MUMPS OpenMP support */
  PetscBool    use_petsc_omp_support;
  PetscOmpCtrl omp_ctrl;             /* an OpenMP controller with which blocked processes release their CPUs (MPI_Barrier does not have this guarantee) */
  MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is the PETSc matrix's comm */
  PetscCount  *recvcount;            /* a collection of nnz on omp_master */
  PetscMPIInt  tag, omp_comm_size;
  PetscBool    is_omp_master; /* is this rank the master of omp_comm */
  MPI_Request *reqs;
};

/* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
   Here, nrow is the number of rows, ia[] is the row pointer array and ja[] the column indices.
 */
static PetscErrorCode PetscMUMPSIntCSRCast(PETSC_UNUSED Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
{
  PetscInt nnz = ia[nrow] - 1; /* MUMPS uses 1-based indices. We use PetscInt instead of PetscCount since MUMPS only uses PetscMUMPSInt for the rhs */

  PetscFunctionBegin;
#if defined(PETSC_USE_64BIT_INDICES)
  {
    PetscInt i;
    if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
      PetscCall(PetscFree(mumps->ia_alloc));
      PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
      mumps->cur_ilen = nrow + 1;
    }
    if (nnz > mumps->cur_jlen) {
      PetscCall(PetscFree(mumps->ja_alloc));
      PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
      mumps->cur_jlen = nnz;
    }
    for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &mumps->ia_alloc[i]));
    for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &mumps->ja_alloc[i]));
    *ia_mumps = mumps->ia_alloc;
    *ja_mumps = mumps->ja_alloc;
  }
#else
  *ia_mumps = ia;
  *ja_mumps = ja;
#endif
  PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
  PetscFunctionReturn(PETSC_SUCCESS);
}
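
/* A minimal usage sketch (hypothetical CSR data): with 64-bit PetscInt the cast goes
   through mumps->ia_alloc/ja_alloc; with 32-bit PetscInt the input arrays are reused.

     PetscInt      *ia, *ja; // 1-based CSR of a sparse rhs with nrow rows
     PetscMUMPSInt *ia_m, *ja_m, nz_m;
     PetscCall(PetscMUMPSIntCSRCast(mumps, nrow, ia, ja, &ia_m, &ja_m, &nz_m));
     // ia_m/ja_m can now be handed to MUMPS, e.g. as irhs_ptr/irhs_sparse
*/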

static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
{
  PetscFunctionBegin;
  PetscCall(PetscFree(mumps->id.listvar_schur));
  PetscCall(PetscFree(mumps->redrhs)); // if needed, id.redrhs will be freed in MatMumpsFreeInternalID()
  PetscCall(PetscFree(mumps->schur_sol));
  mumps->id.size_schur = 0;
  mumps->id.schur_lld  = 0;
  if (mumps->id.internal_id) mumps->id.ICNTL(19) = 0; // sometimes, the inner id is not yet built
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* solve with rhs in mumps->id.redrhs and return in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS           *mumps = (Mat_MUMPS *)F->data;
  Mat                  S, B, X; // solve S*X = B; all three matrices are dense
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;
  const PetscScalar   *xarray;

  PetscFunctionBegin;
  PetscCall(MatFactorFactorizeSchurComplement(F));
  PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
  PetscCall(MatMumpsCastMumpsScalarArray(mumps->sizeredrhs, mumps->id.precision, mumps->id.redrhs, mumps->redrhs));

  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &B));
  PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  PetscCall(MatBindToCPU(B, S->boundtocpu));
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatMatSolveTranspose(S, B, X));
    } else {
      PetscCall(MatMatSolve(S, B, X));
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    sizesol = mumps->id.nrhs * mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      PetscCall(PetscFree(mumps->schur_sol));
      PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
      mumps->schur_sizesol = sizesol;
    }
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    PetscCall(MatProductCreateWithMat(S, B, NULL, X));
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
    } else {
      PetscCall(MatProductSetType(X, MATPRODUCT_AB));
    }
    PetscCall(MatProductSetFromOptions(X));
    PetscCall(MatProductSymbolic(X));
    PetscCall(MatProductNumeric(X));

    PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
  }
  // We MUST get the array from X (not B), though they share the same host array. We can only guarantee X has the correct data on device.
  PetscCall(MatDenseGetArrayRead(X, &xarray)); // xarray should be mumps->redrhs, but using MatDenseGetArrayRead is safer with GPUs.
  PetscCall(MatMumpsCastPetscScalarArray(mumps->sizeredrhs, xarray, mumps->id.precision, mumps->id.redrhs));
  PetscCall(MatDenseRestoreArrayRead(X, &xarray));
  PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
  PetscCall(MatDestroy(&B));
  PetscCall(MatDestroy(&X));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when the Schur complement has not been computed */
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      mumps->id.lredrhs = mumps->id.size_schur;
      mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
      if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs));
      PetscCall(PetscFree(mumps->redrhs));
      PetscCall(PetscMalloc1(mumps->sizeredrhs, &mumps->redrhs));
      PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, mumps->sizeredrhs, mumps->redrhs, mumps->id.precision, &mumps->id.redrhs_len, &mumps->id.redrhs));
    }
  } else {                                    /* prepare for the expansion step */
    PetscCall(MatMumpsSolveSchur_Private(F)); /* solve the Schur complement, put the solution in id.redrhs (this has to be done by the MUMPS user, so basically us) */
    mumps->id.ICNTL(26) = 2;                  /* expansion phase */
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free the MUMPS internal array for redrhs if we have solved for multiple rhs, in order to save memory */
    if (mumps->id.nrhs > 1) {
      if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs));
      PetscCall(PetscFree(mumps->redrhs));
      mumps->id.redrhs_len = 0;
      mumps->id.lredrhs    = 0;
      mumps->sizeredrhs    = 0;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatConvertToTriples_A_B - convert a PETSc matrix to triples: row[nz], col[nz], val[nz]

  input:
    A       - matrix in aij, baij or sbaij format
    shift   - 0: C style output triples; 1: Fortran style output triples.
    reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triples
              MAT_REUSE_MATRIX:   only the values in the v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col indices, matrix values (matrix triples)

  The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
  freed with PetscFree(mumps->irn). This is not ideal code; the fact that v is ONLY sometimes part of mumps->irn means
  that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
 */
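
/* A worked example (illustrative): with shift = 1 (Fortran-style indices, what MUMPS
   expects), the 2x2 matrix
       [ 2  3 ]
       [ 0  5 ]
   stored in AIJ format yields nnz = 3 and the triples
       r = {1, 1, 2}, c = {1, 2, 2}, v = {2.0, 3.0, 5.0}
   i.e., one (row, col, value) entry per stored nonzero. */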

static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscScalar *av;
  const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
  PetscCount         nz, rnz, k;
  PetscMUMPSInt     *row, *col;
  Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArrayRead(A, &av));
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = aa->nz;
    ai = aa->i;
    aj = aa->j;
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (PetscCount i = k = 0; i < M; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (PetscCount j = 0; j < rnz; j++) {
        PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
        PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
        k++;
      }
    }
    mumps->val = (PetscScalar *)av;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqaij_seqaij(), so one needs to copy the memory */
  else mumps->val = (PetscScalar *)av;                                           /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  PetscCount     nz, i, j, k, r;
  Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
  PetscMUMPSInt *row, *col;

  PetscFunctionBegin;
  nz = a->sliidx[a->totalslices];
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (i = k = 0; i < a->totalslices; i++) {
      for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
    }
    for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
    mumps->val = a->val;
  } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, a->val, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsell_seqaij(), so one needs to copy the memory */
  else mumps->val = a->val;                                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
  const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
  PetscCount      M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m;
  PetscInt        bs;
  PetscMUMPSInt  *row, *col;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatGetBlockSize(A, &bs));
    M  = A->rmap->N / bs;
    ai = aa->i;
    aj = aa->j;
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (i = 0; i < M; i++) {
      ajj = aj + ai[i];
      rnz = ai[i + 1] - ai[i];
      for (k = 0; k < rnz; k++) {
        for (j = 0; j < bs; j++) {
          for (m = 0; m < bs; m++) {
            PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
            PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
            idx++;
          }
        }
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
    mumps->val = aa->a;
  } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqbaij_seqaij(), so one needs to copy the memory */
  else mumps->val = aa->a;                                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt *ai, *aj, *ajj;
  PetscInt        bs;
  PetscCount      nz, rnz, i, j, k, m;
  PetscMUMPSInt  *row, *col;
  PetscScalar    *val;
  Mat_SeqSBAIJ   *aa  = (Mat_SeqSBAIJ *)A->data;
  const PetscInt  bs2 = aa->bs2, mbs = aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool isset, hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  ai = aa->i;
  aj = aa->j;
  PetscCall(MatGetBlockSize(A, &bs));
  if (reuse == MAT_INITIAL_MATRIX) {
    const PetscCount alloc_size = aa->nz * bs2;

    PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col));
    if (bs > 1) {
      PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc));
      mumps->val = mumps->val_alloc;
    } else {
      mumps->val = aa->a;
    }
    mumps->irn = row;
    mumps->jcn = col;
  } else {
    row = mumps->irn;
    col = mumps->jcn;
  }
  val = mumps->val;

  nz = 0;
  if (bs > 1) {
    for (i = 0; i < mbs; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (j = 0; j < rnz; j++) {
        for (k = 0; k < bs; k++) {
          for (m = 0; m < bs; m++) {
            if (ajj[j] > i || k >= m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
                PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
              }
              val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
            }
          }
        }
      }
    }
  } else if (reuse == MAT_INITIAL_MATRIX) {
    for (i = 0; i < mbs; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (j = 0; j < rnz; j++) {
        PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
        PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
        nz++;
      }
    }
    PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscCount_FMT " != %" PetscInt_FMT, nz, aa->nz);
  } else if (mumps->nest_vals)
    PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX, MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsbaij_seqsbaij(), so one needs to copy the memory */
  else mumps->val = aa->a;                               /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *ajj, *adiag, M = A->rmap->n;
  PetscCount         nz, rnz, i, j;
  const PetscScalar *av, *v1;
  PetscScalar       *val;
  PetscMUMPSInt     *row, *col;
  Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
  PetscBool          missing;
#if defined(PETSC_USE_COMPLEX)
  PetscBool hermitian, isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatSeqAIJGetArrayRead(A, &av));
  ai    = aa->i;
  aj    = aa->j;
  adiag = aa->diag;
  PetscCall(MatMissingDiagonal_SeqAIJ(A, &missing, NULL));
  if (reuse == MAT_INITIAL_MATRIX) {
    /* count nz in the upper triangular part of A */
    nz = 0;
    if (missing) {
      for (i = 0; i < M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
          for (j = ai[i]; j < ai[i + 1]; j++) {
            if (aj[j] < i) continue;
            nz++;
          }
        } else {
          nz += ai[i + 1] - adiag[i];
        }
      }
    } else {
      for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
    }
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;

    nz = 0;
    if (missing) {
      for (i = 0; i < M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
          for (j = ai[i]; j < ai[i + 1]; j++) {
            if (aj[j] < i) continue;
            PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
            PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
            val[nz] = av[j];
            nz++;
          }
        } else {
          rnz = ai[i + 1] - adiag[i];
          ajj = aj + adiag[i];
          v1  = av + adiag[i];
          for (j = 0; j < rnz; j++) {
            PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
            PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i = 0; i < M; i++) {
        rnz = ai[i + 1] - adiag[i];
        ajj = aj + adiag[i];
        v1  = av + adiag[i];
        for (j = 0; j < rnz; j++) {
          PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
          PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
          val[nz++] = v1[j];
        }
      }
    }
  } else {
    nz  = 0;
    val = mumps->val;
    if (missing) {
      for (i = 0; i < M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
          for (j = ai[i]; j < ai[i + 1]; j++) {
            if (aj[j] < i) continue;
            val[nz++] = av[j];
          }
        } else {
          rnz = ai[i + 1] - adiag[i];
          v1  = av + adiag[i];
          for (j = 0; j < rnz; j++) val[nz++] = v1[j];
        }
      }
    } else {
      for (i = 0; i < M; i++) {
        rnz = ai[i + 1] - adiag[i];
        v1  = av + adiag[i];
        for (j = 0; j < rnz; j++) val[nz++] = v1[j];
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
  PetscInt           bs;
  PetscCount         rstart, nz, i, j, k, m, jj, irow, countA, countB;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av, *bv, *v1, *v2;
  PetscScalar       *val;
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ *)mat->A->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)mat->B->data;
  const PetscInt     bs2 = aa->bs2, mbs = aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool hermitian, isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatGetBlockSize(A, &bs));
  rstart = A->rmap->rstart;
  ai     = aa->i;
  aj     = aa->j;
  bi     = bb->i;
  bj     = bb->j;
  av     = aa->a;
  bv     = bb->a;

  garray = mat->garray;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    /* cannot determine the exact mumps->nnz yet because of the SBAIJ format */
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < mbs; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i] * bs2;
    v2     = bv + bi[i] * bs2;

    if (bs > 1) {
      /* A-part */
      for (j = 0; j < countA; j++) {
        for (k = 0; k < bs; k++) {
          for (m = 0; m < bs; m++) {
            if (rstart + ajj[j] * bs > irow || k >= m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
                PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
              }
              val[jj++] = v1[j * bs2 + m + k * bs];
            }
          }
        }
      }

      /* B-part */
      for (j = 0; j < countB; j++) {
        for (k = 0; k < bs; k++) {
          for (m = 0; m < bs; m++) {
            if (reuse == MAT_INITIAL_MATRIX) {
              PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
              PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
            }
            val[jj++] = v2[j * bs2 + m + k * bs];
          }
        }
      }
    } else {
      /* A-part */
      for (j = 0; j < countA; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
          PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
        }
        val[jj++] = v1[j];
      }

      /* B-part */
      for (j = 0; j < countB; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
          PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
        }
        val[jj++] = v2[j];
      }
    }
    irow += bs;
  }
  if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
  PetscCount         rstart, cstart, nz, i, j, jj, irow, countA, countB;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av, *bv, *v1, *v2;
  PetscScalar       *val;
  Mat                Ad, Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
  PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
  PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));

  aa = (Mat_SeqAIJ *)Ad->data;
  bb = (Mat_SeqAIJ *)Ao->data;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;

  rstart = A->rmap->rstart;
  cstart = A->cmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz = (PetscCount)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < m; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j = 0; j < countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj]));
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j = 0; j < countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
  PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ *)mat->A->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)mat->B->data;
  const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
  const PetscInt    *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart;
  const PetscInt     bs2 = mat->bs2;
  PetscInt           bs;
  PetscCount         nz, i, j, k, n, jj, irow, countA, countB, idx;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
  PetscScalar       *val;

  PetscFunctionBegin;
  PetscCall(MatGetBlockSize(A, &bs));
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = bs2 * (aa->nz + bb->nz);
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < mbs; i++) {
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    ajj    = aj + ai[i];
    bjj    = bj + bi[i];
    v1     = av + bs2 * ai[i];
    v2     = bv + bs2 * bi[i];

    idx = 0;
    /* A-part */
    for (k = 0; k < countA; k++) {
      for (j = 0; j < bs; j++) {
        for (n = 0; n < bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
            PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj]));
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part */
    for (k = 0; k < countB; k++) {
      for (j = 0; j < bs; j++) {
        for (n = 0; n < bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
            PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1305 
1306 static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1307 {
1308   const PetscInt    *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
1309   PetscCount         rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
1310   PetscMUMPSInt     *row, *col;
1311   const PetscScalar *av, *bv, *v1, *v2;
1312   PetscScalar       *val;
1313   Mat                Ad, Ao;
1314   Mat_SeqAIJ        *aa;
1315   Mat_SeqAIJ        *bb;
1316 #if defined(PETSC_USE_COMPLEX)
1317   PetscBool hermitian, isset;
1318 #endif
1319 
1320   PetscFunctionBegin;
1321 #if defined(PETSC_USE_COMPLEX)
1322   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
1323   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
1324 #endif
1325   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
1326   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
1327   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
1328 
1329   aa    = (Mat_SeqAIJ *)Ad->data;
1330   bb    = (Mat_SeqAIJ *)Ao->data;
1331   ai    = aa->i;
1332   aj    = aa->j;
1333   adiag = aa->diag;
1334   bi    = bb->i;
1335   bj    = bb->j;
1336 
1337   rstart = A->rmap->rstart;
1338 
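       /* For Cholesky, MUMPS is given only the upper triangular part.  Within the diagonal block this is everything
          from the diagonal entry onward (ai[i + 1] - adiag[i] entries in row i); within the off-diagonal block a
          column belongs to the upper triangle exactly when its global index lies beyond this process's row range,
          i.e., garray[bjj[j]] > rstart, since columns before rstart sit in the lower triangle for every local row. */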
1339   if (reuse == MAT_INITIAL_MATRIX) {
1340     nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
1341     nzb = 0; /* num of upper triangular entries in mat->B */
1342     for (i = 0; i < m; i++) {
1343       nza += (ai[i + 1] - adiag[i]);
1344       countB = bi[i + 1] - bi[i];
1345       bjj    = bj + bi[i];
1346       for (j = 0; j < countB; j++) {
1347         if (garray[bjj[j]] > rstart) nzb++;
1348       }
1349     }
1350 
1351     nz = nza + nzb; /* total nz of upper triangular part of mat */
1352     PetscCall(PetscMalloc2(nz, &row, nz, &col));
1353     PetscCall(PetscMalloc1(nz, &val));
1354     mumps->nnz = nz;
1355     mumps->irn = row;
1356     mumps->jcn = col;
1357     mumps->val = mumps->val_alloc = val;
1358   } else {
1359     val = mumps->val;
1360   }
1361 
1362   jj   = 0;
1363   irow = rstart;
1364   for (i = 0; i < m; i++) {
1365     ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
1366     v1     = av + adiag[i];
1367     countA = ai[i + 1] - adiag[i];
1368     countB = bi[i + 1] - bi[i];
1369     bjj    = bj + bi[i];
1370     v2     = bv + bi[i];
1371 
1372     /* A-part */
1373     for (j = 0; j < countA; j++) {
1374       if (reuse == MAT_INITIAL_MATRIX) {
1375         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1376         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
1377       }
1378       val[jj++] = v1[j];
1379     }
1380 
1381     /* B-part */
1382     for (j = 0; j < countB; j++) {
1383       if (garray[bjj[j]] > rstart) {
1384         if (reuse == MAT_INITIAL_MATRIX) {
1385           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1386           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
1387         }
1388         val[jj++] = v2[j];
1389       }
1390     }
1391     irow++;
1392   }
1393   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
1394   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
1395   PetscFunctionReturn(PETSC_SUCCESS);
1396 }
1397 
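     /* A MATDIAGONAL block contributes exactly one triple (i, i, d_i) per local row.  The values can alias the
        diagonal Vec's array directly, so no copy is needed unless the block sits inside a MATNEST (see the
        MAT_REUSE_MATRIX branch below). */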
1398 static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1399 {
1400   const PetscScalar *av;
1401   const PetscInt     M = A->rmap->n;
1402   PetscCount         i;
1403   PetscMUMPSInt     *row, *col;
1404   Vec                v;
1405 
1406   PetscFunctionBegin;
1407   PetscCall(MatDiagonalGetDiagonal(A, &v));
1408   PetscCall(VecGetArrayRead(v, &av));
1409   if (reuse == MAT_INITIAL_MATRIX) {
1410     PetscCall(PetscMalloc2(M, &row, M, &col));
1411     for (i = 0; i < M; i++) {
1412       PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
1413       col[i] = row[i];
1414     }
1415     mumps->val = (PetscScalar *)av;
1416     mumps->irn = row;
1417     mumps->jcn = col;
1418     mumps->nnz = M;
1419   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, M)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_diagonal_xaij(), so one needs to copy the memory */
1420   else mumps->val = (PetscScalar *)av;                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
1421   PetscCall(VecRestoreArrayRead(v, &av));
1422   PetscFunctionReturn(PETSC_SUCCESS);
1423 }
1424 
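     /* A dense local block of m rows and N global columns is flattened column by column into m * N triples: the
        row indices of column 0 are replicated for every later column and col[] stores 0-based column indices (the
        shift parameter is unused, presumably because callers such as MatConvertToTriples_nest_xaij() remap the
        indices afterwards).  When the leading dimension lda differs from m, the values are compacted into a
        contiguous buffer since MUMPS expects exactly one value per triple. */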
1425 static PetscErrorCode MatConvertToTriples_dense_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1426 {
1427   PetscScalar   *v;
1428   const PetscInt m = A->rmap->n, N = A->cmap->N;
1429   PetscInt       lda;
1430   PetscCount     i, j;
1431   PetscMUMPSInt *row, *col;
1432 
1433   PetscFunctionBegin;
1434   PetscCall(MatDenseGetArray(A, &v));
1435   PetscCall(MatDenseGetLDA(A, &lda));
1436   if (reuse == MAT_INITIAL_MATRIX) {
1437     PetscCall(PetscMalloc2(m * N, &row, m * N, &col));
1438     for (i = 0; i < m; i++) {
1439       col[i] = 0;
1440       PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
1441     }
1442     for (j = 1; j < N; j++) {
1443       for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(j, col + i + m * j));
1444       PetscCall(PetscArraycpy(row + m * j, row + m * (j - 1), m));
1445     }
1446     if (lda == m) mumps->val = v;
1447     else {
1448       PetscCall(PetscMalloc1(m * N, &mumps->val));
1449       mumps->val_alloc = mumps->val;
1450       for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
1451     }
1452     mumps->irn = row;
1453     mumps->jcn = col;
1454     mumps->nnz = m * N;
1455   } else {
1456     if (lda == m && !mumps->nest_vals) mumps->val = v;
1457     else {
1458       for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
1459     }
1460   }
1461   PetscCall(MatDenseRestoreArray(A, &v));
1462   PetscFunctionReturn(PETSC_SUCCESS);
1463 }
1464 
1465 // If the input Mat (sub) is a MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL, this function repeatedly retrieves the parent Mat until
1466 // the parent is neither of those types, and it returns the accumulated shift and scaling, whether the parent Mat should be conjugated, and
1467 // whether its rows and columns should be swapped
1468 // TODO FIXME: this should not be in this file and should instead be refactored to where the same logic applies, e.g., MatAXPY_Dense_Nest()
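     // The accumulation below treats each virtual transpose as vshift + vscale * op(parent): composing the running
     // pair (*vshift, *vscale) with an inner shell carrying (s[0], s[1]) gives
     //   *vshift + *vscale * (s[0] + s[1] * op(parent)) = (*vshift + s[0] * *vscale) + (s[1] * *vscale) * op(parent),
     // matching the updates in the loop.  A Hermitian transpose flips the pending conjugation flag, and while a
     // conjugation is pending the shell's coefficients are conjugated first so that the deferred conjugation of the
     // values recovers them.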
1469 static PetscErrorCode MatGetTranspose_TransposeVirtual(Mat *sub, PetscBool *conjugate, PetscScalar *vshift, PetscScalar *vscale, PetscBool *swap)
1470 {
1471   Mat         A;
1472   PetscScalar s[2];
1473   PetscBool   isTrans, isHTrans, compare;
1474 
1475   PetscFunctionBegin;
1476   do {
1477     PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATTRANSPOSEVIRTUAL, &isTrans));
1478     if (isTrans) {
1479       PetscCall(MatTransposeGetMat(*sub, &A));
1480       isHTrans = PETSC_FALSE;
1481     } else {
1482       PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
1483       if (isHTrans) PetscCall(MatHermitianTransposeGetMat(*sub, &A));
1484     }
1485     compare = (PetscBool)(isTrans || isHTrans);
1486     if (compare) {
1487       if (vshift && vscale) {
1488         PetscCall(MatShellGetScalingShifts(*sub, s, s + 1, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
1489         if (!*conjugate) {
1490           *vshift += s[0] * *vscale;
1491           *vscale *= s[1];
1492         } else {
1493           *vshift += PetscConj(s[0]) * *vscale;
1494           *vscale *= PetscConj(s[1]);
1495         }
1496       }
1497       if (swap) *swap = (PetscBool)!*swap;
1498       if (isHTrans && conjugate) *conjugate = (PetscBool)!*conjugate;
1499       *sub = A;
1500     }
1501   } while (compare);
1502   PetscFunctionReturn(PETSC_SUCCESS);
1503 }
1504 
1505 static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1506 {
1507   Mat     **mats;
1508   PetscInt  nr, nc;
1509   PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE;
1510 
1511   PetscFunctionBegin;
1512   PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
1513   if (reuse == MAT_INITIAL_MATRIX) {
1514     PetscMUMPSInt *irns, *jcns;
1515     PetscScalar   *vals;
1516     PetscCount     totnnz, cumnnz, maxnnz;
1517     PetscInt      *pjcns_w, Mbs = 0;
1518     IS            *rows, *cols;
1519     PetscInt     **rows_idx, **cols_idx;
1520 
1521     cumnnz = 0;
1522     maxnnz = 0;
1523     PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples));
1524     for (PetscInt r = 0; r < nr; r++) {
1525       for (PetscInt c = 0; c < nc; c++) {
1526         Mat sub = mats[r][c];
1527 
1528         mumps->nest_convert_to_triples[r * nc + c] = NULL;
1529         if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */
1530         if (sub) {
1531           PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL;
1532           PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense;
1533           MatInfo   info;
1534 
1535           PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL));
1536           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
1537           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
1538           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
1539           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
1540           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
1541           PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
1542           PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
1543           PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
1544 
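               /* Converter dispatch: for Cholesky, diagonal blocks go through the *_sbaij converters, which emit
                  only the upper triangular part, while blocks above the diagonal are converted in full; without
                  Cholesky every block is converted in full.  Unsupported block types trip the PetscCheck below. */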
1545           if (chol) {
1546             if (r == c) {
1547               if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij;
1548               else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij;
1549               else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij;
1550               else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij;
1551               else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
1552               else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
1553             } else {
1554               if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
1555               else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
1556               else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
1557               else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
1558               else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
1559               else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
1560             }
1561           } else {
1562             if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
1563             else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
1564             else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
1565             else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
1566             else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
1567             else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
1568           }
1569           PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name);
1570           mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples;
1571           PetscCall(MatGetInfo(sub, MAT_LOCAL, &info));
1572           cumnnz += (PetscCount)info.nz_used; /* can be overestimated for Cholesky */
1573           maxnnz = PetscMax(maxnnz, info.nz_used);
1574         }
1575       }
1576     }
1577 
1578     /* Allocate total COO */
1579     totnnz = cumnnz;
1580     PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns));
1581     PetscCall(PetscMalloc1(totnnz, &vals));
1582 
1583     /* Handle row and column maps
1584        We directly map rows and use an SF for the columns */
1585     PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx));
1586     PetscCall(MatNestGetISs(A, rows, cols));
1587     for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r]));
1588     for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c]));
1589     if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w));
1590     else (void)maxnnz;
1591 
1592     cumnnz = 0;
1593     for (PetscInt r = 0; r < nr; r++) {
1594       for (PetscInt c = 0; c < nc; c++) {
1595         Mat             sub    = mats[r][c];
1596         const PetscInt *ridx   = rows_idx[r];
1597         const PetscInt *cidx   = cols_idx[c];
1598         PetscScalar     vscale = 1.0, vshift = 0.0;
1599         PetscInt        rst, size, bs;
1600         PetscSF         csf;
1601         PetscBool       conjugate = PETSC_FALSE, swap = PETSC_FALSE;
1602         PetscLayout     cmap;
1603         PetscInt        innz;
1604 
1605         mumps->nest_vals_start[r * nc + c] = cumnnz;
1606         if (c == r) {
1607           PetscCall(ISGetSize(rows[r], &size));
1608           if (!mumps->nest_convert_to_triples[r * nc + c]) {
1609             for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so use the first non-NULL Mat in this row (only its block size is needed)
1610           }
1611           PetscCall(MatGetBlockSize(sub, &bs));
1612           Mbs += size / bs;
1613         }
1614         if (!mumps->nest_convert_to_triples[r * nc + c]) continue;
1615 
1616         /* Extract inner blocks if needed */
1617         PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, &swap));
1618         PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell");
1619 
1620         /* Get column layout to map off-process columns */
1621         PetscCall(MatGetLayouts(sub, NULL, &cmap));
1622 
1623         /* Get row start to map on-process rows */
1624         PetscCall(MatGetOwnershipRange(sub, &rst, NULL));
1625 
1626         /* Directly fill the mumps data structure, using C (0-based) ordering for now */
1627         PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps));
1628 
1629         /* Swap the role of rows and columns indices for transposed blocks
1630            since we need values with global final ordering */
1631         if (swap) {
1632           cidx = rows_idx[r];
1633           ridx = cols_idx[c];
1634         }
1635 
1636         /* Communicate column indices
1637            This could have been done with a single SF but it would have complicated the code a lot.
1638            But since we do it only once, we pay the price of setting up an SF for each block */
1639         if (PetscDefined(USE_64BIT_INDICES)) {
1640           for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k];
1641         } else pjcns_w = (PetscInt *)mumps->jcn; /* This cast is needed only to silence warnings for 64bit integers builds */
1642         PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf));
1643         PetscCall(PetscIntCast(mumps->nnz, &innz));
1644         PetscCall(PetscSFSetGraphLayout(csf, cmap, innz, NULL, PETSC_OWN_POINTER, pjcns_w));
1645         PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
1646         PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
1647         PetscCall(PetscSFDestroy(&csf));
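             /* After the broadcast, each pjcns_w[k] has been overwritten with the cidx[] entry fetched from the
                process owning that column of sub, i.e., with the global column index of triple k in the final
                MatNest ordering. */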
1648 
1649         /* Import indices: use direct map for rows and mapped indices for columns */
1650         if (swap) {
1651           for (PetscInt k = 0; k < mumps->nnz; k++) {
1652             PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k]));
1653             PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k]));
1654           }
1655         } else {
1656           for (PetscInt k = 0; k < mumps->nnz; k++) {
1657             PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k]));
1658             PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k]));
1659           }
1660         }
1661 
1662         /* Import values to full COO */
1663         if (conjugate) { /* conjugate the entries */
1664           PetscScalar *v = vals + cumnnz;
1665           for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * PetscConj(mumps->val[k]);
1666         } else if (vscale != 1.0) {
1667           PetscScalar *v = vals + cumnnz;
1668           for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * mumps->val[k];
1669         } else PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz));
1670 
1671         /* Shift new starting point and sanity check */
1672         cumnnz += mumps->nnz;
1673         PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscCount_FMT " > %" PetscCount_FMT, cumnnz, totnnz);
1674 
1675         /* Free scratch memory */
1676         PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1677         PetscCall(PetscFree(mumps->val_alloc));
1678         mumps->val = NULL;
1679         mumps->nnz = 0;
1680       }
1681     }
1682     if (mumps->id.ICNTL(15) == 1) {
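           /* ICNTL(15) = 1 asks MUMPS to analyze the matrix in compressed (block) format: per the MUMPS users'
              manual, id.nblk is the number of blocks and id.blkptr[] is a 1-based array of nblk + 1 entries
              delimiting the variables of each block.  Rank 0 rebuilds blkptr from the ownership ranges and block
              sizes of the diagonal blocks; if every block is a singleton (Mbs == A->rmap->N), compression buys
              nothing and ICNTL(15) is reset to 0. */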
1683       if (Mbs != A->rmap->N) {
1684         PetscMPIInt rank, size;
1685 
1686         PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
1687         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
1688         if (rank == 0) {
1689           PetscInt shift = 0;
1690 
1691           PetscCall(PetscMUMPSIntCast(Mbs, &mumps->id.nblk));
1692           PetscCall(PetscFree(mumps->id.blkptr));
1693           PetscCall(PetscMalloc1(Mbs + 1, &mumps->id.blkptr));
1694           mumps->id.blkptr[0] = 1;
1695           for (PetscInt i = 0; i < size; ++i) {
1696             for (PetscInt r = 0; r < nr; r++) {
1697               Mat             sub = mats[r][r];
1698               const PetscInt *ranges;
1699               PetscInt        bs;
1700 
1701               for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so use the first non-NULL Mat in this row (only its layout and block size are needed)
1702               PetscCall(MatGetOwnershipRanges(sub, &ranges));
1703               PetscCall(MatGetBlockSize(sub, &bs));
1704               for (PetscInt j = 0, start = mumps->id.blkptr[shift] + bs; j < ranges[i + 1] - ranges[i]; j += bs) PetscCall(PetscMUMPSIntCast(start + j, mumps->id.blkptr + shift + j / bs + 1));
1705               shift += (ranges[i + 1] - ranges[i]) / bs;
1706             }
1707           }
1708         }
1709       } else mumps->id.ICNTL(15) = 0;
1710     }
1711     if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w));
1712     for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r]));
1713     for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c]));
1714     PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx));
1715     if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);
1716     mumps->nest_vals_start[nr * nc] = cumnnz;
1717 
1718     /* Set pointers for final MUMPS data structure */
1719     mumps->nest_vals = vals;
1720     mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */
1721     mumps->val       = vals;
1722     mumps->irn       = irns;
1723     mumps->jcn       = jcns;
1724     mumps->nnz       = cumnnz;
1725   } else {
1726     PetscScalar *oval = mumps->nest_vals;
1727     for (PetscInt r = 0; r < nr; r++) {
1728       for (PetscInt c = 0; c < nc; c++) {
1729         PetscBool   conjugate = PETSC_FALSE;
1730         Mat         sub       = mats[r][c];
1731         PetscScalar vscale = 1.0, vshift = 0.0;
1732         PetscInt    midx = r * nc + c;
1733 
1734         if (!mumps->nest_convert_to_triples[midx]) continue;
1735         PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, NULL));
1736         PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell");
1737         mumps->val = oval + mumps->nest_vals_start[midx];
1738         PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps));
1739         if (conjugate) {
1740           PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
1741           for (PetscCount k = 0; k < nnz; k++) mumps->val[k] = vscale * PetscConj(mumps->val[k]);
1742         } else if (vscale != 1.0) {
1743           PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
1744           for (PetscCount k = 0; k < nnz; k++) mumps->val[k] *= vscale;
1745         }
1746       }
1747     }
1748     mumps->val = oval;
1749   }
1750   PetscFunctionReturn(PETSC_SUCCESS);
1751 }
1752 
1753 static PetscErrorCode MatDestroy_MUMPS(Mat A)
1754 {
1755   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
1756 
1757   PetscFunctionBegin;
1758   PetscCall(PetscFree(mumps->id.isol_loc));
1759   PetscCall(VecScatterDestroy(&mumps->scat_rhs));
1760   PetscCall(VecScatterDestroy(&mumps->scat_sol));
1761   PetscCall(VecDestroy(&mumps->b_seq));
1762   PetscCall(VecDestroy(&mumps->x_seq));
1763   PetscCall(PetscFree(mumps->id.perm_in));
1764   PetscCall(PetscFree(mumps->id.blkvar));
1765   PetscCall(PetscFree(mumps->id.blkptr));
1766   PetscCall(PetscFree2(mumps->irn, mumps->jcn));
1767   PetscCall(PetscFree(mumps->val_alloc));
1768   PetscCall(PetscFree(mumps->info));
1769   PetscCall(PetscFree(mumps->ICNTL_pre));
1770   PetscCall(PetscFree(mumps->CNTL_pre));
1771   PetscCall(MatMumpsResetSchur_Private(mumps));
1772   if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
1773     mumps->id.job = JOB_END;
1774     PetscMUMPS_c(mumps);
1775     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
1776     if (mumps->mumps_comm != MPI_COMM_NULL) {
1777       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
1778       else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
1779     }
1780   }
1781   PetscCall(MatMumpsFreeInternalID(&mumps->id));
1782 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1783   if (mumps->use_petsc_omp_support) {
1784     PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
1785     PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1786     PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
1787   }
1788 #endif
1789   PetscCall(PetscFree(mumps->ia_alloc));
1790   PetscCall(PetscFree(mumps->ja_alloc));
1791   PetscCall(PetscFree(mumps->recvcount));
1792   PetscCall(PetscFree(mumps->reqs));
1793   PetscCall(PetscFree(mumps->irhs_loc));
1794   PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples));
1795   PetscCall(PetscFree(mumps->nest_vals));
1796   PetscCall(PetscFree(A->data));
1797 
1798   /* clear composed functions */
1799   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1800   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
1801   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
1802   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
1803   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
1804   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
1805   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
1806   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
1807   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
1808   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
1809   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
1810   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL));
1811   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
1812   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
1813   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetBlk_C", NULL));
1814   PetscFunctionReturn(PETSC_SUCCESS);
1815 }
1816 
1817 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of right-hand sides. <array> points to the start of the RHS on the local process. */
1818 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
1819 {
1820   Mat_MUMPS        *mumps   = (Mat_MUMPS *)A->data;
1821   const PetscMPIInt ompsize = mumps->omp_comm_size;
1822   PetscInt          i, m, M, rstart;
1823 
1824   PetscFunctionBegin;
1825   PetscCall(MatGetSize(A, &M, NULL));
1826   PetscCall(MatGetLocalSize(A, &m, NULL));
1827   PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
1828   if (ompsize == 1) {
1829     if (!mumps->irhs_loc) {
1830       mumps->nloc_rhs = (PetscMUMPSInt)m;
1831       PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
1832       PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1833       for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(rstart + i + 1, &mumps->irhs_loc[i])); /* use 1-based indices */
1834     }
1835     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, m * nrhs, array, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc));
1836   } else {
1837 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1838     const PetscInt *ranges;
1839     PetscMPIInt     j, k, sendcount, *petsc_ranks, *omp_ranks;
1840     MPI_Group       petsc_group, omp_group;
1841     PetscScalar    *recvbuf = NULL;
1842 
1843     if (mumps->is_omp_master) {
1844       /* Lazily initialize the omp stuff for distributed rhs */
1845       if (!mumps->irhs_loc) {
1846         PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
1847         PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
1848         PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
1849         PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
1850         for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
1851         PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));
1852 
1853         /* Populate mumps->irhs_loc[], rhs_nrow[] */
1854         mumps->nloc_rhs = 0;
1855         PetscCall(MatGetOwnershipRanges(A, &ranges));
1856         for (j = 0; j < ompsize; j++) {
1857           mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
1858           mumps->nloc_rhs += mumps->rhs_nrow[j];
1859         }
1860         PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
1861         for (j = k = 0; j < ompsize; j++) {
1862           for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) PetscCall(PetscMUMPSIntCast(i + 1, &mumps->irhs_loc[k])); /* uses 1-based indices */
1863         }
1864 
1865         PetscCall(PetscFree2(omp_ranks, petsc_ranks));
1866         PetscCallMPI(MPI_Group_free(&petsc_group));
1867         PetscCallMPI(MPI_Group_free(&omp_group));
1868       }
1869 
1870       /* Reallocate buffers when the current nrhs is bigger than any encountered so far */
1871       if (nrhs > mumps->max_nrhs) {
1872         PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
1873         PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
1874         mumps->max_nrhs = nrhs;
1875       }
1876 
1877       /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
1878       for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
1879       mumps->rhs_disps[0] = 0;
1880       for (j = 1; j < ompsize; j++) {
1881         mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
1882         PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
1883       }
1884       recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
1885     }
1886 
1887     PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
1888     PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));
1889 
1890     if (mumps->is_omp_master) {
1891       if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
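             /* rhs_recvbuf[] groups data by sender: [rank 0: rhs_0 rows, rhs_1 rows, ...][rank 1: ...], while MUMPS
                wants rhs_loc[] column-major over all nloc_rhs local rows: [rhs_0: rank 0 rows, rank 1 rows, ...]
                [rhs_1: ...].  The copy below transposes between the two layouts, striding dst by nloc_rhs per
                right-hand side. */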
1892         PetscScalar *dst, *dstbase = mumps->rhs_loc;
1893         for (j = 0; j < ompsize; j++) {
1894           const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
1895           dst                    = dstbase;
1896           for (i = 0; i < nrhs; i++) {
1897             PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
1898             src += mumps->rhs_nrow[j];
1899             dst += mumps->nloc_rhs;
1900           }
1901           dstbase += mumps->rhs_nrow[j];
1902         }
1903       }
1904       PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nloc_rhs * nrhs, mumps->rhs_loc, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc));
1905     }
1906 #endif /* PETSC_HAVE_OPENMP_SUPPORT */
1907   }
1908   mumps->id.nrhs     = (PetscMUMPSInt)nrhs;
1909   mumps->id.nloc_rhs = (PetscMUMPSInt)mumps->nloc_rhs;
1910   mumps->id.lrhs_loc = mumps->nloc_rhs;
1911   mumps->id.irhs_loc = mumps->irhs_loc;
1912   PetscFunctionReturn(PETSC_SUCCESS);
1913 }
1914 
1915 static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
1916 {
1917   Mat_MUMPS         *mumps  = (Mat_MUMPS *)A->data;
1918   const PetscScalar *barray = NULL;
1919   PetscScalar       *array;
1920   IS                 is_iden, is_petsc;
1921   PetscInt           i;
1922   PetscBool          second_solve = PETSC_FALSE;
1923   static PetscBool   cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;
1924 
1925   PetscFunctionBegin;
1926   PetscCall(PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM "
1927                                    "Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",
1928                                    &cite1));
1929   PetscCall(PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel "
1930                                    "Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",
1931                                    &cite2));
1932 
1933   PetscCall(VecFlag(x, A->factorerrortype));
1934   if (A->factorerrortype) {
1935     PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
1936     PetscFunctionReturn(PETSC_SUCCESS);
1937   }
1938 
1939   mumps->id.nrhs = 1;
1940   if (mumps->petsc_size > 1) {
1941     if (mumps->ICNTL20 == 10) {
1942       mumps->id.ICNTL(20) = 10; /* dense distributed RHS, need to set rhs_loc[], irhs_loc[] */
1943       PetscCall(VecGetArrayRead(b, &barray));
1944       PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, barray));
1945     } else {
1946       mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential b_seq vector*/
1947       PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1948       PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
1949       if (!mumps->myid) {
1950         PetscCall(VecGetArray(mumps->b_seq, &array));
1951         PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->b_seq->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
1952       }
1953     }
1954   } else { /* petsc_size == 1, use MUMPS's dense centralized RHS feature, so that we don't need to bother with isol_loc[] to get the solution */
1955     mumps->id.ICNTL(20) = 0;
1956     PetscCall(VecCopy(b, x));
1957     PetscCall(VecGetArray(x, &array));
1958     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, x->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
1959   }
1960 
1961   /*
1962      handle condensation step of Schur complement (if any)
1963      We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
1964      According to the MUMPS (5.0.0) manual, any value should be harmless during the factorization phase
1965      Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
1966      This requires an extra call to PetscMUMPS_c and the computation of the factors for S
1967   */
1968   if (mumps->id.size_schur > 0) {
1969     PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
1970     if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
1971       second_solve = PETSC_TRUE;
1972       PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs
1973       mumps->id.ICNTL(26) = 1;                                /* condensation phase */
1974     } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
1975   }
1976 
1977   mumps->id.job = JOB_SOLVE;
1978   PetscMUMPS_c(mumps); // reduced solve, put solution in id.redrhs
1979   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
1980 
1981   /* handle expansion step of Schur complement (if any) */
1982   if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
1983   else if (mumps->id.ICNTL(26) == 1) { // ICNTL(26)==1: MUMPS only condensed the RHS into id.redrhs, so solve the Schur complement system ourselves and scatter the result back
1984     PetscCall(MatMumpsSolveSchur_Private(A));
1985     for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1] = ID_FIELD_GET(mumps->id, redrhs, i);
1986   }
1987 
1988   if (mumps->petsc_size > 1) { /* convert mumps distributed solution to PETSc mpi x */
1989     if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
1990       /* when id.ICNTL(9) changes, the contents of isol_loc may change (though not its size lsol_loc), so recreate scat_sol */
1991       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1992     }
1993     if (!mumps->scat_sol) { /* create scatter scat_sol */
1994       PetscInt *isol2_loc = NULL;
1995       PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */
1996       PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc));
1997       for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1;                        /* change Fortran style to C style */
1998       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */
1999       PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol));
2000       PetscCall(ISDestroy(&is_iden));
2001       PetscCall(ISDestroy(&is_petsc));
2002       mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
2003     }
2004 
2005     PetscScalar *xarray;
2006     PetscCall(VecGetArray(mumps->x_seq, &xarray));
2007     PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.lsol_loc, mumps->id.precision, mumps->id.sol_loc, xarray));
2008     PetscCall(VecRestoreArray(mumps->x_seq, &xarray));
2009     PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
2010     PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
2011 
2012     if (mumps->ICNTL20 == 10) { // distributed RHS
2013       PetscCall(VecRestoreArrayRead(b, &barray));
2014     } else if (!mumps->myid) { // centralized RHS
2015       PetscCall(VecRestoreArray(mumps->b_seq, &array));
2016     }
2017   } else {
2018     // id.rhs has the solution in mumps precision
2019     PetscCall(MatMumpsCastMumpsScalarArray(x->map->n, mumps->id.precision, mumps->id.rhs, array));
2020     PetscCall(VecRestoreArray(x, &array));
2021   }
2022 
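       /* Rough flop count for the triangular solves: about two flops per factor entry.  Per the MUMPS convention, a
          negative INFO(28) encodes the entry count in millions, which the ternary below undoes; the subtraction of
          A->cmap->n presumably avoids double counting the diagonal entries. */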
2023   PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
2024   PetscFunctionReturn(PETSC_SUCCESS);
2025 }
2026 
2027 static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x)
2028 {
2029   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
2030   const PetscMUMPSInt value = mumps->id.ICNTL(9);
2031 
2032   PetscFunctionBegin;
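       /* Per the MUMPS users' manual, ICNTL(9) = 1 (the default) solves A x = b while any other value solves
          A^T x = b, so a transpose solve just flips the control, delegates to MatSolve_MUMPS(), and restores the
          previous value afterwards. */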
2033   mumps->id.ICNTL(9) = 0;
2034   PetscCall(MatSolve_MUMPS(A, b, x));
2035   mumps->id.ICNTL(9) = value;
2036   PetscFunctionReturn(PETSC_SUCCESS);
2037 }
2038 
2039 static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X)
2040 {
2041   Mat                Bt = NULL;
2042   PetscBool          denseX, denseB, flg, flgT;
2043   Mat_MUMPS         *mumps = (Mat_MUMPS *)A->data;
2044   PetscInt           i, nrhs, M, nrhsM;
2045   PetscScalar       *array;
2046   const PetscScalar *barray;
2047   PetscInt           lsol_loc, nlsol_loc, *idxx, iidx = 0;
2048   PetscMUMPSInt     *isol_loc, *isol_loc_save;
2049   PetscScalar       *sol_loc;
2050   void              *sol_loc_save;
2051   PetscCount         sol_loc_len_save;
2052   IS                 is_to, is_from;
2053   PetscInt           k, proc, j, m, myrstart;
2054   const PetscInt    *rstart;
2055   Vec                v_mpi, msol_loc;
2056   VecScatter         scat_sol;
2057   Vec                b_seq;
2058   VecScatter         scat_rhs;
2059   PetscScalar       *aa;
2060   PetscInt           spnr, *ia, *ja;
2061   Mat_MPIAIJ        *b = NULL;
2062 
2063   PetscFunctionBegin;
2064   PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL));
2065   PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");
2066 
2067   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL));
2068 
2069   if (denseB) {
2070     PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution");
2071     mumps->id.ICNTL(20) = 0; /* dense RHS */
2072   } else {                   /* sparse B */
2073     PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices");
2074     PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT));
2075     PetscCheck(flgT, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix");
2076     PetscCall(MatShellGetScalingShifts(B, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
2077     /* input B is transpose of actual RHS matrix,
2078      because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
2079     PetscCall(MatTransposeGetMat(B, &Bt));
2080     mumps->id.ICNTL(20) = 1; /* sparse RHS */
2081   }
2082 
2083   PetscCall(MatGetSize(B, &M, &nrhs));
2084   PetscCall(PetscIntMultError(nrhs, M, &nrhsM));
2085   mumps->id.nrhs = (PetscMUMPSInt)nrhs;
2086   mumps->id.lrhs = (PetscMUMPSInt)M;
2087 
2088   if (mumps->petsc_size == 1) { // handle this easy case specially and return early
2089     PetscScalar *aa;
2090     PetscInt     spnr, *ia, *ja;
2091     PetscBool    second_solve = PETSC_FALSE;
2092 
2093     PetscCall(MatDenseGetArray(X, &array));
2094     if (denseB) {
2095       /* copy B to X */
2096       PetscCall(MatDenseGetArrayRead(B, &barray));
2097       PetscCall(PetscArraycpy(array, barray, nrhsM));
2098       PetscCall(MatDenseRestoreArrayRead(B, &barray));
2099     } else { /* sparse B */
2100       PetscCall(MatSeqAIJGetArray(Bt, &aa));
2101       PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2102       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
2103       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
2104       PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->id.nz_rhs, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
2105     }
2106     PetscCall(MatMumpsMakeMumpsScalarArray(denseB, nrhsM, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
2107 
2108     /* handle condensation step of Schur complement (if any) */
2109     if (mumps->id.size_schur > 0) {
2110       if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
2111         second_solve = PETSC_TRUE;
2112         PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs
2113         mumps->id.ICNTL(26) = 1;                                /* condensation phase, i.e, to solve id.redrhs */
2114       } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
2115     }
2116 
2117     mumps->id.job = JOB_SOLVE;
2118     PetscMUMPS_c(mumps);
2119     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
2120 
2121     /* handle expansion step of Schur complement (if any) */
2122     if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
2123     else if (mumps->id.ICNTL(26) == 1) { // ICNTL(26)==1: MUMPS only condensed the RHS into id.redrhs, so solve the Schur complement system ourselves and scatter the result back
2124       PetscCall(MatMumpsSolveSchur_Private(A));
2125       for (j = 0; j < nrhs; ++j)
2126         for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1 + j * M] = ID_FIELD_GET(mumps->id, redrhs, i + j * mumps->id.lredrhs);
2127     }
2128 
2129     if (!denseB) { /* sparse B, restore ia, ja */
2130       PetscCall(MatSeqAIJRestoreArray(Bt, &aa));
2131       PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2132       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
2133     }
2134 
2135     // whether B is dense or sparse, the solution is in id.rhs; convert it into the array of X
2136     PetscCall(MatMumpsCastMumpsScalarArray(nrhsM, mumps->id.precision, mumps->id.rhs, array));
2137     PetscCall(MatDenseRestoreArray(X, &array));
2138     PetscFunctionReturn(PETSC_SUCCESS);
2139   }
2140 
2141   /* parallel case: with a centralized RHS (ICNTL(20)=0), MUMPS requires B to be gathered on the host */
2142   PetscCheck(!mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
2143 
2144   /* create msol_loc to hold mumps local solution */
2145   isol_loc_save         = mumps->id.isol_loc; /* save these, as we want to reuse them in MatSolve() */
2146   sol_loc_save          = mumps->id.sol_loc;
2147   sol_loc_len_save      = mumps->id.sol_loc_len;
2148   mumps->id.isol_loc    = NULL; // an init state
2149   mumps->id.sol_loc     = NULL;
2150   mumps->id.sol_loc_len = 0;
2151 
2152   lsol_loc = mumps->id.lsol_loc;
2153   PetscCall(PetscIntMultError(nrhs, lsol_loc, &nlsol_loc)); /* length of sol_loc */
2154   PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc));
2155   PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, nlsol_loc, sol_loc, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc));
2156   mumps->id.isol_loc = isol_loc;
2157 
2158   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc));
2159 
2160   if (denseB) {
2161     if (mumps->ICNTL20 == 10) {
2162       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
2163       PetscCall(MatDenseGetArrayRead(B, &barray));
2164       PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, barray)); // put barray to rhs_loc
2165       PetscCall(MatDenseRestoreArrayRead(B, &barray));
2166       PetscCall(MatGetLocalSize(B, &m, NULL));
2167       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, NULL, &v_mpi)); // will scatter the solution to v_mpi, which wraps X
2168     } else {
2169       mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
2170       /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in
2171         very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank
2172         0, re-arrange B into desired order, which is a local operation.
2173       */
2174 
2175       /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
2176       /* wrap dense rhs matrix B into a vector v_mpi */
2177       PetscCall(MatGetLocalSize(B, &m, NULL));
2178       PetscCall(MatDenseGetArrayRead(B, &barray));
2179       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, barray, &v_mpi));
2180       PetscCall(MatDenseRestoreArrayRead(B, &barray));
2181 
2182       /* scatter v_mpi to b_seq in proc[0]. With ICNTL(20) = 0, MUMPS requires rhs to be centralized on the host! */
2183       if (!mumps->myid) {
2184         PetscInt *idx;
2185         /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
2186         PetscCall(PetscMalloc1(nrhsM, &idx));
2187         PetscCall(MatGetOwnershipRanges(B, &rstart));
2188         for (proc = 0, k = 0; proc < mumps->petsc_size; proc++) {
2189           for (j = 0; j < nrhs; j++) {
2190             for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i;
2191           }
2192         }
2193 
2194         PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhsM, &b_seq));
2195         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhsM, idx, PETSC_OWN_POINTER, &is_to));
2196         PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhsM, 0, 1, &is_from));
2197       } else {
2198         PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq));
2199         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to));
2200         PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from));
2201       }
2202 
2203       PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs));
2204       PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
2205       PetscCall(ISDestroy(&is_to));
2206       PetscCall(ISDestroy(&is_from));
2207       PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
2208 
2209       if (!mumps->myid) { /* define rhs on the host */
2210         PetscCall(VecGetArrayRead(b_seq, &barray));
2211         PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, nrhsM, barray, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
2212         PetscCall(VecRestoreArrayRead(b_seq, &barray));
2213       }
2214     }
2215   } else { /* sparse B */
2216     b = (Mat_MPIAIJ *)Bt->data;
2217 
2218     /* wrap dense X into a vector v_mpi */
2219     PetscCall(MatGetLocalSize(X, &m, NULL));
2220     PetscCall(MatDenseGetArrayRead(X, &barray));
2221     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhsM, barray, &v_mpi));
2222     PetscCall(MatDenseRestoreArrayRead(X, &barray));
2223 
2224     if (!mumps->myid) {
2225       PetscCall(MatSeqAIJGetArray(b->A, &aa));
2226       PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2227       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
2228       PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
2229       PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)b->A->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
2230     } else {
2231       mumps->id.irhs_ptr    = NULL;
2232       mumps->id.irhs_sparse = NULL;
2233       mumps->id.nz_rhs      = 0;
2234       if (mumps->id.rhs_sparse_len) {
2235         PetscCall(PetscFree(mumps->id.rhs_sparse));
2236         mumps->id.rhs_sparse_len = 0;
2237       }
2238     }
2239   }
2240 
2241   /* solve phase */
2242   mumps->id.job = JOB_SOLVE;
2243   PetscMUMPS_c(mumps);
2244   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2245 
2246   /* scatter mumps distributed solution to PETSc vector v_mpi, which shares local arrays with solution matrix X */
2247   PetscCall(MatDenseGetArray(X, &array));
2248   PetscCall(VecPlaceArray(v_mpi, array));
2249 
2250   /* create scatter scat_sol */
2251   PetscCall(MatGetOwnershipRanges(X, &rstart));
2252   /* iidx: index used to scatter the MUMPS solution into PETSc X */
2253 
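       /* v_mpi wraps X's dense array, so its global layout is the concatenation over processes of each process's
          (local rows) x nrhs column-major block.  The solution entry for global row g, owned by proc with
          rstart[proc] <= g < rstart[proc + 1], in right-hand side j therefore lives at global index
          rstart[proc] * nrhs + (g - rstart[proc]) + j * m, where m is proc's row count; this is exactly what the
          loop below computes as idxx[] for every (isol_loc[i], j) pair. */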
2254   PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from));
2255   PetscCall(PetscMalloc1(nlsol_loc, &idxx));
2256   for (i = 0; i < lsol_loc; i++) {
2257     isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */
2258 
2259     for (proc = 0; proc < mumps->petsc_size; proc++) {
2260       if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) {
2261         myrstart = rstart[proc];
2262         k        = isol_loc[i] - myrstart;          /* local index on 1st column of PETSc vector X */
2263         iidx     = k + myrstart * nrhs;             /* maps mumps isol_loc[i] to PETSc index in X */
2264         m        = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */
2265         break;
2266       }
2267     }
2268 
2269     for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m;
2270   }
2271   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to));
2272   PetscCall(MatMumpsCastMumpsScalarArray(nlsol_loc, mumps->id.precision, mumps->id.sol_loc, sol_loc)); // Vec msol_loc is created with sol_loc[]
2273   PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol));
2274   PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
2275   PetscCall(ISDestroy(&is_from));
2276   PetscCall(ISDestroy(&is_to));
2277   PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
2278   PetscCall(MatDenseRestoreArray(X, &array));
2279 
2280   if (mumps->id.sol_loc_len) { // in case we allocated intermediate buffers
2281     mumps->id.sol_loc_len = 0;
2282     PetscCall(PetscFree(mumps->id.sol_loc));
2283   }
2284 
2285   // restore old values
2286   mumps->id.sol_loc     = sol_loc_save;
2287   mumps->id.sol_loc_len = sol_loc_len_save;
2288   mumps->id.isol_loc    = isol_loc_save;
2289 
2290   PetscCall(PetscFree2(sol_loc, isol_loc));
2291   PetscCall(PetscFree(idxx));
2292   PetscCall(VecDestroy(&msol_loc));
2293   PetscCall(VecDestroy(&v_mpi));
2294   if (!denseB) {
2295     if (!mumps->myid) {
2296       b = (Mat_MPIAIJ *)Bt->data;
2297       PetscCall(MatSeqAIJRestoreArray(b->A, &aa));
2298       PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
2299       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
2300     }
2301   } else {
2302     if (mumps->ICNTL20 == 0) {
2303       PetscCall(VecDestroy(&b_seq));
2304       PetscCall(VecScatterDestroy(&scat_rhs));
2305     }
2306   }
2307   PetscCall(VecScatterDestroy(&scat_sol));
2308   PetscCall(PetscLogFlops(nrhs * PetscMax(0, 2.0 * (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
2309   PetscFunctionReturn(PETSC_SUCCESS);
2310 }
2311 
2312 static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X)
2313 {
2314   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
2315   const PetscMUMPSInt value = mumps->id.ICNTL(9);
2316 
2317   PetscFunctionBegin;
2318   mumps->id.ICNTL(9) = 0;
2319   PetscCall(MatMatSolve_MUMPS(A, B, X));
2320   mumps->id.ICNTL(9) = value;
2321   PetscFunctionReturn(PETSC_SUCCESS);
2322 }
2323 
2324 static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X)
2325 {
2326   PetscBool flg;
2327   Mat       B;
2328 
2329   PetscFunctionBegin;
2330   PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
2331   PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix");
2332 
2333   /* Create B=Bt^T that uses Bt's data structure */
2334   PetscCall(MatCreateTranspose(Bt, &B));
2335 
2336   PetscCall(MatMatSolve_MUMPS(A, B, X));
2337   PetscCall(MatDestroy(&B));
2338   PetscFunctionReturn(PETSC_SUCCESS);
2339 }
2340 
2341 #if !defined(PETSC_USE_COMPLEX)
2342 /*
2343   input:
2344    F:        numeric factor
2345   output:
2346    nneg:     total number of negative pivots
2347    nzero:    total number of zero pivots
2348    npos:     (global dimension of F) - nneg - nzero
2349 */
2350 static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
2351 {
2352   Mat_MUMPS  *mumps = (Mat_MUMPS *)F->data;
2353   PetscMPIInt size;
2354 
2355   PetscFunctionBegin;
2356   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size));
2357   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
2358   PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set to 1 for correct global matrix inertia", mumps->id.ICNTL(13));
2359 
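       /* INFOG(12) holds the number of negative pivots of the factorization and, with null pivot row detection
          enabled (-mat_mumps_icntl_24 1), INFOG(28) holds the number of null pivots, so the positive count follows
          by subtracting both from the global dimension. */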
2360   if (nneg) *nneg = mumps->id.INFOG(12);
2361   if (nzero || npos) {
2362     PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set to 1 for null pivot row detection");
2363     if (nzero) *nzero = mumps->id.INFOG(28);
2364     if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
2365   }
2366   PetscFunctionReturn(PETSC_SUCCESS);
2367 }
2368 #endif
2369 
2370 static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps)
2371 {
2372   PetscMPIInt    nreqs;
2373   PetscMUMPSInt *irn, *jcn;
2374   PetscMPIInt    count;
2375   PetscCount     totnnz, remain;
2376   const PetscInt osize = mumps->omp_comm_size;
2377   PetscScalar   *val;
2378 
2379   PetscFunctionBegin;
2380   if (osize > 1) {
2381     if (reuse == MAT_INITIAL_MATRIX) {
2382       /* master first gathers counts of nonzeros to receive */
2383       if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount));
2384       PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm));
2385 
2386       /* Then each computes number of send/recvs */
2387       if (mumps->is_omp_master) {
2388         /* Start from 1 since self communication is not done in MPI */
2389         nreqs = 0;
2390         for (PetscMPIInt i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
2391       } else {
2392         nreqs = (PetscMPIInt)(((mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX));
2393       }
2394       PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */
2395 
2396       /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others.
2397          MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz
2398          might be a prime number > 2^31-1, we have to slice the message. Since omp_comm_size
2399          is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
2400        */
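      /* For example, a rank with nnz = 5e9 and PETSC_MPI_INT_MAX = 2^31-1 needs
         ceil(5e9/(2^31-1)) = 3 slices per array, i.e. 9 requests for its irn/jcn/val. */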
2401       nreqs = 0; /* counter for actual send/recvs */
2402       if (mumps->is_omp_master) {
2403         totnnz = 0;
2404 
2405         for (PetscMPIInt i = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
2406         PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn));
2407         PetscCall(PetscMalloc1(totnnz, &val));
2408 
2409         /* Self communication */
2410         PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz));
2411         PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz));
2412         PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz));
2413 
2414         /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
2415         PetscCall(PetscFree2(mumps->irn, mumps->jcn));
2416         PetscCall(PetscFree(mumps->val_alloc));
2417         mumps->nnz = totnnz;
2418         mumps->irn = irn;
2419         mumps->jcn = jcn;
2420         mumps->val = mumps->val_alloc = val;
2421 
2422         irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
2423         jcn += mumps->recvcount[0];
2424         val += mumps->recvcount[0];
2425 
2426         /* Remote communication */
2427         for (PetscMPIInt i = 1; i < osize; i++) {
2428           count  = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
2429           remain = mumps->recvcount[i] - count;
2430           while (count > 0) {
2431             PetscCallMPI(MPIU_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2432             PetscCallMPI(MPIU_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2433             PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2434             irn += count;
2435             jcn += count;
2436             val += count;
2437             count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2438             remain -= count;
2439           }
2440         }
2441       } else {
2442         irn    = mumps->irn;
2443         jcn    = mumps->jcn;
2444         val    = mumps->val;
2445         count  = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
2446         remain = mumps->nnz - count;
2447         while (count > 0) {
2448           PetscCallMPI(MPIU_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2449           PetscCallMPI(MPIU_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2450           PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2451           irn += count;
2452           jcn += count;
2453           val += count;
2454           count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2455           remain -= count;
2456         }
2457       }
2458     } else {
2459       nreqs = 0;
2460       if (mumps->is_omp_master) {
2461         val = mumps->val + mumps->recvcount[0];
2462         for (PetscMPIInt i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */
2463           count  = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
2464           remain = mumps->recvcount[i] - count;
2465           while (count > 0) {
2466             PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2467             val += count;
2468             count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2469             remain -= count;
2470           }
2471         }
2472       } else {
2473         val    = mumps->val;
2474         count  = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
2475         remain = mumps->nnz - count;
2476         while (count > 0) {
2477           PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
2478           val += count;
2479           count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
2480           remain -= count;
2481         }
2482       }
2483     }
2484     PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE));
2485     mumps->tag++; /* It is fine for the above sends/recvs to share one MPI tag */
2486   }
2487   PetscFunctionReturn(PETSC_SUCCESS);
2488 }
2489 
2490 static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info)
2491 {
2492   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2493 
2494   PetscFunctionBegin;
2495   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) {
2496     if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2497     PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2498     PetscFunctionReturn(PETSC_SUCCESS);
2499   }
2500 
2501   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
2502   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));
2503 
2504   /* numerical factorization phase */
2505   mumps->id.job = JOB_FACTNUMERIC;
2506   if (!mumps->id.ICNTL(18)) { /* A is centralized */
2507     if (!mumps->myid) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
2508   } else {
2509     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
2510   }
2511 
2512   if (F->schur) {
2513     const PetscScalar *array;
2514     MUMPS_INT          size = mumps->id.size_schur;
2515     PetscCall(MatDenseGetArrayRead(F->schur, &array));
2516     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, size * size, array, mumps->id.precision, &mumps->id.schur_len, &mumps->id.schur));
2517     PetscCall(MatDenseRestoreArrayRead(F->schur, &array));
2518   }
2519 
2520   PetscMUMPS_c(mumps);
2521   if (mumps->id.INFOG(1) < 0) {
2522     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
2523     if (mumps->id.INFOG(1) == -10) {
2524       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2525       F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
2526     } else if (mumps->id.INFOG(1) == -13) {
2527       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2528       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2529     } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
2530       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2531       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2532     } else {
2533       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2534       F->factorerrortype = MAT_FACTOR_OTHER;
2535     }
2536   }
2537   PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.ICNTL(16));
2538 
2539   F->assembled = PETSC_TRUE;
2540 
2541   if (F->schur) { /* reset Schur status to unfactored */
2542 #if defined(PETSC_HAVE_CUDA)
2543     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
2544 #endif
2545     PetscScalar *array;
2546     PetscCall(MatDenseGetArray(F->schur, &array));
2547     PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.size_schur * mumps->id.size_schur, mumps->id.precision, mumps->id.schur, array));
2548     PetscCall(MatDenseRestoreArray(F->schur, &array));
2549     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2550       mumps->id.ICNTL(19) = 2;
2551       PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
2552     }
2553     PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
2554   }
2555 
2556   /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
2557   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
2558 
2559   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
2560   // MUMPS userguide: ISOL_loc should be allocated by the user between the factorization and the
2561   // solve phases. On exit from the solve phase, ISOL_loc(i) contains the index of the variables for
2562   // which the solution (in SOL_loc) is available on the local processor.
2563   // If successive calls to the solve phase (JOB= 3) are performed for a given matrix, ISOL_loc will
2564   // normally have the same contents for each of these calls. The only exception is the case of
2565   // unsymmetric matrices (SYM=0) when the transpose option is changed (see ICNTL(9)) and non
2566   // symmetric row/column exchanges (see ICNTL(6)) have occurred before the solve phase.
2567   if (mumps->petsc_size > 1) {
2568     PetscInt     lsol_loc;
2569     PetscScalar *array;
2570 
2571     /* distributed solution; Create x_seq=sol_loc for repeated use */
2572     if (mumps->x_seq) {
2573       PetscCall(VecScatterDestroy(&mumps->scat_sol));
2574       PetscCall(PetscFree(mumps->id.isol_loc));
2575       PetscCall(VecDestroy(&mumps->x_seq));
2576     }
2577     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
2578     PetscCall(PetscMalloc1(lsol_loc, &mumps->id.isol_loc));
2579     PetscCall(VecCreateSeq(PETSC_COMM_SELF, lsol_loc, &mumps->x_seq));
2580     PetscCall(VecGetArray(mumps->x_seq, &array));
2581     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, lsol_loc, array, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc));
2582     PetscCall(VecRestoreArray(mumps->x_seq, &array));
2583     mumps->id.lsol_loc = (PetscMUMPSInt)lsol_loc;
2584   }
2585   PetscCall(PetscLogFlops((double)ID_RINFO_GET(mumps->id, 2)));
2586   PetscFunctionReturn(PETSC_SUCCESS);
2587 }
2588 
2589 /* Sets MUMPS options from the options database */
2590 static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A)
2591 {
2592   Mat_MUMPS    *mumps = (Mat_MUMPS *)F->data;
2593   PetscReal     cntl;
2594   PetscMUMPSInt icntl = 0, size, *listvar_schur;
2595   PetscInt      info[80], i, ninfo = 80, rbs, cbs;
2596   PetscBool     flg   = PETSC_FALSE;
2597   PetscBool     schur = mumps->id.icntl ? (PetscBool)(mumps->id.ICNTL(26) == -1) : (PetscBool)(mumps->ICNTL26 == -1);
2598   void         *arr;
2599 
2600   PetscFunctionBegin;
2601   PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat");
2602   if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */
2603     PetscPrecision precision  = PetscDefined(USE_REAL_SINGLE) ? PETSC_PRECISION_SINGLE : PETSC_PRECISION_DOUBLE;
2604     PetscInt       nthreads   = 0;
2605     PetscInt       nCNTL_pre  = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2606     PetscInt       nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
2607     PetscMUMPSInt  nblk, *blkvar, *blkptr;
2608 
2609     mumps->petsc_comm = PetscObjectComm((PetscObject)A);
2610     PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size));
2611     PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */
2612 
2613     PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support));
2614     if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
2615     /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */
2616     PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL));
2617     if (mumps->use_petsc_omp_support) {
2618       PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2619 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
2620       PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl));
2621       PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master));
2622 #else
2623       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",
2624               ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2625 #endif
2626     } else {
2627       mumps->omp_comm      = PETSC_COMM_SELF;
2628       mumps->mumps_comm    = mumps->petsc_comm;
2629       mumps->is_omp_master = PETSC_TRUE;
2630     }
2631     PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size));
2632     mumps->reqs = NULL;
2633     mumps->tag  = 0;
2634 
2635     if (mumps->mumps_comm != MPI_COMM_NULL) {
2636       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) {
2637         /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
2638         MPI_Comm comm;
2639         PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm));
2640         mumps->mumps_comm = comm;
2641       } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm));
2642     }
2643 
2644     mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
2645     mumps->id.job          = JOB_INIT;
2646     mumps->id.par          = 1; /* host participates in factorization and solve */
2647     mumps->id.sym          = mumps->sym;
2648 
2649     size          = mumps->id.size_schur;
2650     arr           = mumps->id.schur;
2651     listvar_schur = mumps->id.listvar_schur;
2652     nblk          = mumps->id.nblk;
2653     blkvar        = mumps->id.blkvar;
2654     blkptr        = mumps->id.blkptr;
2655     if (PetscDefined(USE_DEBUG)) {
2656       for (PetscInt i = 0; i < size; i++)
2657         PetscCheck(listvar_schur[i] - 1 >= 0 && listvar_schur[i] - 1 < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid Schur index at position %" PetscInt_FMT "! %" PetscInt_FMT " must be in [0, %" PetscInt_FMT ")", i, (PetscInt)listvar_schur[i] - 1,
2658                    A->rmap->N);
2659     }
2660 
2661     PetscCall(PetscOptionsEnum("-pc_precision", "Precision used by MUMPS", "MATSOLVERMUMPS", PetscPrecisionTypes, (PetscEnum)precision, (PetscEnum *)&precision, NULL));
2662     PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "MUMPS does not support %s precision", PetscPrecisionTypes[precision]);
2663     PetscCheck(precision == PETSC_SCALAR_PRECISION || PetscDefined(HAVE_MUMPS_MIXED_PRECISION), PetscObjectComm((PetscObject)F), PETSC_ERR_USER, "Your MUMPS library does not support mixed precision, which is needed for your specified PetscScalar");
2664     PetscCall(MatMumpsAllocateInternalID(&mumps->id, precision));
2665 
2666     PetscMUMPS_c(mumps);
2667     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2668 
2669     /* set PETSc-MUMPS default options - override MUMPS default */
2670     mumps->id.ICNTL(3) = 0;
2671     mumps->id.ICNTL(4) = 0;
2672     if (mumps->petsc_size == 1) {
2673       mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
2674       mumps->id.ICNTL(7)  = 7; /* automatic choice of ordering done by the package */
2675     } else {
2676       mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
2677       mumps->id.ICNTL(21) = 1; /* distributed solution */
2678     }
2679     if (nblk && blkptr) {
2680       mumps->id.ICNTL(15) = 1;
2681       mumps->id.nblk      = nblk;
2682       mumps->id.blkvar    = blkvar;
2683       mumps->id.blkptr    = blkptr;
2684     } else mumps->id.ICNTL(15) = 0;
2685 
2686     /* restore cached ICNTL and CNTL values */
2687     for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl];
2688     for (icntl = 0; icntl < nCNTL_pre; ++icntl) ID_CNTL_SET(mumps->id, (PetscInt)mumps->CNTL_pre[1 + 2 * icntl], mumps->CNTL_pre[2 + 2 * icntl]);
2689 
2690     PetscCall(PetscFree(mumps->ICNTL_pre));
2691     PetscCall(PetscFree(mumps->CNTL_pre));
2692 
2693     if (schur) {
2694       mumps->id.size_schur    = size;
2695       mumps->id.schur_lld     = size;
2696       mumps->id.schur         = arr;
2697       mumps->id.listvar_schur = listvar_schur;
2698       if (mumps->petsc_size > 1) {
2699         PetscBool gs; /* gs is false if any rank other than root has non-empty IS */
2700 
2701         mumps->id.ICNTL(19) = 1;                                                                            /* MUMPS returns Schur centralized on the host */
2702         gs                  = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
2703         PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPIU_BOOL, MPI_LAND, mumps->petsc_comm));
2704         PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc");
2705       } else {
2706         if (F->factortype == MAT_FACTOR_LU) {
2707           mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
2708         } else {
2709           mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
2710         }
2711       }
2712       mumps->id.ICNTL(26) = -1;
2713     }
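    /* Illustrative user-level Schur workflow driving the settings above (a sketch; these calls
       live in user code, not here):
         PetscCall(MatFactorSetSchurIS(F, is));               // register Schur indices before factoring
         // ... symbolic and numeric factorization of F ...
         PetscCall(MatFactorGetSchurComplement(F, &S, NULL)); // retrieve the (host-centralized) Schur matrix
     */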
2714 
2715     /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
2716        For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
2717      */
2718     PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm));
2719     PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm));
2720 
2721     mumps->scat_rhs = NULL;
2722     mumps->scat_sol = NULL;
2723   }
2724   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg));
2725   if (flg) mumps->id.ICNTL(1) = icntl;
2726   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg));
2727   if (flg) mumps->id.ICNTL(2) = icntl;
2728   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg));
2729   if (flg) mumps->id.ICNTL(3) = icntl;
2730 
2731   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg));
2732   if (flg) mumps->id.ICNTL(4) = icntl;
2733   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* restore the MUMPS default id.ICNTL(3) = 6 */
2734 
2735   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg));
2736   if (flg) mumps->id.ICNTL(6) = icntl;
2737 
2738   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg));
2739   if (flg) {
2740     PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
2741     mumps->id.ICNTL(7) = icntl;
2742   }
2743 
2744   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL));
2745   /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */
2746   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL));
2747   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL));
2748   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL));
2749   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL));
2750   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL));
2751   PetscCall(MatGetBlockSizes(A, &rbs, &cbs));
2752   if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = (PetscMUMPSInt)-rbs;
2753   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg));
2754   if (flg) {
2755     if (mumps->id.ICNTL(15) < 0) PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes");
2756     else if (mumps->id.ICNTL(15) > 0) {
2757       const PetscInt *bsizes;
2758       PetscInt        nblocks, p, *blkptr = NULL;
2759       PetscMPIInt    *recvcounts, *displs, n;
2760       PetscMPIInt     rank, size = 0;
2761 
2762       PetscCall(MatGetVariableBlockSizes(A, &nblocks, &bsizes));
2763       flg = PETSC_TRUE;
2764       for (p = 0; p < nblocks; ++p) {
2765         if (bsizes[p] > 1) break;
2766       }
2767       if (p == nblocks) flg = PETSC_FALSE;
2768       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &flg, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
2769       if (flg) { // if at least one process supplies variable block sizes and they are not all set to 1
2770         PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2771         if (rank == 0) PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2772         PetscCall(PetscCalloc2(size, &recvcounts, size + 1, &displs));
2773         PetscCall(PetscMPIIntCast(nblocks, &n));
2774         PetscCallMPI(MPI_Gather(&n, 1, MPI_INT, recvcounts, 1, MPI_INT, 0, PetscObjectComm((PetscObject)A)));
2775         for (PetscInt p = 0; p < size; ++p) displs[p + 1] = displs[p] + recvcounts[p];
2776         PetscCallMPI(MPI_Bcast(displs + size, 1, MPI_INT, 0, PetscObjectComm((PetscObject)A))); /* make the total block count known on all ranks before allocating */
2777         PetscCall(PetscMalloc1(displs[size] + 1, &blkptr));
2778         PetscCallMPI(MPI_Gatherv(bsizes, n, MPIU_INT, blkptr + 1, recvcounts, displs, MPIU_INT, 0, PetscObjectComm((PetscObject)A)));
2779         if (rank == 0) {
2780           blkptr[0] = 1;
2781           for (PetscInt p = 0; p < displs[size]; ++p) blkptr[p + 1] += blkptr[p]; /* turn gathered block sizes into 1-based block pointers */
2782           PetscCall(MatMumpsSetBlk(F, displs[size], NULL, blkptr));
2783         }
2784         PetscCall(PetscFree2(recvcounts, displs));
2785         PetscCall(PetscFree(blkptr));
2786       }
2787     }
2788   }
2789   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL));
2790   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset Schur data (if any) */
2791     PetscCall(MatDestroy(&F->schur));
2792     PetscCall(MatMumpsResetSchur_Private(mumps));
2793   }
2794 
2795   /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
2796      and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
2797      and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
2798      This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
2799      see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
2800      In short, we could not use distributed RHS until MPICH v4.0b1, or until we enabled a workaround available in MUMPS 5.6.2+
2801    */
2802   mumps->ICNTL20 = 10; /* Distributed dense RHS, by default */
2803 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (PetscDefined(HAVE_MPICH) && MPICH_NUMVERSION < 40000101) || PetscDefined(HAVE_MSMPI)
2804   mumps->ICNTL20 = 0; /* Centralized dense RHS, if need be */
2805 #endif
2806   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg));
2807   PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20);
2808 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
2809   PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0");
2810 #endif
2811   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */
2812 
2813   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL));
2814   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can be allocated per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL));
2815   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL));
2816   if (mumps->id.ICNTL(24)) mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */
2817 
2818   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL));
2819   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL));
2820   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL));
2821   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL));
2822   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL));
2823   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
2824   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL));
2825   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL));  -- not supported by PETSc API */
2826   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
2827   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
2828   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
2829   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_37", "ICNTL(37): compression of the contribution blocks (CB)", "None", mumps->id.ICNTL(37), &mumps->id.ICNTL(37), NULL));
2830   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));
2831   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_48", "ICNTL(48): multithreading with tree parallelism", "None", mumps->id.ICNTL(48), &mumps->id.ICNTL(48), NULL));
2832   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_56", "ICNTL(56): postponing and rank-revealing factorization", "None", mumps->id.ICNTL(56), &mumps->id.ICNTL(56), NULL));
2833   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL));
2834 
2835   PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 1), &cntl, &flg));
2836   if (flg) ID_CNTL_SET(mumps->id, 1, cntl);
2837   PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", (PetscReal)ID_CNTL_GET(mumps->id, 2), &cntl, &flg));
2838   if (flg) ID_CNTL_SET(mumps->id, 2, cntl);
2839   PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 3), &cntl, &flg));
2840   if (flg) ID_CNTL_SET(mumps->id, 3, cntl);
2841   PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", (PetscReal)ID_CNTL_GET(mumps->id, 4), &cntl, &flg));
2842   if (flg) ID_CNTL_SET(mumps->id, 4, cntl);
2843   PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", (PetscReal)ID_CNTL_GET(mumps->id, 5), &cntl, &flg));
2844   if (flg) ID_CNTL_SET(mumps->id, 5, cntl);
2845   PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", (PetscReal)ID_CNTL_GET(mumps->id, 7), &cntl, &flg));
2846   if (flg) ID_CNTL_SET(mumps->id, 7, cntl);
2847 
2848   PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));
2849 
2850   PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL));
2851   if (ninfo) {
2852     PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must be <= 80", ninfo);
2853     PetscCall(PetscMalloc1(ninfo, &mumps->info));
2854     mumps->ninfo = ninfo;
2855     for (i = 0; i < ninfo; i++) {
2856       PetscCheck(info[i] >= 1 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must be between 1 and 80", info[i]);
2857       mumps->info[i] = info[i];
2858     }
2859   }
2860   PetscOptionsEnd();
2861   PetscFunctionReturn(PETSC_SUCCESS);
2862 }
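/* Typical options-database usage exercised by MatSetFromOptions_MUMPS() (illustrative values):
     -pc_type lu -pc_factor_mat_solver_type mumps
     -mat_mumps_icntl_14 40    (percentage increase in the estimated working space)
     -mat_mumps_icntl_24 1     (detect null pivot rows)
     -mat_mumps_cntl_1 0.01    (relative pivoting threshold)
*/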
2863 
2864 static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info, Mat_MUMPS *mumps)
2865 {
2866   PetscFunctionBegin;
2867   if (mumps->id.INFOG(1) < 0) {
2868     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2869     if (mumps->id.INFOG(1) == -6) {
2870       PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2871       F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
2872     } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
2873       PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2874       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2875     } else {
2876       PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2877       F->factorerrortype = MAT_FACTOR_OTHER;
2878     }
2879   }
2880   if (!mumps->id.n) F->factorerrortype = MAT_FACTOR_NOERROR;
2881   PetscFunctionReturn(PETSC_SUCCESS);
2882 }
2883 
2884 static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, PETSC_UNUSED IS c, const MatFactorInfo *info)
2885 {
2886   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2887   Vec            b;
2888   const PetscInt M = A->rmap->N;
2889 
2890   PetscFunctionBegin;
2891   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2892     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
2893     PetscFunctionReturn(PETSC_SUCCESS);
2894   }
2895 
2896   /* Set MUMPS options from the options database */
2897   PetscCall(MatSetFromOptions_MUMPS(F, A));
2898 
2899   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2900   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2901 
2902   /* analysis phase */
2903   mumps->id.job = JOB_FACTSYMBOLIC;
2904   PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
2905   switch (mumps->id.ICNTL(18)) {
2906   case 0: /* centralized assembled matrix input */
2907     if (!mumps->myid) {
2908       mumps->id.nnz = mumps->nnz;
2909       mumps->id.irn = mumps->irn;
2910       mumps->id.jcn = mumps->jcn;
2911       if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
2912       if (r && mumps->id.ICNTL(7) == 7) {
2913         mumps->id.ICNTL(7) = 1;
2914         if (!mumps->myid) {
2915           const PetscInt *idx;
2916           PetscInt        i;
2917 
2918           PetscCall(PetscMalloc1(M, &mumps->id.perm_in));
2919           PetscCall(ISGetIndices(r, &idx));
2920           for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &mumps->id.perm_in[i])); /* perm_in[]: start from 1, not 0! */
2921           PetscCall(ISRestoreIndices(r, &idx));
2922         }
2923       }
2924     }
2925     break;
2926   case 3: /* distributed assembled matrix input (size>1) */
2927     mumps->id.nnz_loc = mumps->nnz;
2928     mumps->id.irn_loc = mumps->irn;
2929     mumps->id.jcn_loc = mumps->jcn;
2930     if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
2931     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2932       PetscCall(MatCreateVecs(A, NULL, &b));
2933       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2934       PetscCall(VecDestroy(&b));
2935     }
2936     break;
2937   }
2938   PetscMUMPS_c(mumps);
2939   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2940 
2941   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2942   F->ops->solve             = MatSolve_MUMPS;
2943   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2944   F->ops->matsolve          = MatMatSolve_MUMPS;
2945   F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2946   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2947 
2948   mumps->matstruc = SAME_NONZERO_PATTERN;
2949   PetscFunctionReturn(PETSC_SUCCESS);
2950 }
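/* Minimal user-level path that ends up in MatLUFactorSymbolic_AIJMUMPS() (a sketch under standard
   PETSc APIs, not code in this file):

     Mat           F;
     MatFactorInfo info;
     PetscCall(MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F));
     PetscCall(MatFactorInfoInitialize(&info));
     PetscCall(MatLUFactorSymbolic(F, A, NULL, NULL, &info));
     PetscCall(MatLUFactorNumeric(F, A, &info));
     PetscCall(MatSolve(F, b, x));
*/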
2951 
2952 /* Note the PETSc r and c permutations are ignored */
2953 static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, PETSC_UNUSED IS r, PETSC_UNUSED IS c, const MatFactorInfo *info)
2954 {
2955   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2956   Vec            b;
2957   const PetscInt M = A->rmap->N;
2958 
2959   PetscFunctionBegin;
2960   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2961     /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
2962     PetscFunctionReturn(PETSC_SUCCESS);
2963   }
2964 
2965   /* Set MUMPS options from the options database */
2966   PetscCall(MatSetFromOptions_MUMPS(F, A));
2967 
2968   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2969   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2970 
2971   /* analysis phase */
2972   mumps->id.job = JOB_FACTSYMBOLIC;
2973   PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
2974   switch (mumps->id.ICNTL(18)) {
2975   case 0: /* centralized assembled matrix input */
2976     if (!mumps->myid) {
2977       mumps->id.nnz = mumps->nnz;
2978       mumps->id.irn = mumps->irn;
2979       mumps->id.jcn = mumps->jcn;
2980       if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
2981     }
2982     break;
2983   case 3: /* distributed assembled matrix input (size>1) */
2984     mumps->id.nnz_loc = mumps->nnz;
2985     mumps->id.irn_loc = mumps->irn;
2986     mumps->id.jcn_loc = mumps->jcn;
2987     if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
2988     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2989       PetscCall(MatCreateVecs(A, NULL, &b));
2990       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2991       PetscCall(VecDestroy(&b));
2992     }
2993     break;
2994   }
2995   PetscMUMPS_c(mumps);
2996   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2997 
2998   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2999   F->ops->solve             = MatSolve_MUMPS;
3000   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
3001   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
3002 
3003   mumps->matstruc = SAME_NONZERO_PATTERN;
3004   PetscFunctionReturn(PETSC_SUCCESS);
3005 }
3006 
3007 /* Note the PETSc r permutation and factor info are ignored */
3008 static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, PETSC_UNUSED IS r, const MatFactorInfo *info)
3009 {
3010   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
3011   Vec            b;
3012   const PetscInt M = A->rmap->N;
3013 
3014   PetscFunctionBegin;
3015   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
3016     /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
3017     PetscFunctionReturn(PETSC_SUCCESS);
3018   }
3019 
3020   /* Set MUMPS options from the options database */
3021   PetscCall(MatSetFromOptions_MUMPS(F, A));
3022 
3023   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
3024   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
3025 
3026   /* analysis phase */
3027   mumps->id.job = JOB_FACTSYMBOLIC;
3028   PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
3029   switch (mumps->id.ICNTL(18)) {
3030   case 0: /* centralized assembled matrix input */
3031     if (!mumps->myid) {
3032       mumps->id.nnz = mumps->nnz;
3033       mumps->id.irn = mumps->irn;
3034       mumps->id.jcn = mumps->jcn;
3035       if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
3036     }
3037     break;
3038   case 3: /* distributed assembled matrix input (size>1) */
3039     mumps->id.nnz_loc = mumps->nnz;
3040     mumps->id.irn_loc = mumps->irn;
3041     mumps->id.jcn_loc = mumps->jcn;
3042     if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
3043     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
3044       PetscCall(MatCreateVecs(A, NULL, &b));
3045       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
3046       PetscCall(VecDestroy(&b));
3047     }
3048     break;
3049   }
3050   PetscMUMPS_c(mumps);
3051   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
3052 
3053   F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
3054   F->ops->solve                 = MatSolve_MUMPS;
3055   F->ops->solvetranspose        = MatSolve_MUMPS;
3056   F->ops->matsolve              = MatMatSolve_MUMPS;
3057   F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
3058   F->ops->matsolvetranspose     = MatMatSolveTranspose_MUMPS;
3059 #if defined(PETSC_USE_COMPLEX)
3060   F->ops->getinertia = NULL;
3061 #else
3062   F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
3063 #endif
3064 
3065   mumps->matstruc = SAME_NONZERO_PATTERN;
3066   PetscFunctionReturn(PETSC_SUCCESS);
3067 }
3068 
3069 static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer)
3070 {
3071   PetscBool         isascii;
3072   PetscViewerFormat format;
3073   Mat_MUMPS        *mumps = (Mat_MUMPS *)A->data;
3074 
3075   PetscFunctionBegin;
3076   /* check if matrix is mumps type */
3077   if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS);
3078 
3079   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
3080   if (isascii) {
3081     PetscCall(PetscViewerGetFormat(viewer, &format));
3082     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
3083       PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n"));
3084       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
3085         PetscCall(PetscViewerASCIIPrintf(viewer, "  SYM (matrix type):                   %d\n", mumps->id.sym));
3086         PetscCall(PetscViewerASCIIPrintf(viewer, "  PAR (host participation):            %d\n", mumps->id.par));
3087         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(1) (output for error):         %d\n", mumps->id.ICNTL(1)));
3088         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2)));
3089         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(3) (output for global info):   %d\n", mumps->id.ICNTL(3)));
3090         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(4) (level of printing):        %d\n", mumps->id.ICNTL(4)));
3091         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(5) (input mat struct):         %d\n", mumps->id.ICNTL(5)));
3092         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(6) (matrix prescaling):        %d\n", mumps->id.ICNTL(6)));
3093         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(7) (sequential matrix ordering): %d\n", mumps->id.ICNTL(7)));
3094         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(8) (scaling strategy):         %d\n", mumps->id.ICNTL(8)));
3095         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(10) (max num of refinements):  %d\n", mumps->id.ICNTL(10)));
3096         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(11) (error analysis):          %d\n", mumps->id.ICNTL(11)));
3097         if (mumps->id.ICNTL(11) > 0) {
3098           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(4) (inf norm of input mat):        %g\n", (double)ID_RINFOG_GET(mumps->id, 4)));
3099           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(5) (inf norm of solution):         %g\n", (double)ID_RINFOG_GET(mumps->id, 5)));
3100           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(6) (inf norm of residual):         %g\n", (double)ID_RINFOG_GET(mumps->id, 6)));
3101           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 7), (double)ID_RINFOG_GET(mumps->id, 8)));
3102           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(9) (error estimate):               %g\n", (double)ID_RINFOG_GET(mumps->id, 9)));
3103           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 10), (double)ID_RINFOG_GET(mumps->id, 11)));
3104         }
3105         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(12) (efficiency control):                         %d\n", mumps->id.ICNTL(12)));
3106         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(13) (sequential factorization of the root node):  %d\n", mumps->id.ICNTL(13)));
3107         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14)));
3108         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(15) (compression of the input matrix):            %d\n", mumps->id.ICNTL(15)));
3109         /* ICNTL(15-17) not used */
3110         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(18) (input mat struct):                           %d\n", mumps->id.ICNTL(18)));
3111         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(19) (Schur complement info):                      %d\n", mumps->id.ICNTL(19)));
3112         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(20) (RHS sparse pattern):                         %d\n", mumps->id.ICNTL(20)));
3113         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(21) (solution struct):                            %d\n", mumps->id.ICNTL(21)));
3114         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(22) (in-core/out-of-core facility):               %d\n", mumps->id.ICNTL(22)));
3115         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23)));
3116 
3117         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(24) (detection of null pivot rows):               %d\n", mumps->id.ICNTL(24)));
3118         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(25) (computation of a null space basis):          %d\n", mumps->id.ICNTL(25)));
3119         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(26) (Schur options for RHS or solution):          %d\n", mumps->id.ICNTL(26)));
3120         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(27) (blocking size for multiple RHS):             %d\n", mumps->id.ICNTL(27)));
3121         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(28) (use parallel or sequential ordering):        %d\n", mumps->id.ICNTL(28)));
3122         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(29) (parallel ordering):                          %d\n", mumps->id.ICNTL(29)));
3123 
3124         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(30) (user-specified set of entries in inv(A)):    %d\n", mumps->id.ICNTL(30)));
3125         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(31) (factors discarded during factorization):     %d\n", mumps->id.ICNTL(31)));
3126         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(33) (compute determinant):                        %d\n", mumps->id.ICNTL(33)));
3127         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(35) (activate BLR based factorization):           %d\n", mumps->id.ICNTL(35)));
3128         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(36) (choice of BLR factorization variant):        %d\n", mumps->id.ICNTL(36)));
3129         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(37) (compression of the contribution blocks):     %d\n", mumps->id.ICNTL(37)));
3130         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(38) (estimated compression rate of LU factors):   %d\n", mumps->id.ICNTL(38)));
3131         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(48) (multithreading with tree parallelism):       %d\n", mumps->id.ICNTL(48)));
3132         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(56) (postponing and rank-revealing factorization):%d\n", mumps->id.ICNTL(56)));
3133         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(58) (options for symbolic factorization):         %d\n", mumps->id.ICNTL(58)));
3134 
3135         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(1) (relative pivoting threshold):      %g\n", (double)ID_CNTL_GET(mumps->id, 1)));
3136         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(2) (stopping criterion of refinement): %g\n", (double)ID_CNTL_GET(mumps->id, 2)));
3137         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(3) (absolute pivoting threshold):      %g\n", (double)ID_CNTL_GET(mumps->id, 3)));
3138         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(4) (value of static pivoting):         %g\n", (double)ID_CNTL_GET(mumps->id, 4)));
3139         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(5) (fixation for null pivots):         %g\n", (double)ID_CNTL_GET(mumps->id, 5)));
3140         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(7) (dropping parameter for BLR):       %g\n", (double)ID_CNTL_GET(mumps->id, 7)));
3141 
3142         /* information local to each processor */
3143         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis):\n"));
3144         PetscCall(PetscViewerASCIIPushSynchronized(viewer));
3145         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 1)));
3146         PetscCall(PetscViewerFlush(viewer));
3147         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization):\n"));
3148         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 2)));
3149         PetscCall(PetscViewerFlush(viewer));
3150         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization):\n"));
3151         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 3)));
3152         PetscCall(PetscViewerFlush(viewer));
3153 
3154         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size (in MB) of MUMPS internal data for running numerical factorization):\n"));
3155         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(15)));
3156         PetscCall(PetscViewerFlush(viewer));
3157 
3158         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(16) (size (in MB) of MUMPS internal data used during numerical factorization):\n"));
3159         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(16)));
3160         PetscCall(PetscViewerFlush(viewer));
3161 
3162         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization):\n"));
3163         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(23)));
3164         PetscCall(PetscViewerFlush(viewer));
3165 
3166         if (mumps->ninfo && mumps->ninfo <= 80) {
3167           PetscInt i;
3168           for (i = 0; i < mumps->ninfo; i++) {
3169             PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(%" PetscInt_FMT "):\n", mumps->info[i]));
3170             PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i])));
3171             PetscCall(PetscViewerFlush(viewer));
3172           }
3173         }
3174         PetscCall(PetscViewerASCIIPopSynchronized(viewer));
3175       } else PetscCall(PetscViewerASCIIPrintf(viewer, "  Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : ""));
3176 
3177       if (mumps->myid == 0) { /* information from the host */
3178         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", (double)ID_RINFOG_GET(mumps->id, 1)));
3179         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 2)));
3180         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 3)));
3181         PetscCall(PetscViewerASCIIPrintf(viewer, "  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", (double)ID_RINFOG_GET(mumps->id, 12), (double)ID_RINFOG_GET(mumps->id, 13), mumps->id.INFOG(34)));
3182 
3183         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3)));
3184         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4)));
3185         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5)));
3186         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6)));
3187         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7)));
3188         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8)));
3189         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(10) (total integer space to store the matrix factors after factorization): %d\n", mumps->id.INFOG(10)));
3190         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10)));
3191         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11)));
3192         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12)));
3193         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(14) (number of memory compresses after factorization): %d\n", mumps->id.INFOG(14)));
3194         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14)));
3195         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15)));
3196         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16)));
3197         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17)));
3198         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18)));
3199         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19)));
3200         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20)));
3201         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21)));
3202         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22)));
3203         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23)));
3204         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24)));
3205         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25)));
3206         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28)));
3207         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29)));
3208         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31)));
3209         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32)));
3210         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33)));
3211         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34)));
3212         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35)));
3213         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36)));
3214         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37)));
3215         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38)));
3216         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39)));
3217       }
3218     }
3219   }
3220   PetscFunctionReturn(PETSC_SUCCESS);
3221 }
3222 
3223 static PetscErrorCode MatGetInfo_MUMPS(Mat A, PETSC_UNUSED MatInfoType flag, MatInfo *info)
3224 {
3225   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
3226 
3227   PetscFunctionBegin;
3228   info->block_size        = 1.0;
  /* MUMPS convention: a negative INFOG(20) means the count is given in millions of entries */
  info->nz_allocated      = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
  info->nz_used           = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
3231   info->nz_unneeded       = 0.0;
3232   info->assemblies        = 0.0;
3233   info->mallocs           = 0.0;
3234   info->memory            = 0.0;
3235   info->fill_ratio_given  = 0;
3236   info->fill_ratio_needed = 0;
3237   info->factor_mallocs    = 0;
3238   PetscFunctionReturn(PETSC_SUCCESS);
3239 }
3240 
3241 static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
3242 {
3243   Mat_MUMPS         *mumps = (Mat_MUMPS *)F->data;
3244   const PetscScalar *arr;
3245   const PetscInt    *idxs;
3246   PetscInt           size, i;
3247 
3248   PetscFunctionBegin;
3249   PetscCall(ISGetLocalSize(is, &size));
3250   /* Schur complement matrix */
3251   PetscCall(MatDestroy(&F->schur));
3252   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
3253   PetscCall(MatDenseGetArrayRead(F->schur, &arr));
  // don't allocate mumps->id.schur[] now since its precision is not yet known
3255   PetscCall(PetscMUMPSIntCast(size, &mumps->id.size_schur));
3256   PetscCall(PetscMUMPSIntCast(size, &mumps->id.schur_lld));
3257   PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
3258   if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE));
3259 
3260   /* MUMPS expects Fortran style indices */
3261   PetscCall(PetscFree(mumps->id.listvar_schur));
3262   PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
3263   PetscCall(ISGetIndices(is, &idxs));
3264   for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &mumps->id.listvar_schur[i]));
3265   PetscCall(ISRestoreIndices(is, &idxs));
  /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
3267   if (mumps->id.icntl) mumps->id.ICNTL(26) = -1;
3268   else mumps->ICNTL26 = -1;
3269   PetscFunctionReturn(PETSC_SUCCESS);
3270 }
3271 
3272 static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
3273 {
3274   Mat          St;
3275   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
3276   PetscScalar *array;
3277   PetscInt     i, j, N = mumps->id.size_schur;
3278 
3279   PetscFunctionBegin;
3280   PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! Call MatFactorSetSchurIS() to enable it");
3281   PetscCall(MatCreate(PETSC_COMM_SELF, &St));
3282   PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
3283   PetscCall(MatSetType(St, MATDENSE));
3284   PetscCall(MatSetUp(St));
3285   PetscCall(MatDenseGetArray(St, &array));
  if (!mumps->sym) {                /* MUMPS always returns a full matrix */
3287     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
3288       for (i = 0; i < N; i++) {
3289         for (j = 0; j < N; j++) array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
3290       }
3291     } else { /* stored by columns */
3292       PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array));
3293     }
3294   } else {                          /* either full or lower-triangular (not packed) */
3295     if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
3296       for (i = 0; i < N; i++) {
3297         for (j = i; j < N; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
3298       }
3299     } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
3300       PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array));
3301     } else { /* ICNTL(19) == 1 lower triangular stored by rows */
3302       for (i = 0; i < N; i++) {
3303         for (j = 0; j < i + 1; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
3304       }
3305     }
3306   }
3307   PetscCall(MatDenseRestoreArray(St, &array));
3308   *S = St;
3309   PetscFunctionReturn(PETSC_SUCCESS);
3310 }
3311 
3312 static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
3313 {
3314   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3315 
3316   PetscFunctionBegin;
3317   if (mumps->id.job == JOB_NULL) {                                            /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
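    // ICNTL_pre layout: ICNTL_pre[0] is the number of cached pairs; pair i is stored as (ICNTL_pre[1 + 2*i], ICNTL_pre[2 + 2*i]) = (icntl, ival)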
3318     PetscMUMPSInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
3319     for (i = 0; i < nICNTL_pre; ++i)
3320       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
3321     if (i == nICNTL_pre) {                             /* not already cached */
3322       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
3323       else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
3324       mumps->ICNTL_pre[0]++;
3325     }
3326     mumps->ICNTL_pre[1 + 2 * i] = (PetscMUMPSInt)icntl;
3327     PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i));
3328   } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl)));
3329   PetscFunctionReturn(PETSC_SUCCESS);
3330 }
3331 
3332 static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
3333 {
3334   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3335 
3336   PetscFunctionBegin;
3337   if (mumps->id.job == JOB_NULL) {
3338     PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
3339     *ival = 0;
3340     for (i = 0; i < nICNTL_pre; ++i) {
3341       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i];
3342     }
3343   } else *ival = mumps->id.ICNTL(icntl);
3344   PetscFunctionReturn(PETSC_SUCCESS);
3345 }
3346 
3347 /*@
3348   MatMumpsSetIcntl - Set MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>
3349 
3350   Logically Collective
3351 
3352   Input Parameters:
3353 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3354 . icntl - index of MUMPS parameter array `ICNTL()`
3355 - ival  - value of MUMPS `ICNTL(icntl)`
3356 
3357   Options Database Key:
3358 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered `icntl` to `ival`
3359 
3360   Level: beginner
3361 
3362   Note:
3363   Ignored if MUMPS is not installed or `F` is not a MUMPS matrix
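
  Example Usage:
  A minimal sketch, assuming `A` is an assembled `MATAIJ` matrix; it raises the MUMPS working-space percentage before the numerical factorization
.vb
  Mat F;
  MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F);
  MatMumpsSetIcntl(F, 14, 30); // ICNTL(14): allow a 30 percent increase in the estimated working space
.ve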
3364 
3365 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3366 @*/
3367 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival)
3368 {
3369   PetscFunctionBegin;
3370   PetscValidType(F, 1);
3371   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3372   PetscValidLogicalCollectiveInt(F, icntl, 2);
3373   PetscValidLogicalCollectiveInt(F, ival, 3);
3374   PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
3375   PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
3376   PetscFunctionReturn(PETSC_SUCCESS);
3377 }
3378 
3379 /*@
3380   MatMumpsGetIcntl - Get MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>
3381 
3382   Logically Collective
3383 
3384   Input Parameters:
3385 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3386 - icntl - index of MUMPS parameter array ICNTL()
3387 
3388   Output Parameter:
3389 . ival - value of MUMPS ICNTL(icntl)
3390 
3391   Level: beginner
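
  Example Usage:
  A minimal sketch, assuming `F` is a MUMPS factor obtained with `MatGetFactor()`
.vb
  PetscInt ival;
  MatMumpsGetIcntl(F, 14, &ival); // current value of ICNTL(14)
.ve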
3392 
3393 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3394 @*/
3395 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival)
3396 {
3397   PetscFunctionBegin;
3398   PetscValidType(F, 1);
3399   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3400   PetscValidLogicalCollectiveInt(F, icntl, 2);
3401   PetscAssertPointer(ival, 3);
3402   PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
3403   PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
3404   PetscFunctionReturn(PETSC_SUCCESS);
3405 }
3406 
3407 static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val)
3408 {
3409   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3410 
3411   PetscFunctionBegin;
3412   if (mumps->id.job == JOB_NULL) {
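    // CNTL_pre layout mirrors ICNTL_pre: CNTL_pre[0] is the number of cached pairs, followed by (icntl, val) pairs stored as PetscReal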
3413     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
3414     for (i = 0; i < nCNTL_pre; ++i)
3415       if (mumps->CNTL_pre[1 + 2 * i] == icntl) break;
3416     if (i == nCNTL_pre) {
3417       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre));
3418       else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre));
3419       mumps->CNTL_pre[0]++;
3420     }
3421     mumps->CNTL_pre[1 + 2 * i] = icntl;
3422     mumps->CNTL_pre[2 + 2 * i] = val;
3423   } else ID_CNTL_SET(mumps->id, icntl, val);
3424   PetscFunctionReturn(PETSC_SUCCESS);
3425 }
3426 
3427 static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val)
3428 {
3429   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3430 
3431   PetscFunctionBegin;
3432   if (mumps->id.job == JOB_NULL) {
3433     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
3434     *val = 0.0;
3435     for (i = 0; i < nCNTL_pre; ++i) {
3436       if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i];
3437     }
3438   } else *val = ID_CNTL_GET(mumps->id, icntl);
3439   PetscFunctionReturn(PETSC_SUCCESS);
3440 }
3441 
3442 /*@
3443   MatMumpsSetCntl - Set MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc>
3444 
3445   Logically Collective
3446 
3447   Input Parameters:
3448 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3449 . icntl - index of MUMPS parameter array `CNTL()`
3450 - val   - value of MUMPS `CNTL(icntl)`
3451 
3452   Options Database Key:
3453 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival
3454 
3455   Level: beginner
3456 
3457   Note:
3458   Ignored if MUMPS is not installed or `F` is not a MUMPS matrix
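
  Example Usage:
  A minimal sketch, assuming `F` is a MUMPS factor obtained with `MatGetFactor()`
.vb
  MatMumpsSetCntl(F, 1, 0.01); // CNTL(1): relative threshold for numerical pivoting
.ve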
3459 
3460 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3461 @*/
3462 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val)
3463 {
3464   PetscFunctionBegin;
3465   PetscValidType(F, 1);
3466   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3467   PetscValidLogicalCollectiveInt(F, icntl, 2);
3468   PetscValidLogicalCollectiveReal(F, val, 3);
3469   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
3470   PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val));
3471   PetscFunctionReturn(PETSC_SUCCESS);
3472 }
3473 
3474 /*@
3475   MatMumpsGetCntl - Get MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc>
3476 
3477   Logically Collective
3478 
3479   Input Parameters:
3480 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3481 - icntl - index of MUMPS parameter array CNTL()
3482 
3483   Output Parameter:
3484 . val - value of MUMPS CNTL(icntl)
3485 
3486   Level: beginner
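
  Example Usage:
  A minimal sketch, assuming `F` is a MUMPS factor obtained with `MatGetFactor()`
.vb
  PetscReal val;
  MatMumpsGetCntl(F, 1, &val); // current value of CNTL(1)
.ve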
3487 
3488 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3489 @*/
3490 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val)
3491 {
3492   PetscFunctionBegin;
3493   PetscValidType(F, 1);
3494   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3495   PetscValidLogicalCollectiveInt(F, icntl, 2);
3496   PetscAssertPointer(val, 3);
3497   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
3498   PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
3499   PetscFunctionReturn(PETSC_SUCCESS);
3500 }
3501 
3502 static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info)
3503 {
3504   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3505 
3506   PetscFunctionBegin;
3507   *info = mumps->id.INFO(icntl);
3508   PetscFunctionReturn(PETSC_SUCCESS);
3509 }
3510 
3511 static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog)
3512 {
3513   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3514 
3515   PetscFunctionBegin;
3516   *infog = mumps->id.INFOG(icntl);
3517   PetscFunctionReturn(PETSC_SUCCESS);
3518 }
3519 
3520 static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo)
3521 {
3522   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3523 
3524   PetscFunctionBegin;
3525   *rinfo = ID_RINFO_GET(mumps->id, icntl);
3526   PetscFunctionReturn(PETSC_SUCCESS);
3527 }
3528 
3529 static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog)
3530 {
3531   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3532 
3533   PetscFunctionBegin;
3534   *rinfog = ID_RINFOG_GET(mumps->id, icntl);
3535   PetscFunctionReturn(PETSC_SUCCESS);
3536 }
3537 
3538 static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array)
3539 {
3540   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3541 
3542   PetscFunctionBegin;
  PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set to 1 for null pivot row detection");
3544   *size  = 0;
3545   *array = NULL;
3546   if (!mumps->myid) {
3547     *size = mumps->id.INFOG(28);
3548     PetscCall(PetscMalloc1(*size, array));
3549     for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1;
3550   }
3551   PetscFunctionReturn(PETSC_SUCCESS);
3552 }
3553 
3554 static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS)
3555 {
3556   Mat          Bt = NULL, Btseq = NULL;
3557   PetscBool    flg;
3558   Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
3559   PetscScalar *aa;
3560   PetscInt     spnr, *ia, *ja, M, nrhs;
3561 
3562   PetscFunctionBegin;
3563   PetscAssertPointer(spRHS, 2);
3564   PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg));
  PetscCheck(flg, PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be of type MATTRANSPOSEVIRTUAL");
3566   PetscCall(MatShellGetScalingShifts(spRHS, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
3567   PetscCall(MatTransposeGetMat(spRHS, &Bt));
3568 
3569   PetscCall(MatMumpsSetIcntl(F, 30, 1));
3570 
3571   if (mumps->petsc_size > 1) {
3572     Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data;
3573     Btseq         = b->A;
3574   } else {
3575     Btseq = Bt;
3576   }
3577 
3578   PetscCall(MatGetSize(spRHS, &M, &nrhs));
3579   mumps->id.nrhs = (PetscMUMPSInt)nrhs;
3580   PetscCall(PetscMUMPSIntCast(M, &mumps->id.lrhs));
3581   mumps->id.rhs = NULL;
3582 
3583   if (!mumps->myid) {
3584     PetscCall(MatSeqAIJGetArray(Btseq, &aa));
3585     PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
3586     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
3587     PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
3588     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)Btseq->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
3589   } else {
3590     mumps->id.irhs_ptr    = NULL;
3591     mumps->id.irhs_sparse = NULL;
3592     mumps->id.nz_rhs      = 0;
3593     if (mumps->id.rhs_sparse_len) {
3594       PetscCall(PetscFree(mumps->id.rhs_sparse));
3595       mumps->id.rhs_sparse_len = 0;
3596     }
3597   }
3598   mumps->id.ICNTL(20) = 1; /* rhs is sparse */
3599   mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */
3600 
3601   /* solve phase */
3602   mumps->id.job = JOB_SOLVE;
3603   PetscMUMPS_c(mumps);
3604   PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
3605 
3606   if (!mumps->myid) {
3607     PetscCall(MatSeqAIJRestoreArray(Btseq, &aa));
3608     PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
3609     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
3610   }
3611   PetscFunctionReturn(PETSC_SUCCESS);
3612 }
3613 
3614 /*@
  MatMumpsGetInverse - Get a user-specified set of entries of the inverse of `A` <https://mumps-solver.org/index.php?page=doc>
3616 
3617   Logically Collective
3618 
3619   Input Parameter:
3620 . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3621 
3622   Output Parameter:
3623 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A`
3624 
3625   Level: beginner
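
  Example Usage:
  A minimal sketch; `Bt` is a hypothetical sequential `MATAIJ` matrix whose nonzero pattern selects the wanted entries of the inverse
.vb
  Mat Bt, spRHS;
  // ... assemble Bt with the sparsity pattern of the requested entries of inv(A) ...
  MatCreateTranspose(Bt, &spRHS); // wrap Bt as a MATTRANSPOSEVIRTUAL matrix
  MatMumpsGetInverse(F, spRHS);   // the requested entries of inv(A) are written into Bt
  MatDestroy(&spRHS);
.ve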
3626 
3627 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`
3628 @*/
3629 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS)
3630 {
3631   PetscFunctionBegin;
3632   PetscValidType(F, 1);
3633   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3634   PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS));
3635   PetscFunctionReturn(PETSC_SUCCESS);
3636 }
3637 
3638 static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST)
3639 {
3640   Mat spRHS;
3641 
3642   PetscFunctionBegin;
3643   PetscCall(MatCreateTranspose(spRHST, &spRHS));
3644   PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS));
3645   PetscCall(MatDestroy(&spRHS));
3646   PetscFunctionReturn(PETSC_SUCCESS);
3647 }
3648 
3649 /*@
  MatMumpsGetInverseTranspose - Get a user-specified set of entries of the inverse of $A^T$ <https://mumps-solver.org/index.php?page=doc>
3651 
3652   Logically Collective
3653 
3654   Input Parameter:
3655 . F - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3656 
3657   Output Parameter:
. spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of the inverse of $A^T$
3659 
3660   Level: beginner
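
  Example Usage:
  A minimal sketch; `spRHST` is a hypothetical sequential `MATAIJ` matrix whose nonzero pattern selects the wanted entries of the inverse of $A^T$
.vb
  MatMumpsGetInverseTranspose(F, spRHST); // the requested entries of inv(A^T) are written into spRHST
.ve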
3661 
3662 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()`
3663 @*/
3664 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST)
3665 {
3666   PetscBool flg;
3667 
3668   PetscFunctionBegin;
3669   PetscValidType(F, 1);
3670   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3671   PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
3672   PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix");
3673   PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST));
3674   PetscFunctionReturn(PETSC_SUCCESS);
3675 }
3676 
3677 static PetscErrorCode MatMumpsSetBlk_MUMPS(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[])
3678 {
3679   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3680 
3681   PetscFunctionBegin;
3682   if (nblk) {
3683     PetscAssertPointer(blkptr, 4);
3684     PetscCall(PetscMUMPSIntCast(nblk, &mumps->id.nblk));
3685     PetscCall(PetscFree(mumps->id.blkptr));
3686     PetscCall(PetscMalloc1(nblk + 1, &mumps->id.blkptr));
3687     for (PetscInt i = 0; i < nblk + 1; ++i) PetscCall(PetscMUMPSIntCast(blkptr[i], mumps->id.blkptr + i));
3688     // mumps->id.icntl[] might have not been allocated, which is done in MatSetFromOptions_MUMPS(). So we don't assign ICNTL(15).
3689     // We use id.nblk and id.blkptr to know what values to set to ICNTL(15) in MatSetFromOptions_MUMPS().
3690     // mumps->id.ICNTL(15) = 1;
3691     if (blkvar) {
3692       PetscCall(PetscFree(mumps->id.blkvar));
3693       PetscCall(PetscMalloc1(F->rmap->N, &mumps->id.blkvar));
3694       for (PetscInt i = 0; i < F->rmap->N; ++i) PetscCall(PetscMUMPSIntCast(blkvar[i], mumps->id.blkvar + i));
3695     }
3696   } else {
3697     PetscCall(PetscFree(mumps->id.blkptr));
3698     PetscCall(PetscFree(mumps->id.blkvar));
3699     // mumps->id.ICNTL(15) = 0;
3700     mumps->id.nblk = 0;
3701   }
3702   PetscFunctionReturn(PETSC_SUCCESS);
3703 }
3704 
3705 /*@
3706   MatMumpsSetBlk - Set user-specified variable block sizes to be used with `-mat_mumps_icntl_15 1`
3707 
  Not collective; only relevant on the first process of the MPI communicator
3709 
3710   Input Parameters:
3711 + F      - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3712 . nblk   - the number of blocks
. blkvar - see the MUMPS documentation; `blkvar(blkptr(iblk):blkptr(iblk+1)-1)` (`iblk=1, nblk`) holds the variables associated with block `iblk`
- blkptr - array of size `nblk + 1` whose first entry is 1, storing the (1-based) prefix sum of the block sizes
3715 
3716   Level: advanced
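
  Example Usage:
  A minimal sketch, assuming a 5-variable matrix split into two blocks, variables {1,2,3} and {4,5} in 1-based numbering; passing `NULL` for `blkvar` assumes consecutive variable numbering (see the MUMPS documentation)
.vb
  PetscInt blkptr[] = {1, 4, 6}; // block iblk spans variables blkptr[iblk-1] .. blkptr[iblk]-1
  MatMumpsSetBlk(F, 2, NULL, blkptr);
.ve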
3717 
3718 .seealso: [](ch_matrices), `MATSOLVERMUMPS`, `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatSetVariableBlockSizes()`
3719 @*/
3720 PetscErrorCode MatMumpsSetBlk(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[])
3721 {
3722   PetscFunctionBegin;
3723   PetscValidType(F, 1);
3724   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3725   PetscUseMethod(F, "MatMumpsSetBlk_C", (Mat, PetscInt, const PetscInt[], const PetscInt[]), (F, nblk, blkvar, blkptr));
3726   PetscFunctionReturn(PETSC_SUCCESS);
3727 }
3728 
3729 /*@
3730   MatMumpsGetInfo - Get MUMPS parameter INFO() <https://mumps-solver.org/index.php?page=doc>
3731 
3732   Logically Collective
3733 
3734   Input Parameters:
3735 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3736 - icntl - index of MUMPS parameter array INFO()
3737 
3738   Output Parameter:
3739 . ival - value of MUMPS INFO(icntl)
3740 
3741   Level: beginner
3742 
3743 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3744 @*/
3745 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival)
3746 {
3747   PetscFunctionBegin;
3748   PetscValidType(F, 1);
3749   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3750   PetscAssertPointer(ival, 3);
3751   PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
3752   PetscFunctionReturn(PETSC_SUCCESS);
3753 }
3754 
3755 /*@
3756   MatMumpsGetInfog - Get MUMPS parameter INFOG() <https://mumps-solver.org/index.php?page=doc>
3757 
3758   Logically Collective
3759 
3760   Input Parameters:
3761 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3762 - icntl - index of MUMPS parameter array INFOG()
3763 
3764   Output Parameter:
3765 . ival - value of MUMPS INFOG(icntl)
3766 
3767   Level: beginner
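
  Example Usage:
  A minimal sketch, assuming the factorization has completed
.vb
  PetscInt nnull;
  MatMumpsGetInfog(F, 28, &nnull); // INFOG(28): number of null pivots encountered
.ve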
3768 
3769 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3770 @*/
3771 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival)
3772 {
3773   PetscFunctionBegin;
3774   PetscValidType(F, 1);
3775   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3776   PetscAssertPointer(ival, 3);
3777   PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
3778   PetscFunctionReturn(PETSC_SUCCESS);
3779 }
3780 
3781 /*@
3782   MatMumpsGetRinfo - Get MUMPS parameter RINFO() <https://mumps-solver.org/index.php?page=doc>
3783 
3784   Logically Collective
3785 
3786   Input Parameters:
3787 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3788 - icntl - index of MUMPS parameter array RINFO()
3789 
3790   Output Parameter:
3791 . val - value of MUMPS RINFO(icntl)
3792 
3793   Level: beginner
3794 
3795 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
3796 @*/
3797 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val)
3798 {
3799   PetscFunctionBegin;
3800   PetscValidType(F, 1);
3801   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3802   PetscAssertPointer(val, 3);
3803   PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
3804   PetscFunctionReturn(PETSC_SUCCESS);
3805 }
3806 
3807 /*@
3808   MatMumpsGetRinfog - Get MUMPS parameter RINFOG() <https://mumps-solver.org/index.php?page=doc>
3809 
3810   Logically Collective
3811 
3812   Input Parameters:
3813 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3814 - icntl - index of MUMPS parameter array RINFOG()
3815 
3816   Output Parameter:
3817 . val - value of MUMPS RINFOG(icntl)
3818 
3819   Level: beginner
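
  Example Usage:
  A minimal sketch, assuming the factorization has completed
.vb
  PetscReal flops;
  MatMumpsGetRinfog(F, 3, &flops); // RINFOG(3): global flops for the elimination after factorization
.ve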
3820 
3821 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
3822 @*/
3823 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val)
3824 {
3825   PetscFunctionBegin;
3826   PetscValidType(F, 1);
3827   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3828   PetscAssertPointer(val, 3);
3829   PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
3830   PetscFunctionReturn(PETSC_SUCCESS);
3831 }
3832 
3833 /*@
3834   MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() <https://mumps-solver.org/index.php?page=doc>
3835 
3836   Logically Collective
3837 
3838   Input Parameter:
3839 . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3840 
3841   Output Parameters:
+ size  - local size of the array; it is nonzero only on MPI rank 0
- array - array of rows with null pivots, using 0-based indexing. The array is allocated within the function and the user is responsible
          for freeing it.
3845 
3846   Level: beginner
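
  Example Usage:
  A minimal sketch; it requires the factorization to have been computed with `-mat_mumps_icntl_24 1`
.vb
  PetscInt nnull, *rows;
  MatMumpsGetNullPivots(F, &nnull, &rows);
  // ... inspect the 0-based row indices rows[0..nnull-1] (nonempty only on MPI rank 0) ...
  PetscFree(rows);
.ve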
3847 
3848 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
3849 @*/
3850 PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array)
3851 {
3852   PetscFunctionBegin;
3853   PetscValidType(F, 1);
3854   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3855   PetscAssertPointer(size, 2);
3856   PetscAssertPointer(array, 3);
3857   PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array));
3858   PetscFunctionReturn(PETSC_SUCCESS);
3859 }
3860 
3861 /*MC
3862   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
3863   MPI distributed and sequential matrices via the external package MUMPS <https://mumps-solver.org/index.php?page=doc>
3864 
3865   Works with `MATAIJ` and `MATSBAIJ` matrices
3866 
3867   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
3868 
3869   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode.
3870   See details below.
3871 
  Use `-pc_type lu` or `-pc_type cholesky` together with `-pc_factor_mat_solver_type mumps` to use this direct solver
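
  For example, the following hypothetical command line (the executable name ./app and the value 30 are illustrative) selects MUMPS LU for a `KSP` solve and raises ICNTL(14)
.vb
   mpirun -n 4 ./app -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_14 30
.ve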
3873 
3874   Options Database Keys:
3875 +  -mat_mumps_icntl_1  - ICNTL(1): output stream for error messages
.  -mat_mumps_icntl_2  - ICNTL(2): output stream for diagnostic printing, statistics, and warnings
3877 .  -mat_mumps_icntl_3  - ICNTL(3): output stream for global information, collected on the host
3878 .  -mat_mumps_icntl_4  - ICNTL(4): level of printing (0 to 4)
.  -mat_mumps_icntl_6  - ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7)
3880 .  -mat_mumps_icntl_7  - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
3881                           Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
3882 .  -mat_mumps_icntl_8  - ICNTL(8): scaling strategy (-2 to 8 or 77)
.  -mat_mumps_icntl_10 - ICNTL(10): maximum number of iterative refinement steps
3884 .  -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view)
3885 .  -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
3886 .  -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
3887 .  -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space
3888 .  -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format
3889 .  -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement
3890 .  -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
3891 .  -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
.  -mat_mumps_icntl_23 - ICNTL(23): maximum size of the working memory (MB) that MUMPS can allocate per processor
3893 .  -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
3894 .  -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis
.  -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase when a Schur complement matrix has been computed
3896 .  -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
3897 .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
3898 .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
3899 .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
3900 .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
3901 .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
3902 .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
3903 .  -mat_mumps_icntl_37 - ICNTL(37): compression of the contribution blocks (CB)
3904 .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
3905 .  -mat_mumps_icntl_48 - ICNTL(48): multithreading with tree parallelism
3906 .  -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization
3907 .  -mat_mumps_cntl_1   - CNTL(1): relative pivoting threshold
3908 .  -mat_mumps_cntl_2   - CNTL(2): stopping criterion of refinement
3909 .  -mat_mumps_cntl_3   - CNTL(3): absolute pivoting threshold
3910 .  -mat_mumps_cntl_4   - CNTL(4): value for static pivoting
3911 .  -mat_mumps_cntl_5   - CNTL(5): fixation for null pivots
3912 .  -mat_mumps_cntl_7   - CNTL(7): precision of the dropping parameter used during BLR factorization
-  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) were called before calling MUMPS.
                                    The default is typically the number of cores per CPU package (socket) as reported by hwloc, as suggested by the MUMPS manual.
3915 
3916   Level: beginner
3917 
3918   Notes:
3919   MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at <https://mumps-solver.org/index.php?page=doc>) so using it will
3920   error if the matrix is Hermitian.
3921 
3922   When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
3923   `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.
3924 
3925   When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about
3926   the failure with
3927 .vb
3928           KSPGetPC(ksp,&pc);
3929           PCFactorGetMatrix(pc,&mat);
3930           MatMumpsGetInfo(mat,....);
3931           MatMumpsGetInfog(mat,....); etc.
3932 .ve
3933   Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message.
3934 
3935   MUMPS provides 64-bit integer support in two build modes:
3936   full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
3937   requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
3938 
3939   selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
3940   MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
3941   columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
3942   integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
3943 
  With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.
3945 
3946   Two modes to run MUMPS/PETSc with OpenMP
3947 .vb
   Set `OMP_NUM_THREADS` and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
   threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
3950 .ve
3951 
3952 .vb
3953    `-mat_mumps_use_omp_threads` [m] and run your code with as many MPI ranks as the number of cores. For example,
3954    if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
3955 .ve
3956 
3957    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
3958    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc`
3959    (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
3960    libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
   (PETSc will automatically try to utilize a threaded BLAS if `--with-openmp` is provided).
3962 
3963    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
3964    processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
3965    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
   are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We free the CPUs assigned to slaves (which might be set
3967    by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs.
   In a multi-socket compute node, MPI rank mapping is an issue. Using the above example again, suppose your compute node has two sockets.
   If you interleave MPI ranks on the two sockets (even ranks on socket 0, odd ranks on socket 1) and bind MPI ranks to cores, then with
   `-mat_mumps_use_omp_threads` 16 a master rank (and the threads it spawns) will use half of the cores in socket 0 and half in socket 1,
   which hurts locality. On the other hand, if you map MPI ranks consecutively onto the two sockets, the problem does not arise. Therefore,
   when you use `-mat_mumps_use_omp_threads`, keep an eye on your MPI rank mapping and CPU binding.
3973    For example, with the Slurm job scheduler, one can use srun `--cpu-bind`=verbose -m block:block to map consecutive MPI ranks to sockets and
3974    examine the mapping result.
3975 
   PETSc does not control thread binding in MUMPS, so to get the best performance one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts,
3977    for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc
3978    calls `omp_set_num_threads`(m) internally before calling MUMPS.
3979 
3980    See {cite}`heroux2011bi` and {cite}`gutierrez2017accommodating`
3981 
3982 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `MatMumpsSetBlk()`, `KSPGetPC()`, `PCFactorGetMatrix()`
3983 M*/
3984 
3985 static PetscErrorCode MatFactorGetSolverType_mumps(PETSC_UNUSED Mat A, MatSolverType *type)
3986 {
3987   PetscFunctionBegin;
3988   *type = MATSOLVERMUMPS;
3989   PetscFunctionReturn(PETSC_SUCCESS);
3990 }
3991 
3992 /* MatGetFactor for Seq and MPI AIJ matrices */
3993 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F)
3994 {
3995   Mat         B;
3996   Mat_MUMPS  *mumps;
3997   PetscBool   isSeqAIJ, isDiag, isDense;
3998   PetscMPIInt size;
3999 
4000   PetscFunctionBegin;
4001 #if defined(PETSC_USE_COMPLEX)
4002   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
4003     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
4004     *F = NULL;
4005     PetscFunctionReturn(PETSC_SUCCESS);
4006   }
4007 #endif
4008   /* Create the factorization matrix */
4009   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ));
4010   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag));
4011   PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
4012   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4013   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4014   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4015   PetscCall(MatSetUp(B));
4016 
4017   PetscCall(PetscNew(&mumps));
4018 
4019   B->ops->view    = MatView_MUMPS;
4020   B->ops->getinfo = MatGetInfo_MUMPS;
4021 
4022   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4023   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4024   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4025   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4026   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4027   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4028   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4029   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4030   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4031   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4032   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4033   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4034   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
4035   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
4036   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));
4037 
4038   if (ftype == MAT_FACTOR_LU) {
4039     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
4040     B->factortype            = MAT_FACTOR_LU;
4041     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
4042     else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
4043     else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
4044     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
4045     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
4046     mumps->sym = 0;
4047   } else {
4048     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
4049     B->factortype                  = MAT_FACTOR_CHOLESKY;
4050     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
4051     else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
4052     else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
4053     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
4054     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
4055 #if defined(PETSC_USE_COMPLEX)
4056     mumps->sym = 2;
4057 #else
4058     if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
4059     else mumps->sym = 2;
4060 #endif
4061   }
4062 
4063   /* set solvertype */
4064   PetscCall(PetscFree(B->solvertype));
4065   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4066   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4067   if (size == 1) {
4068     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4069     B->canuseordering = PETSC_TRUE;
4070   }
4071   B->ops->destroy = MatDestroy_MUMPS;
4072   B->data         = (void *)mumps;
4073 
4074   *F               = B;
4075   mumps->id.job    = JOB_NULL;
4076   mumps->ICNTL_pre = NULL;
4077   mumps->CNTL_pre  = NULL;
4078   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
4079   PetscFunctionReturn(PETSC_SUCCESS);
4080 }
4081 
4082 /* MatGetFactor for Seq and MPI SBAIJ matrices */
4083 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, PETSC_UNUSED MatFactorType ftype, Mat *F)
4084 {
4085   Mat         B;
4086   Mat_MUMPS  *mumps;
4087   PetscBool   isSeqSBAIJ;
4088   PetscMPIInt size;
4089 
4090   PetscFunctionBegin;
4091 #if defined(PETSC_USE_COMPLEX)
4092   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
4093     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
4094     *F = NULL;
4095     PetscFunctionReturn(PETSC_SUCCESS);
4096   }
4097 #endif
4098   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4099   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4100   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4101   PetscCall(MatSetUp(B));
4102 
4103   PetscCall(PetscNew(&mumps));
4104   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ));
4105   if (isSeqSBAIJ) {
4106     mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
4107   } else {
4108     mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
4109   }
4110 
4111   B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
4112   B->ops->view                   = MatView_MUMPS;
4113   B->ops->getinfo                = MatGetInfo_MUMPS;
4114 
4115   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4116   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4117   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4118   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4119   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4120   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4121   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4122   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4123   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4124   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4125   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4126   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4127   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
4128   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
4129   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));
4130 
4131   B->factortype = MAT_FACTOR_CHOLESKY;
4132 #if defined(PETSC_USE_COMPLEX)
4133   mumps->sym = 2;
4134 #else
4135   if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
4136   else mumps->sym = 2;
4137 #endif
4138 
4139   /* set solvertype */
4140   PetscCall(PetscFree(B->solvertype));
4141   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4142   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4143   if (size == 1) {
4144     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4145     B->canuseordering = PETSC_TRUE;
4146   }
4147   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
4148   B->ops->destroy = MatDestroy_MUMPS;
4149   B->data         = (void *)mumps;
4150 
4151   *F               = B;
4152   mumps->id.job    = JOB_NULL;
4153   mumps->ICNTL_pre = NULL;
4154   mumps->CNTL_pre  = NULL;
4155   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
4156   PetscFunctionReturn(PETSC_SUCCESS);
4157 }
4158 
4159 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F)
4160 {
4161   Mat         B;
4162   Mat_MUMPS  *mumps;
4163   PetscBool   isSeqBAIJ;
4164   PetscMPIInt size;
4165 
4166   PetscFunctionBegin;
4167   /* Create the factorization matrix */
4168   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ));
4169   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4170   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4171   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4172   PetscCall(MatSetUp(B));
4173 
4174   PetscCall(PetscNew(&mumps));
4175   PetscCheck(ftype == MAT_FACTOR_LU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
4176   B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
4177   B->factortype            = MAT_FACTOR_LU;
4178   if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
4179   else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
4180   mumps->sym = 0;
4181   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
4182 
4183   B->ops->view    = MatView_MUMPS;
4184   B->ops->getinfo = MatGetInfo_MUMPS;
4185 
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));

  /* set solvertype */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void *)mumps;

  *F               = B;
  mumps->id.job    = JOB_NULL;
  mumps->ICNTL_pre = NULL;
  mumps->CNTL_pre  = NULL;
  mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}
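
/* A usage note (illustration only; ./app is a placeholder application name): a BAIJ matrix
   is factored with MUMPS LU through KSP, which ends up in MatGetFactor_baij_mumps() above:

     ./app -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps

   Cholesky is rejected here on purpose; SBAIJ or AIJ matrices must be used instead. */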

/* MatGetFactor for SeqSELL matrices; MPI SELL and Cholesky are not yet implemented */
static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F)
{
  Mat         B;
  Mat_MUMPS  *mumps;
  PetscBool   isSeqSELL;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL));
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
  PetscCall(MatSetUp(B));

  PetscCall(PetscNew(&mumps));

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));

  PetscCheck(ftype == MAT_FACTOR_LU, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS Cholesky is not yet implemented for SELL matrices, use MAT_FACTOR_LU instead");
  B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
  B->factortype            = MAT_FACTOR_LU;
  PetscCheck(isSeqSELL, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS LU is not yet implemented for MPI SELL matrices, only MATSEQSELL is supported");
  mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
  mumps->sym              = 0;
  PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));

  /* set solvertype */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void *)mumps;

  *F               = B;
  mumps->id.job    = JOB_NULL;
  mumps->ICNTL_pre = NULL;
  mumps->CNTL_pre  = NULL;
  mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}
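
/* Illustration (not part of the library; ./app is a placeholder): assuming the application
   calls MatSetFromOptions(), a SeqSELL matrix can be factored with MUMPS LU from the
   command line, e.g.

     ./app -mat_type sell -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps

   The SELL entries are converted to MUMPS triples via MatConvertToTriples_seqsell_seqaij(). */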

/* MatGetFactor for MATNEST matrices */
static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F)
{
  Mat         B, **mats;
  Mat_MUMPS  *mumps;
  PetscInt    nr, nc;
  PetscMPIInt size;
  PetscBool   flg = PETSC_TRUE;

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
    PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
    *F = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
#endif

  /* Return a NULL factor in *F if the nest structure or the block types are not supported */
  *F = NULL;
  PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
  if (ftype == MAT_FACTOR_CHOLESKY) {
    IS       *rows, *cols;
    PetscInt *m, *M;

    PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc);
    PetscCall(PetscMalloc2(nr, &rows, nc, &cols));
    PetscCall(MatNestGetISs(A, rows, cols));
    for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg));
    if (!flg) {
      PetscCall(PetscFree2(rows, cols));
      PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n"));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    PetscCall(PetscMalloc2(nr, &m, nr, &M));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r]));
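    /* For Cholesky the index ranges [m[r], M[r]] spanned by the row index sets must be
       pairwise disjoint, otherwise the nest cannot be assembled into a single symmetric
       MUMPS matrix */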
    for (PetscInt r = 0; flg && r < nr; r++)
      for (PetscInt k = r + 1; flg && k < nr; k++)
        if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE;
    PetscCall(PetscFree2(m, M));
    PetscCall(PetscFree2(rows, cols));
    if (!flg) {
      PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n"));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  for (PetscInt r = 0; r < nr; r++) {
    for (PetscInt c = 0; c < nc; c++) {
      Mat       sub = mats[r][c];
      PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense;

      if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue;
      PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
      PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
      PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
      if (ftype == MAT_FACTOR_CHOLESKY) {
        if (r == c) {
          if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag && !isDense) {
            PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
            flg = PETSC_FALSE;
          }
        } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
          PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
          flg = PETSC_FALSE;
        }
      } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
        PetscCall(PetscInfo(sub, "MAT_FACTOR_LU not supported for block of type %s.\n", ((PetscObject)sub)->type_name));
        flg = PETSC_FALSE;
      }
    }
  }
  if (!flg) PetscFunctionReturn(PETSC_SUCCESS);

  /* Create the factorization matrix */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
  PetscCall(MatSetUp(B));

  PetscCall(PetscNew(&mumps));

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    mumps->sym               = 0;
  } else {
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
    B->factortype                  = MAT_FACTOR_CHOLESKY;
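    /* MUMPS SYM parameter: 1 = symmetric positive definite, 2 = general symmetric;
       in complex arithmetic MUMPS factors complex-symmetric (not Hermitian) matrices,
       so SYM = 2 is always used */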
#if defined(PETSC_USE_COMPLEX)
    mumps->sym = 2;
#else
    if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
    else mumps->sym = 2;
#endif
  }
  mumps->ConvertToTriples = MatConvertToTriples_nest_xaij;
  PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype]));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }

  /* set solvertype */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void *)mumps;

  *F               = B;
  mumps->id.job    = JOB_NULL;
  mumps->ICNTL_pre = NULL;
  mumps->CNTL_pre  = NULL;
  mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}
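
/* A hypothetical sketch (illustration only) of a nest layout accepted for Cholesky here:
   a 2x2 MATNEST with non-intersecting index sets, equal row and column maps, and only the
   upper-triangular blocks supplied. A00, A11 (and the optional A01) are assumed to be
   AIJ/SBAIJ/dense/diagonal matrices created elsewhere, satisfying the checks above.

     Mat nest, blocks[4] = {A00, A01, NULL, A11};

     MatCreateNest(PETSC_COMM_WORLD, 2, NULL, 2, NULL, blocks, &nest);
     MatSetOption(nest, MAT_SYMMETRIC, PETSC_TRUE);

   then MatGetFactor(nest, MATSOLVERMUMPS, MAT_FACTOR_CHOLESKY, &F) proceeds as usual. */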

PETSC_INTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
{
  PetscFunctionBegin;
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps));
  PetscFunctionReturn(PETSC_SUCCESS);
}
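
/* Illustration (not part of the library; ./app is a placeholder): once registered, MUMPS
   is selected at runtime through the standard factorization options, e.g.

     ./app -ksp_type preonly -pc_type cholesky -pc_factor_mat_solver_type mumps -mat_mumps_icntl_7 2

   where -mat_mumps_icntl_7 2 requests the AMF ordering; see the MUMPS users manual for
   the meaning of the individual ICNTL entries. */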