xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision bcd4bb4a4158aa96f212e9537e87b40407faf83e)
1 /*
2     Provides an interface to the MUMPS sparse solver
3 */
4 #include <petscpkg_version.h>
5 #include <petscsf.h>
6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
8 #include <../src/mat/impls/sell/mpi/mpisell.h>
9 #include <petsc/private/vecimpl.h>
10 
11 #define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")"
12 
13 EXTERN_C_BEGIN
14 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
15   #include <cmumps_c.h>
16   #include <zmumps_c.h>
17   #include <smumps_c.h>
18   #include <dmumps_c.h>
19 #else
20   #if defined(PETSC_USE_COMPLEX)
21     #if defined(PETSC_USE_REAL_SINGLE)
22       #include <cmumps_c.h>
23       #define MUMPS_c     cmumps_c
24       #define MumpsScalar CMUMPS_COMPLEX
25     #else
26       #include <zmumps_c.h>
27       #define MUMPS_c     zmumps_c
28       #define MumpsScalar ZMUMPS_COMPLEX
29     #endif
30   #else
31     #if defined(PETSC_USE_REAL_SINGLE)
32       #include <smumps_c.h>
33       #define MUMPS_c     smumps_c
34       #define MumpsScalar SMUMPS_REAL
35     #else
36       #include <dmumps_c.h>
37       #define MUMPS_c     dmumps_c
38       #define MumpsScalar DMUMPS_REAL
39     #endif
40   #endif
41 #endif
42 #if defined(PETSC_USE_COMPLEX)
43   #if defined(PETSC_USE_REAL_SINGLE)
44     #define MUMPS_STRUC_C CMUMPS_STRUC_C
45   #else
46     #define MUMPS_STRUC_C ZMUMPS_STRUC_C
47   #endif
48 #else
49   #if defined(PETSC_USE_REAL_SINGLE)
50     #define MUMPS_STRUC_C SMUMPS_STRUC_C
51   #else
52     #define MUMPS_STRUC_C DMUMPS_STRUC_C
53   #endif
54 #endif
55 EXTERN_C_END
56 
57 #define JOB_INIT         -1
58 #define JOB_NULL         0
59 #define JOB_FACTSYMBOLIC 1
60 #define JOB_FACTNUMERIC  2
61 #define JOB_SOLVE        3
62 #define JOB_END          -2
63 
64 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
65    number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
66    naming convention in PetscMPIInt, PetscBLASInt etc.
67 */
68 typedef MUMPS_INT PetscMUMPSInt;
69 
70 #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
71   #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
72     #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
73   #endif
74 #else
75   #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */
76     #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
77   #endif
78 #endif
79 
80 #define MPIU_MUMPSINT       MPI_INT
81 #define PETSC_MUMPS_INT_MAX 2147483647
82 #define PETSC_MUMPS_INT_MIN -2147483648
83 
/* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers */
static inline PetscErrorCode PetscMUMPSIntCast(PetscCount a, PetscMUMPSInt *b)
{
  PetscFunctionBegin;
#if PetscDefined(USE_64BIT_INDICES)
  /* Only with 64-bit PetscInt can <a> fall outside the 32-bit MUMPS_INT range, so the check is compiled out otherwise */
  PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
#endif
  *b = (PetscMUMPSInt)a;
  PetscFunctionReturn(PETSC_SUCCESS);
}
94 
95 /* Put these utility routines here since they are only used in this file */
96 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
97 {
98   PetscInt  myval;
99   PetscBool myset;
100 
101   PetscFunctionBegin;
102   /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
103   PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
104   if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
105   if (set) *set = myset;
106   PetscFunctionReturn(PETSC_SUCCESS);
107 }
108 #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)
109 
110 // An abstract type for specific MUMPS types {S,D,C,Z}MUMPS_STRUC_C.
111 //
112 // With the abstract (outer) type, we can write shared code. We call MUMPS through a type-to-be-determined inner field within the abstract type.
113 // Before/after calling MUMPS, we need to copy in/out fields between the outer and the inner, which seems expensive. But note that the large fixed size
// arrays within the types are directly linked. At the end, we only need to copy ~20 integers/pointers, which is doable. See PreMumpsCall()/PostMumpsCall().
115 //
116 // Not all fields in the specific types are exposed in the abstract type. We only need those used by the PETSc/MUMPS interface.
// Notably, DMUMPS_COMPLEX* and DMUMPS_REAL* fields are now declared as void *. Their type will be determined by the actual precision to be used.
118 // Also note that we added some *_len fields not in specific types to track sizes of those MumpsScalar buffers.
typedef struct {
  PetscPrecision precision;   // precision used by MUMPS
  void          *internal_id; // the data structure passed to MUMPS, whose actual type {S,D,C,Z}MUMPS_STRUC_C is to be decided by precision and PETSc's use of complex

  // aliased fields from internal_id, so that we can use XMUMPS_STRUC_C to write shared code across different precisions.
  MUMPS_INT  sym, par, job;
  MUMPS_INT  comm_fortran; /* Fortran communicator */
  MUMPS_INT *icntl;
  void      *cntl; // MumpsReal, fixed size array
  MUMPS_INT  n;
  MUMPS_INT  nblk;

  /* Assembled entry */
  MUMPS_INT8 nnz;
  MUMPS_INT *irn;
  MUMPS_INT *jcn;
  void      *a; // MumpsScalar, centralized input
  PetscCount a_len;

  /* Distributed entry */
  MUMPS_INT8 nnz_loc;
  MUMPS_INT *irn_loc;
  MUMPS_INT *jcn_loc;
  void      *a_loc; // MumpsScalar, distributed input
  PetscCount a_loc_len;

  /* Matrix by blocks */
  MUMPS_INT *blkptr;
  MUMPS_INT *blkvar;

  /* Ordering, if given by user */
  MUMPS_INT *perm_in;

  /* RHS, solution, output data and statistics */
  void      *rhs, *redrhs, *rhs_sparse, *sol_loc, *rhs_loc;                 // MumpsScalar buffers
  PetscCount rhs_len, redrhs_len, rhs_sparse_len, sol_loc_len, rhs_loc_len; // length of buffers (in MumpsScalar) IF allocated in a different precision than PetscScalar

  MUMPS_INT *irhs_sparse, *irhs_ptr, *isol_loc, *irhs_loc;
  MUMPS_INT  nrhs, lrhs, lredrhs, nz_rhs, lsol_loc, nloc_rhs, lrhs_loc;
  // MUMPS_INT  nsol_loc; // introduced in MUMPS-5.7, but PETSc doesn't use it; would cause compile errors with the widely used 5.6. If you add it, must also update PreMumpsCall() and guard this with #if PETSC_PKG_MUMPS_VERSION_GE(5, 7, 0)
  MUMPS_INT  schur_lld;
  MUMPS_INT *info, *infog;   // fixed size array
  void      *rinfo, *rinfog; // MumpsReal, fixed size array

  /* Null space */
  MUMPS_INT *pivnul_list; // allocated by MUMPS!
  MUMPS_INT *mapping;     // allocated by MUMPS!

  /* Schur */
  MUMPS_INT  size_schur;
  MUMPS_INT *listvar_schur;
  void      *schur; // MumpsScalar
  PetscCount schur_len;

  /* For out-of-core */
  char *ooc_tmpdir; // fixed size array
  char *ooc_prefix; // fixed size array
} XMUMPS_STRUC_C;
177 
// Note: fixed-size arrays are allocated by MUMPS; redirect them to the outer struct
#define AllocateInternalID(MUMPS_STRUC_T, outer) \
  do { \
    MUMPS_STRUC_T *inner; \
    PetscCall(PetscNew(&inner)); \
    outer->icntl      = inner->icntl; \
    outer->cntl       = inner->cntl; \
    outer->info       = inner->info; \
    outer->infog      = inner->infog; \
    outer->rinfo      = inner->rinfo; \
    outer->rinfog     = inner->rinfog; \
    outer->ooc_tmpdir = inner->ooc_tmpdir; \
    outer->ooc_prefix = inner->ooc_prefix; \
    /* these three fields should never change after init */ \
    inner->comm_fortran = outer->comm_fortran; \
    inner->par          = outer->par; \
    inner->sym          = outer->sym; \
    outer->internal_id  = inner; \
  } while (0)
197 
// Allocate the internal [SDCZ]MUMPS_STRUC_C ID data structure in the given <precision>, and link fields of the outer and the inner
static inline PetscErrorCode MatMumpsAllocateInternalID(XMUMPS_STRUC_C *outer, PetscPrecision precision)
{
  PetscFunctionBegin;
  outer->precision = precision;
#if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
  #if defined(PETSC_USE_COMPLEX)
  if (precision == PETSC_PRECISION_SINGLE) AllocateInternalID(CMUMPS_STRUC_C, outer);
  else AllocateInternalID(ZMUMPS_STRUC_C, outer);
  #else
  if (precision == PETSC_PRECISION_SINGLE) AllocateInternalID(SMUMPS_STRUC_C, outer);
  else AllocateInternalID(DMUMPS_STRUC_C, outer);
  #endif
#else
  // Without mixed-precision support, MUMPS_STRUC_C was already fixed at compile time to match PetscScalar
  AllocateInternalID(MUMPS_STRUC_C, outer);
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
216 
// Free the MumpsScalar buffers held inside the inner ID; used when they were allocated separately from the PetscScalar arrays (mixed precision)
#define FreeInternalIDFields(MUMPS_STRUC_T, outer) \
  do { \
    MUMPS_STRUC_T *inner = (MUMPS_STRUC_T *)(outer)->internal_id; \
    PetscCall(PetscFree(inner->a)); \
    PetscCall(PetscFree(inner->a_loc)); \
    PetscCall(PetscFree(inner->redrhs)); \
    PetscCall(PetscFree(inner->rhs)); \
    PetscCall(PetscFree(inner->rhs_sparse)); \
    PetscCall(PetscFree(inner->rhs_loc)); \
    PetscCall(PetscFree(inner->sol_loc)); \
    PetscCall(PetscFree(inner->schur)); \
  } while (0)
229 
// Free the internal [SDCZ]MUMPS_STRUC_C and, when mixed precision was used, its separately allocated MumpsScalar buffers
static inline PetscErrorCode MatMumpsFreeInternalID(XMUMPS_STRUC_C *outer)
{
  PetscFunctionBegin;
  if (outer->internal_id) { // sometimes, the inner is never created before we destroy the outer
#if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
    const PetscPrecision mumps_precision = outer->precision;
    if (mumps_precision != PETSC_SCALAR_PRECISION) { // Free internal buffers if we used mixed precision
  #if defined(PETSC_USE_COMPLEX)
      if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(CMUMPS_STRUC_C, outer);
      else FreeInternalIDFields(ZMUMPS_STRUC_C, outer);
  #else
      if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(SMUMPS_STRUC_C, outer);
      else FreeInternalIDFields(DMUMPS_STRUC_C, outer);
  #endif
    }
#endif
    PetscCall(PetscFree(outer->internal_id));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
250 
251 // Make a companion MumpsScalar array (with a given PetscScalar array), to hold at least <n> MumpsScalars in the given <precision> and return the address at <ma>.
252 // <convert> indicates if we need to convert PetscScalars to MumpsScalars after allocating the MumpsScalar array.
// (For brevity, we use <ma> for array address and <m> for its length in MumpsScalar, though in code they should be <*ma> and <*m>)
254 // If <ma> already points to a buffer/array, on input <m> should be its length. Note the buffer might be freed if it is not big enough for this request.
255 //
256 // The returned array is a companion, so how it is created depends on if PetscScalar and MumpsScalar are the same.
257 // 1) If they are different, a separate array will be made and its length and address will be provided at <m> and <ma> on output.
258 // 2) Otherwise, <pa> will be returned in <ma>, and <m> will be zero on output.
259 //
260 //
261 //   Input parameters:
262 // + convert   - whether to do PetscScalar to MumpsScalar conversion
263 // . n         - length of the PetscScalar array
// . pa        - [n], points to the PetscScalar array
265 // . precision - precision of MumpsScalar
266 // . m         - on input, length of an existing MumpsScalar array <ma> if any, otherwise *m is just zero.
267 // - ma        - on input, an existing MumpsScalar array if any.
268 //
269 //   Output parameters:
270 // + m  - length of the MumpsScalar buffer at <ma> if MumpsScalar is different from PetscScalar, otherwise 0
271 // . ma - the MumpsScalar array, which could be an alias of <pa> when the two types are the same.
272 //
273 //   Note:
274 //    New memory, if allocated, is done via PetscMalloc1(), and is owned by caller.
275 static PetscErrorCode MatMumpsMakeMumpsScalarArray(PetscBool convert, PetscCount n, const PetscScalar *pa, PetscPrecision precision, PetscCount *m, void **ma)
276 {
277   PetscFunctionBegin;
278 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
279   const PetscPrecision mumps_precision = precision;
280   PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unsupported precicison (%d). Must be single or double", (int)precision);
281   #if defined(PETSC_USE_COMPLEX)
282   if (mumps_precision != PETSC_SCALAR_PRECISION) {
283     if (mumps_precision == PETSC_PRECISION_SINGLE) {
284       if (*m < n) {
285         PetscCall(PetscFree(*ma));
286         PetscCall(PetscMalloc1(n, (CMUMPS_COMPLEX **)ma));
287         *m = n;
288       }
289       if (convert) {
290         CMUMPS_COMPLEX *b = *(CMUMPS_COMPLEX **)ma;
291         for (PetscCount i = 0; i < n; i++) {
292           b[i].r = PetscRealPart(pa[i]);
293           b[i].i = PetscImaginaryPart(pa[i]);
294         }
295       }
296     } else {
297       if (*m < n) {
298         PetscCall(PetscFree(*ma));
299         PetscCall(PetscMalloc1(n, (ZMUMPS_COMPLEX **)ma));
300         *m = n;
301       }
302       if (convert) {
303         ZMUMPS_COMPLEX *b = *(ZMUMPS_COMPLEX **)ma;
304         for (PetscCount i = 0; i < n; i++) {
305           b[i].r = PetscRealPart(pa[i]);
306           b[i].i = PetscImaginaryPart(pa[i]);
307         }
308       }
309     }
310   }
311   #else
312   if (mumps_precision != PETSC_SCALAR_PRECISION) {
313     if (mumps_precision == PETSC_PRECISION_SINGLE) {
314       if (*m < n) {
315         PetscCall(PetscFree(*ma));
316         PetscCall(PetscMalloc1(n, (SMUMPS_REAL **)ma));
317         *m = n;
318       }
319       if (convert) {
320         SMUMPS_REAL *b = *(SMUMPS_REAL **)ma;
321         for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
322       }
323     } else {
324       if (*m < n) {
325         PetscCall(PetscFree(*ma));
326         PetscCall(PetscMalloc1(n, (DMUMPS_REAL **)ma));
327         *m = n;
328       }
329       if (convert) {
330         DMUMPS_REAL *b = *(DMUMPS_REAL **)ma;
331         for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
332       }
333     }
334   }
335   #endif
336   else
337 #endif
338   {
339     if (*m != 0) PetscCall(PetscFree(*ma)); // free existing buffer if any
340     *ma = (void *)pa;                       // same precision, make them alias
341     *m  = 0;
342   }
343   PetscFunctionReturn(PETSC_SUCCESS);
344 }
345 
// Cast a MumpsScalar array <ma[n]> in <mumps_precision> to a PetscScalar array at address <pa>.
//
// 1) If the two types are different, cast array elements.
// 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op.
static PetscErrorCode MatMumpsCastMumpsScalarArray(PetscCount n, PetscPrecision mumps_precision, const void *ma, PetscScalar *pa)
{
  PetscFunctionBegin;
#if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
  if (mumps_precision != PETSC_SCALAR_PRECISION) {
  #if defined(PETSC_USE_COMPLEX)
    // A complex array of length n is viewed as an interleaved real array of length 2n (r,i pairs), so an element-wise real cast covers both components
    if (mumps_precision == PETSC_PRECISION_SINGLE) {
      PetscReal         *a = (PetscReal *)pa;
      const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma;
      for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i];
    } else {
      PetscReal         *a = (PetscReal *)pa;
      const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma;
      for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i];
    }
  #else
    if (mumps_precision == PETSC_PRECISION_SINGLE) {
      const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma;
      for (PetscCount i = 0; i < n; i++) pa[i] = b[i];
    } else {
      const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma;
      for (PetscCount i = 0; i < n; i++) pa[i] = b[i];
    }
  #endif
  } else
#endif
    PetscCall(PetscArraycpy((PetscScalar *)pa, (PetscScalar *)ma, n));
  PetscFunctionReturn(PETSC_SUCCESS);
}
379 
380 // Cast a PetscScalar array <pa[n]> to a MumpsScalar array in the given <mumps_precision> at address <ma>.
381 //
382 // 1) If the two types are different, cast array elements.
383 // 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op.
static PetscErrorCode MatMumpsCastPetscScalarArray(PetscCount n, const PetscScalar *pa, PetscPrecision mumps_precision, const void *ma)
{
  // NOTE(review): <ma> is the destination yet is declared const void *; every branch below casts the const away and writes through it.
  // Consider dropping the const qualifier on <ma> — verify no caller relies on the const-qualified signature first.
  PetscFunctionBegin;
#if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
  if (mumps_precision != PETSC_SCALAR_PRECISION) {
  #if defined(PETSC_USE_COMPLEX)
    if (mumps_precision == PETSC_PRECISION_SINGLE) {
      CMUMPS_COMPLEX *b = (CMUMPS_COMPLEX *)ma;
      for (PetscCount i = 0; i < n; i++) {
        b[i].r = PetscRealPart(pa[i]);
        b[i].i = PetscImaginaryPart(pa[i]);
      }
    } else {
      ZMUMPS_COMPLEX *b = (ZMUMPS_COMPLEX *)ma;
      for (PetscCount i = 0; i < n; i++) {
        b[i].r = PetscRealPart(pa[i]);
        b[i].i = PetscImaginaryPart(pa[i]);
      }
    }
  #else
    if (mumps_precision == PETSC_PRECISION_SINGLE) {
      SMUMPS_REAL *b = (SMUMPS_REAL *)ma;
      for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
    } else {
      DMUMPS_REAL *b = (DMUMPS_REAL *)ma;
      for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
    }
  #endif
  } else
#endif
    PetscCall(PetscArraycpy((PetscScalar *)ma, (PetscScalar *)pa, n));
  PetscFunctionReturn(PETSC_SUCCESS);
}
417 
418 static inline MPI_Datatype MPIU_MUMPSREAL(const XMUMPS_STRUC_C *id)
419 {
420   return id->precision == PETSC_PRECISION_DOUBLE ? MPI_DOUBLE : MPI_FLOAT;
421 }
422 
// Before each MUMPS call: mirror the precision-agnostic outer fields into the precision-specific inner struct;
// MumpsScalar pointers are cast to the concrete scalar type <mumpsscalar> of the inner struct
#define PreMumpsCall(inner, outer, mumpsscalar) \
  do { \
    inner->job           = outer->job; \
    inner->n             = outer->n; \
    inner->nblk          = outer->nblk; \
    inner->nnz           = outer->nnz; \
    inner->irn           = outer->irn; \
    inner->jcn           = outer->jcn; \
    inner->a             = (mumpsscalar *)outer->a; \
    inner->nnz_loc       = outer->nnz_loc; \
    inner->irn_loc       = outer->irn_loc; \
    inner->jcn_loc       = outer->jcn_loc; \
    inner->a_loc         = (mumpsscalar *)outer->a_loc; \
    inner->blkptr        = outer->blkptr; \
    inner->blkvar        = outer->blkvar; \
    inner->perm_in       = outer->perm_in; \
    inner->rhs           = (mumpsscalar *)outer->rhs; \
    inner->redrhs        = (mumpsscalar *)outer->redrhs; \
    inner->rhs_sparse    = (mumpsscalar *)outer->rhs_sparse; \
    inner->sol_loc       = (mumpsscalar *)outer->sol_loc; \
    inner->rhs_loc       = (mumpsscalar *)outer->rhs_loc; \
    inner->irhs_sparse   = outer->irhs_sparse; \
    inner->irhs_ptr      = outer->irhs_ptr; \
    inner->isol_loc      = outer->isol_loc; \
    inner->irhs_loc      = outer->irhs_loc; \
    inner->nrhs          = outer->nrhs; \
    inner->lrhs          = outer->lrhs; \
    inner->lredrhs       = outer->lredrhs; \
    inner->nz_rhs        = outer->nz_rhs; \
    inner->lsol_loc      = outer->lsol_loc; \
    inner->nloc_rhs      = outer->nloc_rhs; \
    inner->lrhs_loc      = outer->lrhs_loc; \
    inner->schur_lld     = outer->schur_lld; \
    inner->size_schur    = outer->size_schur; \
    inner->listvar_schur = outer->listvar_schur; \
    inner->schur         = (mumpsscalar *)outer->schur; \
  } while (0)
460 
// After each MUMPS call: copy back the pointers that MUMPS itself may have (re)allocated
#define PostMumpsCall(inner, outer) \
  do { \
    outer->pivnul_list = inner->pivnul_list; \
    outer->mapping     = inner->mapping; \
  } while (0)
466 
// Entry for PETSc to call MUMPS: sync outer -> inner, invoke the precision-specific mumps_c() routine, then sync inner -> outer
static inline PetscErrorCode PetscCallMumps_Private(XMUMPS_STRUC_C *outer)
{
  PetscFunctionBegin;
#if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
  #if defined(PETSC_USE_COMPLEX)
  if (outer->precision == PETSC_PRECISION_SINGLE) {
    CMUMPS_STRUC_C *inner = (CMUMPS_STRUC_C *)outer->internal_id;
    PreMumpsCall(inner, outer, CMUMPS_COMPLEX);
    PetscStackCallExternalVoid("cmumps_c", cmumps_c(inner));
    PostMumpsCall(inner, outer);
  } else {
    ZMUMPS_STRUC_C *inner = (ZMUMPS_STRUC_C *)outer->internal_id;
    PreMumpsCall(inner, outer, ZMUMPS_COMPLEX);
    PetscStackCallExternalVoid("zmumps_c", zmumps_c(inner));
    PostMumpsCall(inner, outer);
  }
  #else
  if (outer->precision == PETSC_PRECISION_SINGLE) {
    SMUMPS_STRUC_C *inner = (SMUMPS_STRUC_C *)outer->internal_id;
    PreMumpsCall(inner, outer, SMUMPS_REAL);
    PetscStackCallExternalVoid("smumps_c", smumps_c(inner));
    PostMumpsCall(inner, outer);
  } else {
    DMUMPS_STRUC_C *inner = (DMUMPS_STRUC_C *)outer->internal_id;
    PreMumpsCall(inner, outer, DMUMPS_REAL);
    PetscStackCallExternalVoid("dmumps_c", dmumps_c(inner));
    PostMumpsCall(inner, outer);
  }
  #endif
#else
  /* single supported precision: MUMPS_c/MumpsScalar were fixed by the configure-time macros at the top of this file */
  MUMPS_STRUC_C *inner = (MUMPS_STRUC_C *)outer->internal_id;
  PreMumpsCall(inner, outer, MumpsScalar);
  PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(inner));
  PostMumpsCall(inner, outer);
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
505 
/* macros s.t. indices match MUMPS documentation (MUMPS documents its control/info arrays with 1-based Fortran indexing) */
#define ICNTL(I) icntl[(I) - 1]
#define INFOG(I) infog[(I) - 1]
#define INFO(I)  info[(I) - 1]

// Get a value from a MumpsScalar array, which is the <F> field in the struct of MUMPS_STRUC_C. The value is convertible to PetscScalar. Note no minus 1 on I!
#if defined(PETSC_USE_COMPLEX)
  #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((CMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((CMUMPS_COMPLEX *)(ID).F)[I].i : ((ZMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((ZMUMPS_COMPLEX *)(ID).F)[I].i)
#else
  #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).F)[I] : ((double *)(ID).F)[I])
#endif

// Get a value from MumpsReal arrays. The value is convertible to PetscReal. These use 1-based <I> like ICNTL() above.
#define ID_CNTL_GET(ID, I)   ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).cntl)[(I) - 1] : ((double *)(ID).cntl)[(I) - 1])
#define ID_RINFOG_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfog)[(I) - 1] : ((double *)(ID).rinfog)[(I) - 1])
#define ID_RINFO_GET(ID, I)  ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfo)[(I) - 1] : ((double *)(ID).rinfo)[(I) - 1])

// Set the I-th entry of the MumpsReal array id.cntl[] with a PetscReal <VAL>
#define ID_CNTL_SET(ID, I, VAL) \
  do { \
    if ((ID).precision == PETSC_PRECISION_SINGLE) ((float *)(ID).cntl)[(I) - 1] = (VAL); \
    else ((double *)(ID).cntl)[(I) - 1] = (VAL); \
  } while (0)
529 
/* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  #define PetscMUMPS_c(mumps) \
    do { \
      if (mumps->use_petsc_omp_support) { \
        if (mumps->is_omp_master) { \
          PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
          PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
          PetscCall(PetscCallMumps_Private(&mumps->id)); \
          PetscCall(PetscFPTrapPop()); \
          PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
        } \
        PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
        /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific    \
           to processes, so we only Bcast info[1], an error code, and leave others (since they do not     \
           have an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82.         \
           omp_comm is a small shared memory communicator, hence multiple Bcast as shown below is OK. */ \
        MUMPS_STRUC_C tmp; /* All MUMPS_STRUC_C types have same lengths on these info arrays */ \
        PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(tmp.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(tmp.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfog), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfo), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \
      } else { \
        PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
        PetscCall(PetscCallMumps_Private(&mumps->id)); \
        PetscCall(PetscFPTrapPop()); \
      } \
    } while (0)
#else
  #define PetscMUMPS_c(mumps) \
    do { \
      PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
      PetscCall(PetscCallMumps_Private(&mumps->id)); \
      PetscCall(PetscFPTrapPop()); \
    } while (0)
#endif
567 
typedef struct Mat_MUMPS Mat_MUMPS;
struct Mat_MUMPS {
  XMUMPS_STRUC_C id; // the precision-agnostic MUMPS ID; see XMUMPS_STRUC_C above

  MatStructure   matstruc;
  PetscMPIInt    myid, petsc_size;
  PetscMUMPSInt *irn, *jcn;       /* the (i,j,v) triplets passed to mumps. */
  PetscScalar   *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
  PetscCount     nnz;             /* number of nonzeros. The type is called selective 64-bit in mumps */
  PetscMUMPSInt  sym;
  MPI_Comm       mumps_comm;
  PetscMUMPSInt *ICNTL_pre;
  PetscReal     *CNTL_pre;
  PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
  PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
  PetscMUMPSInt  ICNTL26;
  PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt    *rhs_nrow, max_nrhs;
  PetscMPIInt *rhs_recvcounts, *rhs_disps;
  PetscScalar *rhs_loc, *rhs_recvbuf;
#endif
  Vec            b_seq, x_seq;
  PetscInt       ninfo, *info; /* which INFO to display */
  PetscInt       sizeredrhs;
  PetscScalar   *schur_sol;
  PetscInt       schur_sizesol;
  PetscScalar   *redrhs;              // buffer in PetscScalar in case MumpsScalar is in a different precision
  PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
  PetscCount     cur_ilen, cur_jlen;  /* current len of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);

  /* Support for MATNEST */
  PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
  PetscCount  *nest_vals_start;
  PetscScalar *nest_vals;

  /* stuff used by petsc/mumps OpenMP support */
  PetscBool    use_petsc_omp_support;
  PetscOmpCtrl omp_ctrl;             /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */
  MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is PETSc matrix's comm */
  PetscCount  *recvcount;            /* a collection of nnz on omp_master */
  PetscMPIInt  tag, omp_comm_size;
  PetscBool    is_omp_master; /* is this rank the master of omp_comm */
  MPI_Request *reqs;
};
615 
/* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
   Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
 */
static PetscErrorCode PetscMUMPSIntCSRCast(PETSC_UNUSED Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
{
  PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscCount since mumps only uses PetscMUMPSInt for rhs */

  PetscFunctionBegin;
#if defined(PETSC_USE_64BIT_INDICES)
  {
    PetscInt i;
    if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
      PetscCall(PetscFree(mumps->ia_alloc));
      PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
      mumps->cur_ilen = nrow + 1;
    }
    if (nnz > mumps->cur_jlen) {
      PetscCall(PetscFree(mumps->ja_alloc));
      PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
      mumps->cur_jlen = nnz;
    }
    for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &mumps->ia_alloc[i]));
    for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &mumps->ja_alloc[i]));
    *ia_mumps = mumps->ia_alloc;
    *ja_mumps = mumps->ja_alloc;
  }
#else
  /* 32-bit PetscInt has the same width as PetscMUMPSInt, so the input arrays can be used as-is */
  *ia_mumps = ia;
  *ja_mumps = ja;
#endif
  PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
  PetscFunctionReturn(PETSC_SUCCESS);
}
649 
// Release all Schur-complement related buffers and reset the Schur fields of the MUMPS ID
static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
{
  PetscFunctionBegin;
  PetscCall(PetscFree(mumps->id.listvar_schur));
  PetscCall(PetscFree(mumps->redrhs)); // if needed, id.redrhs will be freed in MatMumpsFreeInternalID()
  PetscCall(PetscFree(mumps->schur_sol));
  mumps->id.size_schur = 0;
  mumps->id.schur_lld  = 0;
  if (mumps->id.internal_id) mumps->id.ICNTL(19) = 0; // sometimes, the inner id has not been built yet; ICNTL aliases the inner struct's array
  PetscFunctionReturn(PETSC_SUCCESS);
}
661 
/* solve with rhs in mumps->id.redrhs and return in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS           *mumps = (Mat_MUMPS *)F->data;
  Mat                  S, B, X; // solve S*X = B; all three matrices are dense
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;
  const PetscScalar   *xarray;

  PetscFunctionBegin;
  PetscCall(MatFactorFactorizeSchurComplement(F));
  PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
  // id.redrhs may live in a different (MUMPS) precision; copy/cast it into the PetscScalar buffer mumps->redrhs
  PetscCall(MatMumpsCastMumpsScalarArray(mumps->sizeredrhs, mumps->id.precision, mumps->id.redrhs, mumps->redrhs));

  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &B));
  PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  PetscCall(MatBindToCPU(B, S->boundtocpu));
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    // X wraps the same host array (mumps->redrhs) as B, so the solve is effectively in place
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatMatSolveTranspose(S, B, X));
    } else {
      PetscCall(MatMatSolve(S, B, X));
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    // S holds the inverse, so the solve becomes a matrix-matrix product into a scratch buffer
    sizesol = mumps->id.nrhs * mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      PetscCall(PetscFree(mumps->schur_sol));
      PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
      mumps->schur_sizesol = sizesol;
    }
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    PetscCall(MatProductCreateWithMat(S, B, NULL, X));
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
    } else {
      PetscCall(MatProductSetType(X, MATPRODUCT_AB));
    }
    PetscCall(MatProductSetFromOptions(X));
    PetscCall(MatProductSymbolic(X));
    PetscCall(MatProductNumeric(X));

    PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
  }
  // MUST get the array from X (not B), though they share the same host array. We can only guarantee X has the correct data on device.
  PetscCall(MatDenseGetArrayRead(X, &xarray)); // xarray should be mumps->redrhs, but using MatDenseGetArrayRead is safer with GPUs.
  PetscCall(MatMumpsCastPetscScalarArray(mumps->sizeredrhs, xarray, mumps->id.precision, mumps->id.redrhs));
  PetscCall(MatDenseRestoreArrayRead(X, &xarray));
  PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
  PetscCall(MatDestroy(&B));
  PetscCall(MatDestroy(&X));
  PetscFunctionReturn(PETSC_SUCCESS);
}
730 
/* Pre-/post-process a MUMPS solve when a Schur complement is active (ICNTL(19) != 0).
   expansion == PETSC_FALSE: condensation step, (re)allocate the MUMPS reduced-rhs buffer
   (id.redrhs) and its PetscScalar mirror (mumps->redrhs) for id.nrhs right-hand sides.
   expansion == PETSC_TRUE: solve with the Schur complement, then run the MUMPS expansion
   phase (ICNTL(26) = 2) to recover the full solution. */
static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      mumps->id.lredrhs = mumps->id.size_schur;
      mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
      /* id.redrhs is a separate allocation only when redrhs_len != 0 (mixed-precision case) */
      if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs));
      PetscCall(PetscFree(mumps->redrhs));
      PetscCall(PetscMalloc1(mumps->sizeredrhs, &mumps->redrhs));
      PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, mumps->sizeredrhs, mumps->redrhs, mumps->id.precision, &mumps->id.redrhs_len, &mumps->id.redrhs));
    }
  } else {                                    /* prepare for the expansion step */
    PetscCall(MatMumpsSolveSchur_Private(F)); /* solve Schur complement, put solution in id.redrhs (this has to be done by the MUMPS user, so basically us) */
    mumps->id.ICNTL(26) = 2;                  /* expansion phase */
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
    if (mumps->id.nrhs > 1) {
      if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs));
      PetscCall(PetscFree(mumps->redrhs));
      mumps->id.redrhs_len = 0;
      mumps->id.lredrhs    = 0;
      mumps->sizeredrhs    = 0;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
768 
769 /*
770   MatConvertToTriples_A_B - convert PETSc matrix to triples: row[nz], col[nz], val[nz]
771 
772   input:
773     A       - matrix in aij,baij or sbaij format
774     shift   - 0: C style output triple; 1: Fortran style output triple.
775     reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
776               MAT_REUSE_MATRIX:   only the values in v array are updated
777   output:
778     nnz     - dim of r, c, and v (number of local nonzero entries of A)
779     r, c, v - row and col index, matrix values (matrix triples)
780 
781   The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
782   freed with PetscFree(mumps->irn);  This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means
783   that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
784 
785  */
786 
787 static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
788 {
789   const PetscScalar *av;
790   const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
791   PetscCount         nz, rnz, k;
792   PetscMUMPSInt     *row, *col;
793   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
794 
795   PetscFunctionBegin;
796   PetscCall(MatSeqAIJGetArrayRead(A, &av));
797   if (reuse == MAT_INITIAL_MATRIX) {
798     nz = aa->nz;
799     ai = aa->i;
800     aj = aa->j;
801     PetscCall(PetscMalloc2(nz, &row, nz, &col));
802     for (PetscCount i = k = 0; i < M; i++) {
803       rnz = ai[i + 1] - ai[i];
804       ajj = aj + ai[i];
805       for (PetscCount j = 0; j < rnz; j++) {
806         PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
807         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
808         k++;
809       }
810     }
811     mumps->val = (PetscScalar *)av;
812     mumps->irn = row;
813     mumps->jcn = col;
814     mumps->nnz = nz;
815   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqaij_seqaij(), so one needs to copy the memory */
816   else mumps->val = (PetscScalar *)av;                                           /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
817   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
818   PetscFunctionReturn(PETSC_SUCCESS);
819 }
820 
/* Convert a SeqSELL matrix (sliced ELLPACK) to COO triples for MUMPS.
   The slice height is 8, hard-coded here via the `& 0x07` mask and the `8 * i` row offset. */
static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  PetscCount     nz, i, j, k, r;
  Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
  PetscMUMPSInt *row, *col;

  PetscFunctionBegin;
  nz = a->sliidx[a->totalslices]; /* total number of stored entries (sliidx is the slice offset array) */
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    /* entries of slice i are interleaved round-robin over its 8 rows: r cycles 0..7, so the row is 8*i + r */
    for (i = k = 0; i < a->totalslices; i++) {
      for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
    }
    /* column indices are stored contiguously and map one-to-one onto the entries */
    for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
    mumps->val = a->val;
  } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, a->val, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsell_seqaij(), so one needs to copy the memory */
  else mumps->val = a->val;                                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  PetscFunctionReturn(PETSC_SUCCESS);
}
843 
844 static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
845 {
846   Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
847   const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
848   PetscCount      M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m;
849   PetscInt        bs;
850   PetscMUMPSInt  *row, *col;
851 
852   PetscFunctionBegin;
853   if (reuse == MAT_INITIAL_MATRIX) {
854     PetscCall(MatGetBlockSize(A, &bs));
855     M  = A->rmap->N / bs;
856     ai = aa->i;
857     aj = aa->j;
858     PetscCall(PetscMalloc2(nz, &row, nz, &col));
859     for (i = 0; i < M; i++) {
860       ajj = aj + ai[i];
861       rnz = ai[i + 1] - ai[i];
862       for (k = 0; k < rnz; k++) {
863         for (j = 0; j < bs; j++) {
864           for (m = 0; m < bs; m++) {
865             PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
866             PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
867             idx++;
868           }
869         }
870       }
871     }
872     mumps->irn = row;
873     mumps->jcn = col;
874     mumps->nnz = nz;
875     mumps->val = aa->a;
876   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqbaij_seqaij(), so one needs to copy the memory */
877   else mumps->val = aa->a;                                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
878   PetscFunctionReturn(PETSC_SUCCESS);
879 }
880 
881 static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
882 {
883   const PetscInt *ai, *aj, *ajj;
884   PetscInt        bs;
885   PetscCount      nz, rnz, i, j, k, m;
886   PetscMUMPSInt  *row, *col;
887   PetscScalar    *val;
888   Mat_SeqSBAIJ   *aa  = (Mat_SeqSBAIJ *)A->data;
889   const PetscInt  bs2 = aa->bs2, mbs = aa->mbs;
890 #if defined(PETSC_USE_COMPLEX)
891   PetscBool isset, hermitian;
892 #endif
893 
894   PetscFunctionBegin;
895 #if defined(PETSC_USE_COMPLEX)
896   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
897   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
898 #endif
899   ai = aa->i;
900   aj = aa->j;
901   PetscCall(MatGetBlockSize(A, &bs));
902   if (reuse == MAT_INITIAL_MATRIX) {
903     const PetscCount alloc_size = aa->nz * bs2;
904 
905     PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col));
906     if (bs > 1) {
907       PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc));
908       mumps->val = mumps->val_alloc;
909     } else {
910       mumps->val = aa->a;
911     }
912     mumps->irn = row;
913     mumps->jcn = col;
914   } else {
915     row = mumps->irn;
916     col = mumps->jcn;
917   }
918   val = mumps->val;
919 
920   nz = 0;
921   if (bs > 1) {
922     for (i = 0; i < mbs; i++) {
923       rnz = ai[i + 1] - ai[i];
924       ajj = aj + ai[i];
925       for (j = 0; j < rnz; j++) {
926         for (k = 0; k < bs; k++) {
927           for (m = 0; m < bs; m++) {
928             if (ajj[j] > i || k >= m) {
929               if (reuse == MAT_INITIAL_MATRIX) {
930                 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
931                 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
932               }
933               val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
934             }
935           }
936         }
937       }
938     }
939   } else if (reuse == MAT_INITIAL_MATRIX) {
940     for (i = 0; i < mbs; i++) {
941       rnz = ai[i + 1] - ai[i];
942       ajj = aj + ai[i];
943       for (j = 0; j < rnz; j++) {
944         PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
945         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
946         nz++;
947       }
948     }
949     PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscCount_FMT " != %" PetscInt_FMT, nz, aa->nz);
950   } else if (mumps->nest_vals)
951     PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX, MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsbaij_seqsbaij(), so one needs to copy the memory */
952   else mumps->val = aa->a;                               /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
953   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
954   PetscFunctionReturn(PETSC_SUCCESS);
955 }
956 
957 static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
958 {
959   const PetscInt    *ai, *aj, *ajj, *adiag, M = A->rmap->n;
960   PetscCount         nz, rnz, i, j;
961   const PetscScalar *av, *v1;
962   PetscScalar       *val;
963   PetscMUMPSInt     *row, *col;
964   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
965   PetscBool          diagDense;
966 #if defined(PETSC_USE_COMPLEX)
967   PetscBool hermitian, isset;
968 #endif
969 
970   PetscFunctionBegin;
971 #if defined(PETSC_USE_COMPLEX)
972   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
973   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
974 #endif
975   PetscCall(MatSeqAIJGetArrayRead(A, &av));
976   ai = aa->i;
977   aj = aa->j;
978   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, &diagDense));
979   if (reuse == MAT_INITIAL_MATRIX) {
980     /* count nz in the upper triangular part of A */
981     nz = 0;
982     if (!diagDense) {
983       for (i = 0; i < M; i++) {
984         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
985           for (j = ai[i]; j < ai[i + 1]; j++) {
986             if (aj[j] < i) continue;
987             nz++;
988           }
989         } else {
990           nz += ai[i + 1] - adiag[i];
991         }
992       }
993     } else {
994       for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
995     }
996     PetscCall(PetscMalloc2(nz, &row, nz, &col));
997     PetscCall(PetscMalloc1(nz, &val));
998     mumps->nnz = nz;
999     mumps->irn = row;
1000     mumps->jcn = col;
1001     mumps->val = mumps->val_alloc = val;
1002 
1003     nz = 0;
1004     if (!diagDense) {
1005       for (i = 0; i < M; i++) {
1006         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
1007           for (j = ai[i]; j < ai[i + 1]; j++) {
1008             if (aj[j] < i) continue;
1009             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
1010             PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
1011             val[nz] = av[j];
1012             nz++;
1013           }
1014         } else {
1015           rnz = ai[i + 1] - adiag[i];
1016           ajj = aj + adiag[i];
1017           v1  = av + adiag[i];
1018           for (j = 0; j < rnz; j++) {
1019             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
1020             PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
1021             val[nz++] = v1[j];
1022           }
1023         }
1024       }
1025     } else {
1026       for (i = 0; i < M; i++) {
1027         rnz = ai[i + 1] - adiag[i];
1028         ajj = aj + adiag[i];
1029         v1  = av + adiag[i];
1030         for (j = 0; j < rnz; j++) {
1031           PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
1032           PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
1033           val[nz++] = v1[j];
1034         }
1035       }
1036     }
1037   } else {
1038     nz  = 0;
1039     val = mumps->val;
1040     if (!diagDense) {
1041       for (i = 0; i < M; i++) {
1042         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
1043           for (j = ai[i]; j < ai[i + 1]; j++) {
1044             if (aj[j] < i) continue;
1045             val[nz++] = av[j];
1046           }
1047         } else {
1048           rnz = ai[i + 1] - adiag[i];
1049           v1  = av + adiag[i];
1050           for (j = 0; j < rnz; j++) val[nz++] = v1[j];
1051         }
1052       }
1053     } else {
1054       for (i = 0; i < M; i++) {
1055         rnz = ai[i + 1] - adiag[i];
1056         v1  = av + adiag[i];
1057         for (j = 0; j < rnz; j++) val[nz++] = v1[j];
1058       }
1059     }
1060   }
1061   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
1062   PetscFunctionReturn(PETSC_SUCCESS);
1063 }
1064 
1065 static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1066 {
1067   const PetscInt    *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
1068   PetscInt           bs;
1069   PetscCount         rstart, nz, i, j, k, m, jj, irow, countA, countB;
1070   PetscMUMPSInt     *row, *col;
1071   const PetscScalar *av, *bv, *v1, *v2;
1072   PetscScalar       *val;
1073   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)A->data;
1074   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ *)mat->A->data;
1075   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)mat->B->data;
1076   const PetscInt     bs2 = aa->bs2, mbs = aa->mbs;
1077 #if defined(PETSC_USE_COMPLEX)
1078   PetscBool hermitian, isset;
1079 #endif
1080 
1081   PetscFunctionBegin;
1082 #if defined(PETSC_USE_COMPLEX)
1083   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
1084   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
1085 #endif
1086   PetscCall(MatGetBlockSize(A, &bs));
1087   rstart = A->rmap->rstart;
1088   ai     = aa->i;
1089   aj     = aa->j;
1090   bi     = bb->i;
1091   bj     = bb->j;
1092   av     = aa->a;
1093   bv     = bb->a;
1094 
1095   garray = mat->garray;
1096 
1097   if (reuse == MAT_INITIAL_MATRIX) {
1098     nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
1099     PetscCall(PetscMalloc2(nz, &row, nz, &col));
1100     PetscCall(PetscMalloc1(nz, &val));
1101     /* can not decide the exact mumps->nnz now because of the SBAIJ */
1102     mumps->irn = row;
1103     mumps->jcn = col;
1104     mumps->val = mumps->val_alloc = val;
1105   } else {
1106     val = mumps->val;
1107   }
1108 
1109   jj   = 0;
1110   irow = rstart;
1111   for (i = 0; i < mbs; i++) {
1112     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
1113     countA = ai[i + 1] - ai[i];
1114     countB = bi[i + 1] - bi[i];
1115     bjj    = bj + bi[i];
1116     v1     = av + ai[i] * bs2;
1117     v2     = bv + bi[i] * bs2;
1118 
1119     if (bs > 1) {
1120       /* A-part */
1121       for (j = 0; j < countA; j++) {
1122         for (k = 0; k < bs; k++) {
1123           for (m = 0; m < bs; m++) {
1124             if (rstart + ajj[j] * bs > irow || k >= m) {
1125               if (reuse == MAT_INITIAL_MATRIX) {
1126                 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
1127                 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
1128               }
1129               val[jj++] = v1[j * bs2 + m + k * bs];
1130             }
1131           }
1132         }
1133       }
1134 
1135       /* B-part */
1136       for (j = 0; j < countB; j++) {
1137         for (k = 0; k < bs; k++) {
1138           for (m = 0; m < bs; m++) {
1139             if (reuse == MAT_INITIAL_MATRIX) {
1140               PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
1141               PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
1142             }
1143             val[jj++] = v2[j * bs2 + m + k * bs];
1144           }
1145         }
1146       }
1147     } else {
1148       /* A-part */
1149       for (j = 0; j < countA; j++) {
1150         if (reuse == MAT_INITIAL_MATRIX) {
1151           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1152           PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
1153         }
1154         val[jj++] = v1[j];
1155       }
1156 
1157       /* B-part */
1158       for (j = 0; j < countB; j++) {
1159         if (reuse == MAT_INITIAL_MATRIX) {
1160           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1161           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
1162         }
1163         val[jj++] = v2[j];
1164       }
1165     }
1166     irow += bs;
1167   }
1168   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj;
1169   PetscFunctionReturn(PETSC_SUCCESS);
1170 }
1171 
1172 static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1173 {
1174   const PetscInt    *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
1175   PetscCount         rstart, cstart, nz, i, j, jj, irow, countA, countB;
1176   PetscMUMPSInt     *row, *col;
1177   const PetscScalar *av, *bv, *v1, *v2;
1178   PetscScalar       *val;
1179   Mat                Ad, Ao;
1180   Mat_SeqAIJ        *aa;
1181   Mat_SeqAIJ        *bb;
1182 
1183   PetscFunctionBegin;
1184   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
1185   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
1186   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
1187 
1188   aa = (Mat_SeqAIJ *)Ad->data;
1189   bb = (Mat_SeqAIJ *)Ao->data;
1190   ai = aa->i;
1191   aj = aa->j;
1192   bi = bb->i;
1193   bj = bb->j;
1194 
1195   rstart = A->rmap->rstart;
1196   cstart = A->cmap->rstart;
1197 
1198   if (reuse == MAT_INITIAL_MATRIX) {
1199     nz = (PetscCount)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
1200     PetscCall(PetscMalloc2(nz, &row, nz, &col));
1201     PetscCall(PetscMalloc1(nz, &val));
1202     mumps->nnz = nz;
1203     mumps->irn = row;
1204     mumps->jcn = col;
1205     mumps->val = mumps->val_alloc = val;
1206   } else {
1207     val = mumps->val;
1208   }
1209 
1210   jj   = 0;
1211   irow = rstart;
1212   for (i = 0; i < m; i++) {
1213     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
1214     countA = ai[i + 1] - ai[i];
1215     countB = bi[i + 1] - bi[i];
1216     bjj    = bj + bi[i];
1217     v1     = av + ai[i];
1218     v2     = bv + bi[i];
1219 
1220     /* A-part */
1221     for (j = 0; j < countA; j++) {
1222       if (reuse == MAT_INITIAL_MATRIX) {
1223         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1224         PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj]));
1225       }
1226       val[jj++] = v1[j];
1227     }
1228 
1229     /* B-part */
1230     for (j = 0; j < countB; j++) {
1231       if (reuse == MAT_INITIAL_MATRIX) {
1232         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1233         PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
1234       }
1235       val[jj++] = v2[j];
1236     }
1237     irow++;
1238   }
1239   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
1240   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
1241   PetscFunctionReturn(PETSC_SUCCESS);
1242 }
1243 
/* Convert an MPIBAIJ matrix to COO triples for MUMPS: every bs x bs block of both the
   diagonal (A) and off-diagonal (B) parts is expanded into scalar entries with global
   indices. Values are copied because A- and B-part entries interleave row by row. */
static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ *)mat->A->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)mat->B->data;
  const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
  const PetscInt    *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart;
  const PetscInt     bs2 = mat->bs2;
  PetscInt           bs;
  PetscCount         nz, i, j, k, n, jj, irow, countA, countB, idx;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
  PetscScalar       *val;

  PetscFunctionBegin;
  PetscCall(MatGetBlockSize(A, &bs));
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = bs2 * (aa->nz + bb->nz); /* exact: every block contributes bs2 scalar entries */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < mbs; i++) {
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    ajj    = aj + ai[i];
    bjj    = bj + bi[i];
    v1     = av + bs2 * ai[i];
    v2     = bv + bs2 * bi[i];

    idx = 0;
    /* A-part: blocks stored column-major (n, the row within the block, is the fastest index) */
    for (k = 0; k < countA; k++) {
      for (j = 0; j < bs; j++) {
        for (n = 0; n < bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
            PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj]));
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part: same expansion, with garray mapping local to global block columns */
    for (k = 0; k < countB; k++) {
      for (j = 0; j < bs; j++) {
        for (n = 0; n < bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
            PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1313 
1314 static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1315 {
1316   const PetscInt    *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
1317   PetscCount         rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
1318   PetscMUMPSInt     *row, *col;
1319   const PetscScalar *av, *bv, *v1, *v2;
1320   PetscScalar       *val;
1321   Mat                Ad, Ao;
1322   Mat_SeqAIJ        *aa;
1323   Mat_SeqAIJ        *bb;
1324 #if defined(PETSC_USE_COMPLEX)
1325   PetscBool hermitian, isset;
1326 #endif
1327 
1328   PetscFunctionBegin;
1329 #if defined(PETSC_USE_COMPLEX)
1330   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
1331   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
1332 #endif
1333   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
1334   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
1335   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
1336 
1337   aa = (Mat_SeqAIJ *)Ad->data;
1338   bb = (Mat_SeqAIJ *)Ao->data;
1339   ai = aa->i;
1340   aj = aa->j;
1341   bi = bb->i;
1342   bj = bb->j;
1343   PetscCall(MatGetDiagonalMarkers_SeqAIJ(Ad, &adiag, NULL));
1344   rstart = A->rmap->rstart;
1345 
1346   if (reuse == MAT_INITIAL_MATRIX) {
1347     nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
1348     nzb = 0; /* num of upper triangular entries in mat->B */
1349     for (i = 0; i < m; i++) {
1350       nza += (ai[i + 1] - adiag[i]);
1351       countB = bi[i + 1] - bi[i];
1352       bjj    = bj + bi[i];
1353       for (j = 0; j < countB; j++) {
1354         if (garray[bjj[j]] > rstart) nzb++;
1355       }
1356     }
1357 
1358     nz = nza + nzb; /* total nz of upper triangular part of mat */
1359     PetscCall(PetscMalloc2(nz, &row, nz, &col));
1360     PetscCall(PetscMalloc1(nz, &val));
1361     mumps->nnz = nz;
1362     mumps->irn = row;
1363     mumps->jcn = col;
1364     mumps->val = mumps->val_alloc = val;
1365   } else {
1366     val = mumps->val;
1367   }
1368 
1369   jj   = 0;
1370   irow = rstart;
1371   for (i = 0; i < m; i++) {
1372     ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
1373     v1     = av + adiag[i];
1374     countA = ai[i + 1] - adiag[i];
1375     countB = bi[i + 1] - bi[i];
1376     bjj    = bj + bi[i];
1377     v2     = bv + bi[i];
1378 
1379     /* A-part */
1380     for (j = 0; j < countA; j++) {
1381       if (reuse == MAT_INITIAL_MATRIX) {
1382         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1383         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
1384       }
1385       val[jj++] = v1[j];
1386     }
1387 
1388     /* B-part */
1389     for (j = 0; j < countB; j++) {
1390       if (garray[bjj[j]] > rstart) {
1391         if (reuse == MAT_INITIAL_MATRIX) {
1392           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1393           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
1394         }
1395         val[jj++] = v2[j];
1396       }
1397     }
1398     irow++;
1399   }
1400   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
1401   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
1402   PetscFunctionReturn(PETSC_SUCCESS);
1403 }
1404 
1405 static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1406 {
1407   const PetscScalar *av;
1408   const PetscInt     M = A->rmap->n;
1409   PetscCount         i;
1410   PetscMUMPSInt     *row, *col;
1411   Vec                v;
1412 
1413   PetscFunctionBegin;
1414   PetscCall(MatDiagonalGetDiagonal(A, &v));
1415   PetscCall(VecGetArrayRead(v, &av));
1416   if (reuse == MAT_INITIAL_MATRIX) {
1417     PetscCall(PetscMalloc2(M, &row, M, &col));
1418     for (i = 0; i < M; i++) {
1419       PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
1420       col[i] = row[i];
1421     }
1422     mumps->val = (PetscScalar *)av;
1423     mumps->irn = row;
1424     mumps->jcn = col;
1425     mumps->nnz = M;
1426   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, M)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_diagonal_xaij(), so one needs to copy the memory */
1427   else mumps->val = (PetscScalar *)av;                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
1428   PetscCall(VecRestoreArrayRead(v, &av));
1429   PetscFunctionReturn(PETSC_SUCCESS);
1430 }
1431 
/* Convert a dense matrix to COO triples for MUMPS: all m*N local entries, column by column.
   If the leading dimension equals the local row count the values are used in place, otherwise
   they are packed into a contiguous buffer.
   NOTE(review): shift is unused and row/column indices are not shifted here — presumably
   handled elsewhere for this matrix type; confirm against the call sites. */
static PetscErrorCode MatConvertToTriples_dense_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  PetscScalar   *v;
  const PetscInt m = A->rmap->n, N = A->cmap->N;
  PetscInt       lda;
  PetscCount     i, j;
  PetscMUMPSInt *row, *col;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatDenseGetLDA(A, &lda));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc2(m * N, &row, m * N, &col));
    /* first column: global row indices, column index 0 */
    for (i = 0; i < m; i++) {
      col[i] = 0;
      PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
    }
    /* remaining columns: constant column index j, row indices copied from the previous column */
    for (j = 1; j < N; j++) {
      for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(j, col + i + m * j));
      PetscCall(PetscArraycpy(row + m * j, row + m * (j - 1), m));
    }
    if (lda == m) mumps->val = v; /* storage is already contiguous, use it in place */
    else {
      /* lda > m: pack each column of length m out of the strided storage */
      PetscCall(PetscMalloc1(m * N, &mumps->val));
      mumps->val_alloc = mumps->val;
      for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = m * N;
  } else {
    /* reuse: refresh values only; copy when a packed/nest-owned buffer is in use */
    if (lda == m && !mumps->nest_vals) mumps->val = v;
    else {
      for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
    }
  }
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1471 
1472 // If the input Mat (sub) is either MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL, this function gets the parent Mat until it is not a
1473 // MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL itself and returns the appropriate shift, scaling, and whether the parent Mat should be conjugated
1474 // and its rows and columns permuted
1475 // TODO FIXME: this should not be in this file and should instead be refactored where the same logic applies, e.g., MatAXPY_Dense_Nest()
1476 static PetscErrorCode MatGetTranspose_TransposeVirtual(Mat *sub, PetscBool *conjugate, PetscScalar *vshift, PetscScalar *vscale, PetscBool *swap)
1477 {
1478   Mat         A;
1479   PetscScalar s[2];
1480   PetscBool   isTrans, isHTrans, compare;
1481 
1482   PetscFunctionBegin;
1483   do {
1484     PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATTRANSPOSEVIRTUAL, &isTrans));
1485     if (isTrans) {
1486       PetscCall(MatTransposeGetMat(*sub, &A));
1487       isHTrans = PETSC_FALSE;
1488     } else {
1489       PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
1490       if (isHTrans) PetscCall(MatHermitianTransposeGetMat(*sub, &A));
1491     }
1492     compare = (PetscBool)(isTrans || isHTrans);
1493     if (compare) {
1494       if (vshift && vscale) {
1495         PetscCall(MatShellGetScalingShifts(*sub, s, s + 1, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
1496         if (!*conjugate) {
1497           *vshift += s[0] * *vscale;
1498           *vscale *= s[1];
1499         } else {
1500           *vshift += PetscConj(s[0]) * *vscale;
1501           *vscale *= PetscConj(s[1]);
1502         }
1503       }
1504       if (swap) *swap = (PetscBool)!*swap;
1505       if (isHTrans && conjugate) *conjugate = (PetscBool)!*conjugate;
1506       *sub = A;
1507     }
1508   } while (compare);
1509   PetscFunctionReturn(PETSC_SUCCESS);
1510 }
1511 
/* Convert a MATNEST matrix to the MUMPS triples (COO) format: each nonempty block is converted with the
   routine matching its type, and the resulting triples are merged, with indices remapped to the global
   ordering of the nest via the nest ISs (rows directly, columns through a PetscSF to resolve off-process
   columns). <shift> is 0 (C-style indices) or 1 (Fortran-style). For Cholesky (mumps->sym != 0) only the
   upper-triangular blocks are converted. With MAT_REUSE_MATRIX only the numerical values are refreshed,
   reusing the index arrays and per-block offsets (nest_vals_start) built during the initial call. */
static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat     **mats;
  PetscInt  nr, nc;
  PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE; /* symmetric (Cholesky-type) factorization? */

  PetscFunctionBegin;
  PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscMUMPSInt *irns, *jcns;
    PetscScalar   *vals;
    PetscCount     totnnz, cumnnz, maxnnz;
    PetscInt      *pjcns_w, Mbs = 0;
    IS            *rows, *cols;
    PetscInt     **rows_idx, **cols_idx;

    /* First pass: pick a conversion routine per block and count nonzeros (total and per-block maximum) */
    cumnnz = 0;
    maxnnz = 0;
    PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples));
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        Mat sub = mats[r][c];

        mumps->nest_convert_to_triples[r * nc + c] = NULL;
        if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */
        if (sub) {
          PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL;
          PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense;
          MatInfo   info;

          /* peel off virtual-transpose wrappers to inspect the concrete block type */
          PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
          PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
          PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));

          if (chol) {
            /* diagonal blocks only contribute their upper triangle; off-diagonal blocks are converted fully */
            if (r == c) {
              if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij;
              else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij;
              else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij;
              else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij;
              else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
              else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
            } else {
              if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
              else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
              else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
              else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
              else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
              else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
            }
          } else {
            if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
            else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
            else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
            else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
            else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
            else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
          }
          PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name);
          mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples;
          PetscCall(MatGetInfo(sub, MAT_LOCAL, &info));
          cumnnz += (PetscCount)info.nz_used; /* can be overestimated for Cholesky */
          maxnnz = PetscMax(maxnnz, info.nz_used);
        }
      }
    }

    /* Allocate total COO */
    totnnz = cumnnz;
    PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns));
    PetscCall(PetscMalloc1(totnnz, &vals));

    /* Handle rows and column maps
       We directly map rows and use an SF for the columns */
    PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx));
    PetscCall(MatNestGetISs(A, rows, cols));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r]));
    for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c]));
    /* scratch buffer to widen PetscMUMPSInt column indices to PetscInt for the SF (64-bit builds only) */
    if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w));
    else (void)maxnnz;

    /* Second pass: convert each block and remap its triples into the global COO arrays */
    cumnnz = 0;
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        Mat             sub    = mats[r][c];
        const PetscInt *ridx   = rows_idx[r];
        const PetscInt *cidx   = cols_idx[c];
        PetscScalar     vscale = 1.0, vshift = 0.0;
        PetscInt        rst, size, bs;
        PetscSF         csf;
        PetscBool       conjugate = PETSC_FALSE, swap = PETSC_FALSE;
        PetscLayout     cmap;
        PetscInt        innz;

        mumps->nest_vals_start[r * nc + c] = cumnnz;
        if (c == r) {
          /* accumulate the global number of variable blocks (rows/bs) for the ICNTL(15) setup below */
          PetscCall(ISGetSize(rows[r], &size));
          if (!mumps->nest_convert_to_triples[r * nc + c]) {
            for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row
          }
          PetscCall(MatGetBlockSize(sub, &bs));
          Mbs += size / bs;
        }
        if (!mumps->nest_convert_to_triples[r * nc + c]) continue;

        /* Extract inner blocks if needed */
        PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, &swap));
        PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell");

        /* Get column layout to map off-process columns */
        PetscCall(MatGetLayouts(sub, NULL, &cmap));

        /* Get row start to map on-process rows */
        PetscCall(MatGetOwnershipRange(sub, &rst, NULL));

        /* Directly use the mumps datastructure and use C ordering for now */
        PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps));

        /* Swap the role of rows and columns indices for transposed blocks
           since we need values with global final ordering */
        if (swap) {
          cidx = rows_idx[r];
          ridx = cols_idx[c];
        }

        /* Communicate column indices
           This could have been done with a single SF but it would have complicated the code a lot.
           But since we do it only once, we pay the price of setting up an SF for each block */
        if (PetscDefined(USE_64BIT_INDICES)) {
          for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k];
        } else pjcns_w = (PetscInt *)mumps->jcn; /* This cast is needed only to silence warnings for 64bit integers builds */
        PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf));
        PetscCall(PetscIntCast(mumps->nnz, &innz));
        PetscCall(PetscSFSetGraphLayout(csf, cmap, innz, NULL, PETSC_OWN_POINTER, pjcns_w));
        PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
        PetscCall(PetscSFDestroy(&csf));

        /* Import indices: use direct map for rows and mapped indices for columns */
        if (swap) {
          for (PetscInt k = 0; k < mumps->nnz; k++) {
            PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k]));
            PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k]));
          }
        } else {
          for (PetscInt k = 0; k < mumps->nnz; k++) {
            PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k]));
            PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k]));
          }
        }

        /* Import values to full COO */
        if (conjugate) { /* conjugate the entries */
          PetscScalar *v = vals + cumnnz;
          for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * PetscConj(mumps->val[k]);
        } else if (vscale != 1.0) {
          PetscScalar *v = vals + cumnnz;
          for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * mumps->val[k];
        } else PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz));

        /* Shift new starting point and sanity check */
        cumnnz += mumps->nnz;
        PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);

        /* Free scratch memory */
        PetscCall(PetscFree2(mumps->irn, mumps->jcn));
        PetscCall(PetscFree(mumps->val_alloc));
        mumps->val = NULL;
        mumps->nnz = 0;
      }
    }
    /* ICNTL(15) == 1: build the blkptr[] array describing the variable-block structure on rank 0
       (NOTE(review): Mbs != A->rmap->N means some diagonal block has bs > 1, otherwise the compressed
       format brings nothing and ICNTL(15) is reset to 0 — confirm against the MUMPS manual) */
    if (mumps->id.ICNTL(15) == 1) {
      if (Mbs != A->rmap->N) {
        PetscMPIInt rank, size;

        PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
        PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
        if (rank == 0) {
          PetscInt shift = 0;

          PetscCall(PetscMUMPSIntCast(Mbs, &mumps->id.nblk));
          PetscCall(PetscFree(mumps->id.blkptr));
          PetscCall(PetscMalloc1(Mbs + 1, &mumps->id.blkptr));
          mumps->id.blkptr[0] = 1; /* 1-based (Fortran) pointer array */
          /* walk process by process, then row-block by row-block, appending each local block range */
          for (PetscInt i = 0; i < size; ++i) {
            for (PetscInt r = 0; r < nr; r++) {
              Mat             sub = mats[r][r];
              const PetscInt *ranges;
              PetscInt        bs;

              for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row
              PetscCall(MatGetOwnershipRanges(sub, &ranges));
              PetscCall(MatGetBlockSize(sub, &bs));
              for (PetscInt j = 0, start = mumps->id.blkptr[shift] + bs; j < ranges[i + 1] - ranges[i]; j += bs) PetscCall(PetscMUMPSIntCast(start + j, mumps->id.blkptr + shift + j / bs + 1));
              shift += (ranges[i + 1] - ranges[i]) / bs;
            }
          }
        }
      } else mumps->id.ICNTL(15) = 0;
    }
    if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r]));
    for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c]));
    PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx));
    if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);
    mumps->nest_vals_start[nr * nc] = cumnnz;

    /* Set pointers for final MUMPS data structure */
    mumps->nest_vals = vals;
    mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */
    mumps->val       = vals;
    mumps->irn       = irns;
    mumps->jcn       = jcns;
    mumps->nnz       = cumnnz;
  } else {
    /* MAT_REUSE_MATRIX: re-run each block conversion in place at its recorded offset, then reapply the
       wrapper conjugation/scaling */
    PetscScalar *oval = mumps->nest_vals;
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        PetscBool   conjugate = PETSC_FALSE;
        Mat         sub       = mats[r][c];
        PetscScalar vscale = 1.0, vshift = 0.0;
        PetscInt    midx = r * nc + c;

        if (!mumps->nest_convert_to_triples[midx]) continue;
        PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, NULL));
        PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell");
        mumps->val = oval + mumps->nest_vals_start[midx];
        PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps));
        if (conjugate) {
          PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
          for (PetscCount k = 0; k < nnz; k++) mumps->val[k] = vscale * PetscConj(mumps->val[k]);
        } else if (vscale != 1.0) {
          PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
          for (PetscCount k = 0; k < nnz; k++) mumps->val[k] *= vscale;
        }
      }
    }
    mumps->val = oval; /* restore the base pointer moved during per-block conversions */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1759 
/* Destroy a MUMPS factor matrix: free all PETSc-side scratch arrays and scatters, terminate the MUMPS
   instance (JOB_END) if JOB_INIT was ever issued, release the MUMPS communicator, and clear the composed
   MatMumps* query functions. The ordering matters: JOB_END must run before MatMumpsFreeInternalID. */
static PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;

  PetscFunctionBegin;
  PetscCall(PetscFree(mumps->id.isol_loc));
  PetscCall(VecScatterDestroy(&mumps->scat_rhs));
  PetscCall(VecScatterDestroy(&mumps->scat_sol));
  PetscCall(VecDestroy(&mumps->b_seq));
  PetscCall(VecDestroy(&mumps->x_seq));
  PetscCall(PetscFree(mumps->id.perm_in));
  PetscCall(PetscFree(mumps->id.blkvar));
  PetscCall(PetscFree(mumps->id.blkptr));
  PetscCall(PetscFree2(mumps->irn, mumps->jcn));
  PetscCall(PetscFree(mumps->val_alloc));
  PetscCall(PetscFree(mumps->info));
  PetscCall(PetscFree(mumps->ICNTL_pre));
  PetscCall(PetscFree(mumps->CNTL_pre));
  PetscCall(MatMumpsResetSchur_Private(mumps));
  if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
    mumps->id.job = JOB_END;
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
    /* release the communicator handed to MUMPS: MPI_Comm_free for the OMP path (it was duplicated),
       PetscCommRestoreComm otherwise */
    if (mumps->mumps_comm != MPI_COMM_NULL) {
      if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
      else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
    }
  }
  PetscCall(MatMumpsFreeInternalID(&mumps->id));
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  /* buffers only allocated on the OMP-support code path (distributed RHS gathering) */
  if (mumps->use_petsc_omp_support) {
    PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
    PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
    PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
  }
#endif
  PetscCall(PetscFree(mumps->ia_alloc));
  PetscCall(PetscFree(mumps->ja_alloc));
  PetscCall(PetscFree(mumps->recvcount));
  PetscCall(PetscFree(mumps->reqs));
  PetscCall(PetscFree(mumps->irhs_loc));
  PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples));
  PetscCall(PetscFree(mumps->nest_vals));
  PetscCall(PetscFree(A->data));

  /* clear composed functions */
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetBlk_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1823 
/* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor.
   Fills id.nrhs/nloc_rhs/lrhs_loc/irhs_loc and id.rhs_loc (converted to the MUMPS working precision).
   irhs_loc uses 1-based (Fortran) global row indices. With the PETSc-OMP support the RHS of all ranks of
   an OMP group is gathered onto the group master, which is the only rank talking to MUMPS. */
static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
{
  Mat_MUMPS        *mumps   = (Mat_MUMPS *)A->data;
  const PetscMPIInt ompsize = mumps->omp_comm_size;
  PetscInt          i, m, M, rstart;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &M, NULL));
  PetscCall(MatGetLocalSize(A, &m, NULL));
  PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
  if (ompsize == 1) {
    /* no OMP grouping: every rank passes its own rows directly to MUMPS */
    if (!mumps->irhs_loc) { /* build irhs_loc lazily, it only depends on the row distribution */
      mumps->nloc_rhs = (PetscMUMPSInt)m;
      PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
      PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
      for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(rstart + i + 1, &mumps->irhs_loc[i])); /* use 1-based indices */
    }
    PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, m * nrhs, array, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc));
  } else {
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
    const PetscInt *ranges;
    PetscMPIInt     j, k, sendcount, *petsc_ranks, *omp_ranks;
    MPI_Group       petsc_group, omp_group;
    PetscScalar    *recvbuf = NULL;

    if (mumps->is_omp_master) {
      /* Lazily initialize the omp stuff for distributed rhs */
      if (!mumps->irhs_loc) {
        PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
        PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
        PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
        PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
        /* translate ranks of the OMP sub-communicator back to PETSc ranks to look up their row ranges */
        for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
        PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));

        /* Populate mumps->irhs_loc[], rhs_nrow[] */
        mumps->nloc_rhs = 0;
        PetscCall(MatGetOwnershipRanges(A, &ranges));
        for (j = 0; j < ompsize; j++) {
          mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
          mumps->nloc_rhs += mumps->rhs_nrow[j];
        }
        PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
        for (j = k = 0; j < ompsize; j++) {
          for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) PetscCall(PetscMUMPSIntCast(i + 1, &mumps->irhs_loc[k])); /* uses 1-based indices */
        }

        PetscCall(PetscFree2(omp_ranks, petsc_ranks));
        PetscCallMPI(MPI_Group_free(&petsc_group));
        PetscCallMPI(MPI_Group_free(&omp_group));
      }

      /* Realloc buffers when current nrhs is bigger than what we have met */
      if (nrhs > mumps->max_nrhs) {
        PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
        PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
        mumps->max_nrhs = nrhs;
      }

      /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
      for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
      mumps->rhs_disps[0] = 0;
      for (j = 1; j < ompsize; j++) {
        mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
        PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
      }
      recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
    }

    PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
    PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));

    if (mumps->is_omp_master) {
      if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
        PetscScalar *dst, *dstbase = mumps->rhs_loc;
        for (j = 0; j < ompsize; j++) {
          const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
          dst                    = dstbase;
          /* interleave: gathered data is per-rank contiguous, MUMPS wants per-RHS contiguous columns */
          for (i = 0; i < nrhs; i++) {
            PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
            src += mumps->rhs_nrow[j];
            dst += mumps->nloc_rhs;
          }
          dstbase += mumps->rhs_nrow[j];
        }
      }
      PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nloc_rhs * nrhs, mumps->rhs_loc, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc));
    }
#endif /* PETSC_HAVE_OPENMP_SUPPORT */
  }
  mumps->id.nrhs     = (PetscMUMPSInt)nrhs;
  mumps->id.nloc_rhs = (PetscMUMPSInt)mumps->nloc_rhs;
  mumps->id.lrhs_loc = mumps->nloc_rhs;
  mumps->id.irhs_loc = mumps->irhs_loc;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1921 
/* Solve A x = b using the computed MUMPS factors (or A^T x = b when the caller has set ICNTL(9) = 0,
   see MatSolveTranspose_MUMPS). Supports dense distributed RHS (ICNTL(20)=10), dense centralized RHS
   (ICNTL(20)=0), and the Schur-complement condensation/expansion path when Schur indices were provided. */
static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
{
  Mat_MUMPS         *mumps  = (Mat_MUMPS *)A->data;
  const PetscScalar *barray = NULL;
  PetscScalar       *array;
  IS                 is_iden, is_petsc;
  PetscInt           i;
  PetscBool          second_solve = PETSC_FALSE;
  static PetscBool   cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM "
                                   "Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",
                                   &cite1));
  PetscCall(PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel "
                                   "Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",
                                   &cite2));

  /* if the factorization failed, fill x with the error flag and bail out early */
  PetscCall(VecFlag(x, A->factorerrortype));
  if (A->factorerrortype) {
    PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  mumps->id.nrhs = 1; /* single right-hand side */
  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS, need to set rhs_loc[], irhs_loc[] */
      PetscCall(VecGetArrayRead(b, &barray));
      PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, barray));
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential b_seq vector*/
      PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
      if (!mumps->myid) { /* only the root rank hands the centralized RHS to MUMPS */
        PetscCall(VecGetArray(mumps->b_seq, &array));
        PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->b_seq->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
      }
    }
  } else { /* petsc_size == 1, use MUMPS's dense centralized RHS feature, so that we don't need to bother with isol_loc[] to get the solution */
    mumps->id.ICNTL(20) = 0;
    PetscCall(VecCopy(b, x)); /* MUMPS overwrites the RHS with the solution, so work on x directly */
    PetscCall(VecGetArray(x, &array));
    PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, x->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
  }

  /*
     handle condensation step of Schur complement (if any)
     We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
     According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase
     Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S
  */
  if (mumps->id.size_schur > 0) {
    PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
    if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
      second_solve = PETSC_TRUE;
      PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs
      mumps->id.ICNTL(26) = 1;                                /* condensation phase */
    } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
  }

  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps); // reduced solve, put solution in id.redrhs
  PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));

  /* handle expansion step of Schur complement (if any) */
  if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
  else if (mumps->id.ICNTL(26) == 1) { // condense the right hand side
    PetscCall(MatMumpsSolveSchur_Private(A));
    /* scatter the Schur solution back into the full solution array (listvar_schur is 1-based) */
    for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1] = ID_FIELD_GET(mumps->id, redrhs, i);
  }

  if (mumps->petsc_size > 1) { /* convert mumps distributed solution to PETSc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of ilsol_loc may change (not its size, lsol_loc), recreates scat_sol */
      PetscCall(VecScatterDestroy(&mumps->scat_sol));
    }
    if (!mumps->scat_sol) { /* create scatter scat_sol */
      PetscInt *isol2_loc = NULL;
      PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */
      PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc));
      for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1;                        /* change Fortran style to C style */
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */
      PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol));
      PetscCall(ISDestroy(&is_iden));
      PetscCall(ISDestroy(&is_petsc));
      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
    }

    PetscScalar *xarray;
    PetscCall(VecGetArray(mumps->x_seq, &xarray));
    /* sol_loc is in the MUMPS working precision; cast it back to PetscScalar */
    PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.lsol_loc, mumps->id.precision, mumps->id.sol_loc, xarray));
    PetscCall(VecRestoreArray(mumps->x_seq, &xarray));
    PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));

    if (mumps->ICNTL20 == 10) { // distributed RHS
      PetscCall(VecRestoreArrayRead(b, &barray));
    } else if (!mumps->myid) { // centralized RHS
      PetscCall(VecRestoreArray(mumps->b_seq, &array));
    }
  } else {
    // id.rhs has the solution in mumps precision
    PetscCall(MatMumpsCastMumpsScalarArray(x->map->n, mumps->id.precision, mumps->id.rhs, array));
    PetscCall(VecRestoreArray(x, &array));
  }

  /* NOTE(review): MUMPS reports negative INFO(28) as a count in millions — confirm against the manual */
  PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2033 
2034 static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x)
2035 {
2036   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
2037   const PetscMUMPSInt value = mumps->id.ICNTL(9);
2038 
2039   PetscFunctionBegin;
2040   mumps->id.ICNTL(9) = 0;
2041   PetscCall(MatSolve_MUMPS(A, b, x));
2042   mumps->id.ICNTL(9) = value;
2043   PetscFunctionReturn(PETSC_SUCCESS);
2044 }
2045 
/* Solve A X = B with multiple right-hand sides using a MUMPS factorization of A.
   X must be MATDENSE. B is either dense (copied/scattered into MUMPS RHS storage)
   or a MATTRANSPOSEVIRTUAL wrapping an AIJ matrix Bt, in which case MUMPS's sparse
   RHS interface is used (MUMPS wants compressed-COLUMN RHS, i.e. the rows of Bt).
   The sequential (petsc_size == 1) case is handled specially and returns early;
   the parallel case centralizes or distributes the RHS per mumps->ICNTL20 and
   scatters MUMPS's distributed solution (sol_loc/isol_loc) back into X. */
static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X)
{
  Mat                Bt = NULL;
  PetscBool          denseX, denseB, flg, flgT;
  Mat_MUMPS         *mumps = (Mat_MUMPS *)A->data;
  PetscInt           i, nrhs, M, nrhsM;
  PetscScalar       *array;
  const PetscScalar *barray;
  PetscInt           lsol_loc, nlsol_loc, *idxx, iidx = 0;
  PetscMUMPSInt     *isol_loc, *isol_loc_save;
  PetscScalar       *sol_loc;
  void              *sol_loc_save;
  PetscCount         sol_loc_len_save;
  IS                 is_to, is_from;
  PetscInt           k, proc, j, m, myrstart;
  const PetscInt    *rstart;
  Vec                v_mpi, msol_loc;
  VecScatter         scat_sol;
  Vec                b_seq;
  VecScatter         scat_rhs;
  PetscScalar       *aa;
  PetscInt           spnr, *ia, *ja;
  Mat_MPIAIJ        *b = NULL;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL));
  PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");

  PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL));

  if (denseB) {
    PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution");
    mumps->id.ICNTL(20) = 0; /* dense RHS */
  } else {                   /* sparse B */
    PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices");
    PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT));
    PetscCheck(flgT, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix");
    /* reject any scaling/shift baked into the shell wrapper; we only support a plain transpose */
    PetscCall(MatShellGetScalingShifts(B, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
    /* input B is transpose of actual RHS matrix,
     because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
    PetscCall(MatTransposeGetMat(B, &Bt));
    mumps->id.ICNTL(20) = 1; /* sparse RHS */
  }

  PetscCall(MatGetSize(B, &M, &nrhs));
  PetscCall(PetscIntMultError(nrhs, M, &nrhsM)); /* nrhsM = nrhs*M with overflow check */
  mumps->id.nrhs = (PetscMUMPSInt)nrhs;
  mumps->id.lrhs = (PetscMUMPSInt)M;

  if (mumps->petsc_size == 1) { // handle this easy case specially and return early
    PetscScalar *aa;
    PetscInt     spnr, *ia, *ja;
    PetscBool    second_solve = PETSC_FALSE;

    PetscCall(MatDenseGetArray(X, &array));
    if (denseB) {
      /* copy B to X */
      PetscCall(MatDenseGetArrayRead(B, &barray));
      PetscCall(PetscArraycpy(array, barray, nrhsM));
      PetscCall(MatDenseRestoreArrayRead(B, &barray));
    } else { /* sparse B */
      PetscCall(MatSeqAIJGetArray(Bt, &aa));
      /* 1-based (Fortran) indices for MUMPS */
      PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
      PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
      PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->id.nz_rhs, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
    }
    /* id.rhs serves as RHS on input and solution on output */
    PetscCall(MatMumpsMakeMumpsScalarArray(denseB, nrhsM, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));

    /* handle condensation step of Schur complement (if any) */
    if (mumps->id.size_schur > 0) {
      if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
        second_solve = PETSC_TRUE;
        PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs
        mumps->id.ICNTL(26) = 1;                                /* condensation phase, i.e, to solve id.redrhs */
      } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
    }

    mumps->id.job = JOB_SOLVE;
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));

    /* handle expansion step of Schur complement (if any) */
    if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
    else if (mumps->id.ICNTL(26) == 1) { // condense the right hand side
      PetscCall(MatMumpsSolveSchur_Private(A));
      /* copy the Schur solution (1-based listvar_schur) back into X's columns */
      for (j = 0; j < nrhs; ++j)
        for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1 + j * M] = ID_FIELD_GET(mumps->id, redrhs, i + j * mumps->id.lredrhs);
    }

    if (!denseB) { /* sparse B, restore ia, ja */
      PetscCall(MatSeqAIJRestoreArray(Bt, &aa));
      PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
    }

    // no matter dense B or sparse B, solution is in id.rhs; convert it to array of X.
    PetscCall(MatMumpsCastMumpsScalarArray(nrhsM, mumps->id.precision, mumps->id.rhs, array));
    PetscCall(MatDenseRestoreArray(X, &array));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* parallel case: MUMPS requires rhs B to be centralized on the host! */
  PetscCheck(!mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");

  /* create msol_loc to hold mumps local solution */
  isol_loc_save         = mumps->id.isol_loc; /* save these, as we want to reuse them in MatSolve() */
  sol_loc_save          = mumps->id.sol_loc;
  sol_loc_len_save      = mumps->id.sol_loc_len;
  mumps->id.isol_loc    = NULL; // an init state
  mumps->id.sol_loc     = NULL;
  mumps->id.sol_loc_len = 0;

  lsol_loc = mumps->id.lsol_loc;
  PetscCall(PetscIntMultError(nrhs, lsol_loc, &nlsol_loc)); /* length of sol_loc */
  PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc));
  PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, nlsol_loc, sol_loc, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc));
  mumps->id.isol_loc = isol_loc;

  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc));

  if (denseB) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      PetscCall(MatDenseGetArrayRead(B, &barray));
      PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, barray)); // put barray to rhs_loc
      PetscCall(MatDenseRestoreArrayRead(B, &barray));
      PetscCall(MatGetLocalSize(B, &m, NULL));
      PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, NULL, &v_mpi)); // will scatter the solution to v_mpi, which wraps X
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
      /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in
        very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank
        0, re-arrange B into desired order, which is a local operation.
      */

      /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
      /* wrap dense rhs matrix B into a vector v_mpi */
      PetscCall(MatGetLocalSize(B, &m, NULL));
      PetscCall(MatDenseGetArrayRead(B, &barray));
      PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, barray, &v_mpi));
      PetscCall(MatDenseRestoreArrayRead(B, &barray));

      /* scatter v_mpi to b_seq in proc[0]. With ICNTL(20) = 0, MUMPS requires rhs to be centralized on the host! */
      if (!mumps->myid) {
        PetscInt *idx;
        /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
        PetscCall(PetscMalloc1(nrhsM, &idx));
        PetscCall(MatGetOwnershipRanges(B, &rstart));
        for (proc = 0, k = 0; proc < mumps->petsc_size; proc++) {
          for (j = 0; j < nrhs; j++) {
            for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i;
          }
        }

        PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhsM, &b_seq));
        PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhsM, idx, PETSC_OWN_POINTER, &is_to));
        PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhsM, 0, 1, &is_from));
      } else {
        /* non-host ranks contribute only their slice of v_mpi; they receive nothing */
        PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq));
        PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to));
        PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from));
      }

      PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs));
      PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(ISDestroy(&is_to));
      PetscCall(ISDestroy(&is_from));
      PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));

      if (!mumps->myid) { /* define rhs on the host */
        PetscCall(VecGetArrayRead(b_seq, &barray));
        PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, nrhsM, barray, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
        PetscCall(VecRestoreArrayRead(b_seq, &barray));
      }
    }
  } else { /* sparse B */
    b = (Mat_MPIAIJ *)Bt->data;

    /* wrap dense X into a vector v_mpi */
    PetscCall(MatGetLocalSize(X, &m, NULL));
    PetscCall(MatDenseGetArrayRead(X, &barray));
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhsM, barray, &v_mpi));
    PetscCall(MatDenseRestoreArrayRead(X, &barray));

    if (!mumps->myid) {
      /* only the host passes the sparse RHS (diagonal block of Bt) to MUMPS */
      PetscCall(MatSeqAIJGetArray(b->A, &aa));
      PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
      PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
      PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)b->A->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
    } else {
      mumps->id.irhs_ptr    = NULL;
      mumps->id.irhs_sparse = NULL;
      mumps->id.nz_rhs      = 0;
      if (mumps->id.rhs_sparse_len) {
        PetscCall(PetscFree(mumps->id.rhs_sparse));
        mumps->id.rhs_sparse_len = 0;
      }
    }
  }

  /* solve phase */
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));

  /* scatter mumps distributed solution to PETSc vector v_mpi, which shares local arrays with solution matrix X */
  PetscCall(MatDenseGetArray(X, &array));
  PetscCall(VecPlaceArray(v_mpi, array));

  /* create scatter scat_sol */
  PetscCall(MatGetOwnershipRanges(X, &rstart));
  /* iidx: index for scatter mumps solution to PETSc X */

  PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from));
  PetscCall(PetscMalloc1(nlsol_loc, &idxx));
  for (i = 0; i < lsol_loc; i++) {
    isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */

    /* find the owner rank of global row isol_loc[i] in X's layout */
    for (proc = 0; proc < mumps->petsc_size; proc++) {
      if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) {
        myrstart = rstart[proc];
        k        = isol_loc[i] - myrstart;          /* local index on 1st column of PETSc vector X */
        iidx     = k + myrstart * nrhs;             /* maps mumps isol_loc[i] to PETSc index in X */
        m        = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */
        break;
      }
    }

    for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m;
  }
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to));
  PetscCall(MatMumpsCastMumpsScalarArray(nlsol_loc, mumps->id.precision, mumps->id.sol_loc, sol_loc)); // Vec msol_loc is created with sol_loc[]
  PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol));
  PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(ISDestroy(&is_from));
  PetscCall(ISDestroy(&is_to));
  PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(MatDenseRestoreArray(X, &array));

  if (mumps->id.sol_loc_len) { // in case we allocated intermediate buffers
    mumps->id.sol_loc_len = 0;
    PetscCall(PetscFree(mumps->id.sol_loc));
  }

  // restore old values
  mumps->id.sol_loc     = sol_loc_save;
  mumps->id.sol_loc_len = sol_loc_len_save;
  mumps->id.isol_loc    = isol_loc_save;

  PetscCall(PetscFree2(sol_loc, isol_loc));
  PetscCall(PetscFree(idxx));
  PetscCall(VecDestroy(&msol_loc));
  PetscCall(VecDestroy(&v_mpi));
  if (!denseB) {
    if (!mumps->myid) {
      b = (Mat_MPIAIJ *)Bt->data;
      PetscCall(MatSeqAIJRestoreArray(b->A, &aa));
      PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
    }
  } else {
    if (mumps->ICNTL20 == 0) {
      PetscCall(VecDestroy(&b_seq));
      PetscCall(VecScatterDestroy(&scat_rhs));
    }
  }
  PetscCall(VecScatterDestroy(&scat_sol));
  PetscCall(PetscLogFlops(nrhs * PetscMax(0, 2.0 * (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2318 
2319 static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X)
2320 {
2321   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
2322   const PetscMUMPSInt value = mumps->id.ICNTL(9);
2323 
2324   PetscFunctionBegin;
2325   mumps->id.ICNTL(9) = 0;
2326   PetscCall(MatMatSolve_MUMPS(A, B, X));
2327   mumps->id.ICNTL(9) = value;
2328   PetscFunctionReturn(PETSC_SUCCESS);
2329 }
2330 
2331 static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X)
2332 {
2333   PetscBool flg;
2334   Mat       B;
2335 
2336   PetscFunctionBegin;
2337   PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
2338   PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix");
2339 
2340   /* Create B=Bt^T that uses Bt's data structure */
2341   PetscCall(MatCreateTranspose(Bt, &B));
2342 
2343   PetscCall(MatMatSolve_MUMPS(A, B, X));
2344   PetscCall(MatDestroy(&B));
2345   PetscFunctionReturn(PETSC_SUCCESS);
2346 }
2347 
2348 #if !defined(PETSC_USE_COMPLEX)
2349 /*
2350   input:
2351    F:        numeric factor
2352   output:
2353    nneg:     total number of negative pivots
2354    nzero:    total number of zero pivots
2355    npos:     (global dimension of F) - nneg - nzero
2356 */
2357 static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
2358 {
2359   Mat_MUMPS  *mumps = (Mat_MUMPS *)F->data;
2360   PetscMPIInt size;
2361 
2362   PetscFunctionBegin;
2363   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size));
2364   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
2365   PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13));
2366 
2367   if (nneg) *nneg = mumps->id.INFOG(12);
2368   if (nzero || npos) {
2369     PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
2370     if (nzero) *nzero = mumps->id.INFOG(28);
2371     if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
2372   }
2373   PetscFunctionReturn(PETSC_SUCCESS);
2374 }
2375 #endif
2376 
/* Gather the COO triples (irn/jcn/val) from all ranks of mumps->omp_comm onto the
   omp master rank, so that MUMPS (running with fewer MPI ranks + OpenMP threads)
   sees the union of the nonzeros. With MAT_INITIAL_MATRIX the structure (irn/jcn)
   and values are gathered and the master's arrays are reallocated to the combined
   size; with reuse only the values are re-gathered into the already-sized arrays.
   Messages are sliced into <= PETSC_MPI_INT_MAX chunks because counts are 64-bit. */
static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps)
{
  PetscMPIInt    nreqs;
  PetscMUMPSInt *irn, *jcn;
  PetscMPIInt    count;
  PetscCount     totnnz, remain;
  const PetscInt osize = mumps->omp_comm_size;
  PetscScalar   *val;

  PetscFunctionBegin;
  if (osize > 1) { /* nothing to do when each MUMPS rank is its own PETSc rank */
    if (reuse == MAT_INITIAL_MATRIX) {
      /* master first gathers counts of nonzeros to receive */
      if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount));
      PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm));

      /* Then each computes number of send/recvs */
      if (mumps->is_omp_master) {
        /* Start from 1 since self communication is not done in MPI */
        nreqs = 0;
        for (PetscMPIInt i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
      } else {
        nreqs = (PetscMPIInt)(((mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX));
      }
      PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */

      /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others.
         MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz
         might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size
         is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
       */
      nreqs = 0; /* counter for actual send/recvs */
      if (mumps->is_omp_master) {
        totnnz = 0;

        for (PetscMPIInt i = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
        PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn));
        PetscCall(PetscMalloc1(totnnz, &val));

        /* Self communication */
        PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz));
        PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz));
        PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz));

        /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
        PetscCall(PetscFree2(mumps->irn, mumps->jcn));
        PetscCall(PetscFree(mumps->val_alloc));
        mumps->nnz = totnnz;
        mumps->irn = irn;
        mumps->jcn = jcn;
        mumps->val = mumps->val_alloc = val;

        /* from here on irn/jcn/val are roaming receive cursors past the master's own data */
        irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
        jcn += mumps->recvcount[0];
        val += mumps->recvcount[0];

        /* Remote communication */
        for (PetscMPIInt i = 1; i < osize; i++) {
          count  = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
          remain = mumps->recvcount[i] - count;
          while (count > 0) {
            PetscCallMPI(MPIU_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
            PetscCallMPI(MPIU_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
            PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
            irn += count;
            jcn += count;
            val += count;
            count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
            remain -= count;
          }
        }
      } else {
        /* non-master: send own triples to rank 0 of omp_comm, sliced the same way */
        irn    = mumps->irn;
        jcn    = mumps->jcn;
        val    = mumps->val;
        count  = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
        remain = mumps->nnz - count;
        while (count > 0) {
          PetscCallMPI(MPIU_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
          PetscCallMPI(MPIU_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
          PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
          irn += count;
          jcn += count;
          val += count;
          count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
          remain -= count;
        }
      }
    } else {
      /* reuse: structure already gathered; only re-gather the numerical values */
      nreqs = 0;
      if (mumps->is_omp_master) {
        val = mumps->val + mumps->recvcount[0];
        for (PetscMPIInt i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */
          count  = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
          remain = mumps->recvcount[i] - count;
          while (count > 0) {
            PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
            val += count;
            count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
            remain -= count;
          }
        }
      } else {
        val    = mumps->val;
        count  = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
        remain = mumps->nnz - count;
        while (count > 0) {
          PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
          val += count;
          count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
          remain -= count;
        }
      }
    }
    PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE));
    mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2496 
2497 static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info)
2498 {
2499   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2500 
2501   PetscFunctionBegin;
2502   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) {
2503     if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2504     PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2505     PetscFunctionReturn(PETSC_SUCCESS);
2506   }
2507 
2508   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
2509   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));
2510 
2511   /* numerical factorization phase */
2512   mumps->id.job = JOB_FACTNUMERIC;
2513   if (!mumps->id.ICNTL(18)) { /* A is centralized */
2514     if (!mumps->myid) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
2515   } else {
2516     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
2517   }
2518 
2519   if (F->schur) {
2520     const PetscScalar *array;
2521     MUMPS_INT          size = mumps->id.size_schur;
2522     PetscCall(MatDenseGetArrayRead(F->schur, &array));
2523     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, size * size, array, mumps->id.precision, &mumps->id.schur_len, &mumps->id.schur));
2524     PetscCall(MatDenseRestoreArrayRead(F->schur, &array));
2525   }
2526 
2527   if (mumps->id.ICNTL(22)) PetscCall(PetscStrncpy(mumps->id.ooc_prefix, ((PetscObject)F)->prefix, sizeof(((MUMPS_STRUC_C *)NULL)->ooc_prefix)));
2528 
2529   PetscMUMPS_c(mumps);
2530   if (mumps->id.INFOG(1) < 0) {
2531     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
2532     if (mumps->id.INFOG(1) == -10) {
2533       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2534       F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
2535     } else if (mumps->id.INFOG(1) == -13) {
2536       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2537       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2538     } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
2539       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2540       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2541     } else {
2542       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2543       F->factorerrortype = MAT_FACTOR_OTHER;
2544     }
2545   }
2546   PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.INFOG(16));
2547 
2548   F->assembled = PETSC_TRUE;
2549 
2550   if (F->schur) { /* reset Schur status to unfactored */
2551 #if defined(PETSC_HAVE_CUDA)
2552     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
2553 #endif
2554     PetscScalar *array;
2555     PetscCall(MatDenseGetArray(F->schur, &array));
2556     PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.size_schur * mumps->id.size_schur, mumps->id.precision, mumps->id.schur, array));
2557     PetscCall(MatDenseRestoreArray(F->schur, &array));
2558     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2559       mumps->id.ICNTL(19) = 2;
2560       PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
2561     }
2562     PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
2563   }
2564 
2565   /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
2566   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
2567 
2568   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
2569   // MUMPS userguide: ISOL_loc should be allocated by the user between the factorization and the
2570   // solve phases. On exit from the solve phase, ISOL_loc(i) contains the index of the variables for
2571   // which the solution (in SOL_loc) is available on the local processor.
2572   // If successive calls to the solve phase (JOB= 3) are performed for a given matrix, ISOL_loc will
2573   // normally have the same contents for each of these calls. The only exception is the case of
2574   // unsymmetric matrices (SYM=1) when the transpose option is changed (see ICNTL(9)) and non
2575   // symmetric row/column exchanges (see ICNTL(6)) have occurred before the solve phase.
2576   if (mumps->petsc_size > 1) {
2577     PetscInt     lsol_loc;
2578     PetscScalar *array;
2579 
2580     /* distributed solution; Create x_seq=sol_loc for repeated use */
2581     if (mumps->x_seq) {
2582       PetscCall(VecScatterDestroy(&mumps->scat_sol));
2583       PetscCall(PetscFree(mumps->id.isol_loc));
2584       PetscCall(VecDestroy(&mumps->x_seq));
2585     }
2586     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
2587     PetscCall(PetscMalloc1(lsol_loc, &mumps->id.isol_loc));
2588     PetscCall(VecCreateSeq(PETSC_COMM_SELF, lsol_loc, &mumps->x_seq));
2589     PetscCall(VecGetArray(mumps->x_seq, &array));
2590     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, lsol_loc, array, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc));
2591     PetscCall(VecRestoreArray(mumps->x_seq, &array));
2592     mumps->id.lsol_loc = (PetscMUMPSInt)lsol_loc;
2593   }
2594   PetscCall(PetscLogFlops((double)ID_RINFO_GET(mumps->id, 2)));
2595   PetscFunctionReturn(PETSC_SUCCESS);
2596 }
2597 
2598 /* Sets MUMPS options from the options database */
2599 static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A)
2600 {
2601   Mat_MUMPS    *mumps = (Mat_MUMPS *)F->data;
2602   PetscReal     cntl;
2603   PetscMUMPSInt icntl = 0, size, *listvar_schur;
2604   PetscInt      info[80], i, ninfo = 80, rbs, cbs;
2605   PetscBool     flg   = PETSC_FALSE;
2606   PetscBool     schur = mumps->id.icntl ? (PetscBool)(mumps->id.ICNTL(26) == -1) : (PetscBool)(mumps->ICNTL26 == -1);
2607   void         *arr;
2608 
2609   PetscFunctionBegin;
2610   PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat");
2611   if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */
2612     PetscPrecision precision  = PetscDefined(USE_REAL_SINGLE) ? PETSC_PRECISION_SINGLE : PETSC_PRECISION_DOUBLE;
2613     PetscInt       nthreads   = 0;
2614     PetscInt       nCNTL_pre  = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2615     PetscInt       nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
2616     PetscMUMPSInt  nblk, *blkvar, *blkptr;
2617 
2618     mumps->petsc_comm = PetscObjectComm((PetscObject)A);
2619     PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size));
2620     PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */
2621 
2622     PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support));
2623     if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
2624     /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */
2625     PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL));
2626     if (mumps->use_petsc_omp_support) {
2627       PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2628 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
2629       PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl));
2630       PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master));
2631 #else
2632       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",
2633               ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2634 #endif
2635     } else {
2636       mumps->omp_comm      = PETSC_COMM_SELF;
2637       mumps->mumps_comm    = mumps->petsc_comm;
2638       mumps->is_omp_master = PETSC_TRUE;
2639     }
2640     PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size));
2641     mumps->reqs = NULL;
2642     mumps->tag  = 0;
2643 
2644     if (mumps->mumps_comm != MPI_COMM_NULL) {
2645       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) {
2646         /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
2647         MPI_Comm comm;
2648         PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm));
2649         mumps->mumps_comm = comm;
2650       } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm));
2651     }
2652 
2653     mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
2654     mumps->id.job          = JOB_INIT;
2655     mumps->id.par          = 1; /* host participates factorizaton and solve */
2656     mumps->id.sym          = mumps->sym;
2657 
2658     size          = mumps->id.size_schur;
2659     arr           = mumps->id.schur;
2660     listvar_schur = mumps->id.listvar_schur;
2661     nblk          = mumps->id.nblk;
2662     blkvar        = mumps->id.blkvar;
2663     blkptr        = mumps->id.blkptr;
2664     if (PetscDefined(USE_DEBUG)) {
2665       for (PetscInt i = 0; i < size; i++)
2666         PetscCheck(listvar_schur[i] - 1 >= 0 && listvar_schur[i] - 1 < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid Schur index at position %" PetscInt_FMT "! %" PetscInt_FMT " must be in [0, %" PetscInt_FMT ")", i, (PetscInt)listvar_schur[i] - 1,
2667                    A->rmap->N);
2668     }
2669 
2670     PetscCall(PetscOptionsEnum("-pc_precision", "Precision used by MUMPS", "MATSOLVERMUMPS", PetscPrecisionTypes, (PetscEnum)precision, (PetscEnum *)&precision, NULL));
2671     PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "MUMPS does not support %s precision", PetscPrecisionTypes[precision]);
2672     PetscCheck(precision == PETSC_SCALAR_PRECISION || PetscDefined(HAVE_MUMPS_MIXED_PRECISION), PetscObjectComm((PetscObject)F), PETSC_ERR_USER, "Your MUMPS library does not support mixed precision, but which is needed with your specified PetscScalar");
2673     PetscCall(MatMumpsAllocateInternalID(&mumps->id, precision));
2674 
2675     PetscMUMPS_c(mumps);
2676     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2677 
2678     /* set PETSc-MUMPS default options - override MUMPS default */
2679     mumps->id.ICNTL(3) = 0;
2680     mumps->id.ICNTL(4) = 0;
2681     if (mumps->petsc_size == 1) {
2682       mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
2683       mumps->id.ICNTL(7)  = 7; /* automatic choice of ordering done by the package */
2684     } else {
2685       mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
2686       mumps->id.ICNTL(21) = 1; /* distributed solution */
2687     }
2688     if (nblk && blkptr) {
2689       mumps->id.ICNTL(15) = 1;
2690       mumps->id.nblk      = nblk;
2691       mumps->id.blkvar    = blkvar;
2692       mumps->id.blkptr    = blkptr;
2693     } else mumps->id.ICNTL(15) = 0;
2694 
2695     /* restore cached ICNTL and CNTL values */
2696     for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl];
2697     for (icntl = 0; icntl < nCNTL_pre; ++icntl) ID_CNTL_SET(mumps->id, (PetscInt)mumps->CNTL_pre[1 + 2 * icntl], mumps->CNTL_pre[2 + 2 * icntl]);
2698 
2699     PetscCall(PetscFree(mumps->ICNTL_pre));
2700     PetscCall(PetscFree(mumps->CNTL_pre));
2701 
2702     if (schur) {
2703       mumps->id.size_schur    = size;
2704       mumps->id.schur_lld     = size;
2705       mumps->id.schur         = arr;
2706       mumps->id.listvar_schur = listvar_schur;
2707       if (mumps->petsc_size > 1) {
2708         PetscBool gs; /* gs is false if any rank other than root has non-empty IS */
2709 
2710         mumps->id.ICNTL(19) = 1;                                                                            /* MUMPS returns Schur centralized on the host */
2711         gs                  = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
2712         PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPI_C_BOOL, MPI_LAND, mumps->petsc_comm));
2713         PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc");
2714       } else {
2715         if (F->factortype == MAT_FACTOR_LU) {
2716           mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
2717         } else {
2718           mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
2719         }
2720       }
2721       mumps->id.ICNTL(26) = -1;
2722     }
2723 
2724     /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
2725        For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
2726      */
2727     PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm));
2728     PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm));
2729 
2730     mumps->scat_rhs = NULL;
2731     mumps->scat_sol = NULL;
2732   }
2733   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg));
2734   if (flg) mumps->id.ICNTL(1) = icntl;
2735   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg));
2736   if (flg) mumps->id.ICNTL(2) = icntl;
2737   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg));
2738   if (flg) mumps->id.ICNTL(3) = icntl;
2739 
2740   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg));
2741   if (flg) mumps->id.ICNTL(4) = icntl;
2742   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */
2743 
2744   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg));
2745   if (flg) mumps->id.ICNTL(6) = icntl;
2746 
2747   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg));
2748   if (flg) {
2749     PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
2750     mumps->id.ICNTL(7) = icntl;
2751   }
2752 
2753   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL));
2754   /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */
2755   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL));
2756   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL));
2757   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL));
2758   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL));
2759   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL));
2760   PetscCall(MatGetBlockSizes(A, &rbs, &cbs));
2761   if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = (PetscMUMPSInt)-rbs;
2762   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg));
2763   if (flg) {
2764     if (mumps->id.ICNTL(15) < 0) PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes");
2765     else if (mumps->id.ICNTL(15) > 0) {
2766       const PetscInt *bsizes;
2767       PetscInt        nblocks, p, *blkptr = NULL;
2768       PetscMPIInt    *recvcounts, *displs, n;
2769       PetscMPIInt     rank, size = 0;
2770 
2771       PetscCall(MatGetVariableBlockSizes(A, &nblocks, &bsizes));
2772       flg = PETSC_TRUE;
2773       for (p = 0; p < nblocks; ++p) {
2774         if (bsizes[p] > 1) break;
2775       }
2776       if (p == nblocks) flg = PETSC_FALSE;
2777       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &flg, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
2778       if (flg) { // if at least one process supplies variable block sizes and they are not all set to 1
2779         PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2780         if (rank == 0) PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2781         PetscCall(PetscCalloc2(size, &recvcounts, size + 1, &displs));
2782         PetscCall(PetscMPIIntCast(nblocks, &n));
2783         PetscCallMPI(MPI_Gather(&n, 1, MPI_INT, recvcounts, 1, MPI_INT, 0, PetscObjectComm((PetscObject)A)));
2784         for (PetscInt p = 0; p < size; ++p) displs[p + 1] = displs[p] + recvcounts[p];
2785         PetscCall(PetscMalloc1(displs[size] + 1, &blkptr));
2786         PetscCallMPI(MPI_Bcast(displs + size, 1, MPIU_INT, 0, PetscObjectComm((PetscObject)A)));
2787         PetscCallMPI(MPI_Gatherv(bsizes, n, MPIU_INT, blkptr + 1, recvcounts, displs, MPIU_INT, 0, PetscObjectComm((PetscObject)A)));
2788         if (rank == 0) {
2789           blkptr[0] = 1;
2790           for (PetscInt p = 0; p < n; ++p) blkptr[p + 1] += blkptr[p];
2791           PetscCall(MatMumpsSetBlk(F, displs[size], NULL, blkptr));
2792         }
2793         PetscCall(PetscFree2(recvcounts, displs));
2794         PetscCall(PetscFree(blkptr));
2795       }
2796     }
2797   }
2798   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL));
2799   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */
2800     PetscCall(MatDestroy(&F->schur));
2801     PetscCall(MatMumpsResetSchur_Private(mumps));
2802   }
2803 
2804   /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
2805      and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
2806      and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
2807      This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
2808      see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
2809      In short, we could not use distributed RHS until with MPICH v4.0b1 or we enabled a workaround in mumps-5.6.2+
2810    */
2811   mumps->ICNTL20 = 10; /* Distributed dense RHS, by default */
2812 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (PetscDefined(HAVE_MPICH) && MPICH_NUMVERSION < 40000101) || PetscDefined(HAVE_MSMPI)
2813   mumps->ICNTL20 = 0; /* Centralized dense RHS, if need be */
2814 #endif
2815   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg));
2816   PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20);
2817 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
2818   PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0");
2819 #endif
2820   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */
2821 
2822   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), &flg));
2823   if (flg && mumps->id.ICNTL(22) != 1) mumps->id.ICNTL(22) = 0; // MUMPS treats values other than 1 as 0. Normalize it so we can safely use 'if (mumps->id.ICNTL(22))'
2824   if (mumps->id.ICNTL(22)) {
2825     // MUMPS will use the /tmp directory if -mat_mumps_ooc_tmpdir is not set by user.
2826     // We don't provide option -mat_mumps_ooc_prefix, as we use F's prefix as OOC_PREFIX, which is set later during MatFactorNumeric_MUMPS() to also handle cases where users enable OOC via MatMumpsSetICNTL().
2827     PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "Out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(((MUMPS_STRUC_C *)NULL)->ooc_tmpdir), NULL));
2828   }
2829   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL));
2830   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL));
2831   if (mumps->id.ICNTL(24)) mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */
2832 
2833   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL));
2834   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL));
2835   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL));
2836   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL));
2837   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL));
2838   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
2839   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL));
2840   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL));  -- not supported by PETSc API */
2841   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
2842   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
2843   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
2844   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_37", "ICNTL(37): compression of the contribution blocks (CB)", "None", mumps->id.ICNTL(37), &mumps->id.ICNTL(37), NULL));
2845   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));
2846   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_48", "ICNTL(48): multithreading with tree parallelism", "None", mumps->id.ICNTL(48), &mumps->id.ICNTL(48), NULL));
2847   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_49", "ICNTL(49): compact workarray at the end of factorization phase", "None", mumps->id.ICNTL(49), &mumps->id.ICNTL(49), NULL));
2848   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_56", "ICNTL(56): postponing and rank-revealing factorization", "None", mumps->id.ICNTL(56), &mumps->id.ICNTL(56), NULL));
2849   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL));
2850 
2851   PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 1), &cntl, &flg));
2852   if (flg) ID_CNTL_SET(mumps->id, 1, cntl);
2853   PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", (PetscReal)ID_CNTL_GET(mumps->id, 2), &cntl, &flg));
2854   if (flg) ID_CNTL_SET(mumps->id, 2, cntl);
2855   PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 3), &cntl, &flg));
2856   if (flg) ID_CNTL_SET(mumps->id, 3, cntl);
2857   PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", (PetscReal)ID_CNTL_GET(mumps->id, 4), &cntl, &flg));
2858   if (flg) ID_CNTL_SET(mumps->id, 4, cntl);
2859   PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", (PetscReal)ID_CNTL_GET(mumps->id, 5), &cntl, &flg));
2860   if (flg) ID_CNTL_SET(mumps->id, 5, cntl);
2861   PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", (PetscReal)ID_CNTL_GET(mumps->id, 7), &cntl, &flg));
2862   if (flg) ID_CNTL_SET(mumps->id, 7, cntl);
2863 
2864   PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL));
2865   if (ninfo) {
2866     PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must <= 80", ninfo);
2867     PetscCall(PetscMalloc1(ninfo, &mumps->info));
2868     mumps->ninfo = ninfo;
2869     for (i = 0; i < ninfo; i++) {
2870       PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must between 1 and 80", ninfo);
2871       mumps->info[i] = info[i];
2872     }
2873   }
2874   PetscOptionsEnd();
2875   PetscFunctionReturn(PETSC_SUCCESS);
2876 }
2877 
2878 static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info, Mat_MUMPS *mumps)
2879 {
2880   PetscFunctionBegin;
2881   if (mumps->id.INFOG(1) < 0) {
2882     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2883     if (mumps->id.INFOG(1) == -6) {
2884       PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2885       F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
2886     } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
2887       PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2888       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2889     } else {
2890       PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2891       F->factorerrortype = MAT_FACTOR_OTHER;
2892     }
2893   }
2894   if (!mumps->id.n) F->factorerrortype = MAT_FACTOR_NOERROR;
2895   PetscFunctionReturn(PETSC_SUCCESS);
2896 }
2897 
2898 static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, PETSC_UNUSED IS c, const MatFactorInfo *info)
2899 {
2900   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2901   Vec            b;
2902   const PetscInt M = A->rmap->N;
2903 
2904   PetscFunctionBegin;
2905   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2906     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
2907     PetscFunctionReturn(PETSC_SUCCESS);
2908   }
2909 
2910   /* Set MUMPS options from the options database */
2911   PetscCall(MatSetFromOptions_MUMPS(F, A));
2912 
2913   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2914   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2915 
2916   /* analysis phase */
2917   mumps->id.job = JOB_FACTSYMBOLIC;
2918   PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
2919   switch (mumps->id.ICNTL(18)) {
2920   case 0: /* centralized assembled matrix input */
2921     if (!mumps->myid) {
2922       mumps->id.nnz = mumps->nnz;
2923       mumps->id.irn = mumps->irn;
2924       mumps->id.jcn = mumps->jcn;
2925       if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
2926       if (r && mumps->id.ICNTL(7) == 7) {
2927         mumps->id.ICNTL(7) = 1;
2928         if (!mumps->myid) {
2929           const PetscInt *idx;
2930           PetscInt        i;
2931 
2932           PetscCall(PetscMalloc1(M, &mumps->id.perm_in));
2933           PetscCall(ISGetIndices(r, &idx));
2934           for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &mumps->id.perm_in[i])); /* perm_in[]: start from 1, not 0! */
2935           PetscCall(ISRestoreIndices(r, &idx));
2936         }
2937       }
2938     }
2939     break;
2940   case 3: /* distributed assembled matrix input (size>1) */
2941     mumps->id.nnz_loc = mumps->nnz;
2942     mumps->id.irn_loc = mumps->irn;
2943     mumps->id.jcn_loc = mumps->jcn;
2944     if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
2945     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2946       PetscCall(MatCreateVecs(A, NULL, &b));
2947       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2948       PetscCall(VecDestroy(&b));
2949     }
2950     break;
2951   }
2952   PetscMUMPS_c(mumps);
2953   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2954 
2955   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2956   F->ops->solve             = MatSolve_MUMPS;
2957   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2958   F->ops->matsolve          = MatMatSolve_MUMPS;
2959   F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2960   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2961 
2962   mumps->matstruc = SAME_NONZERO_PATTERN;
2963   PetscFunctionReturn(PETSC_SUCCESS);
2964 }
2965 
2966 /* Note the PETSc r and c permutations are ignored */
2967 static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, PETSC_UNUSED IS r, PETSC_UNUSED IS c, const MatFactorInfo *info)
2968 {
2969   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2970   Vec            b;
2971   const PetscInt M = A->rmap->N;
2972 
2973   PetscFunctionBegin;
2974   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2975     /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
2976     PetscFunctionReturn(PETSC_SUCCESS);
2977   }
2978 
2979   /* Set MUMPS options from the options database */
2980   PetscCall(MatSetFromOptions_MUMPS(F, A));
2981 
2982   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2983   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2984 
2985   /* analysis phase */
2986   mumps->id.job = JOB_FACTSYMBOLIC;
2987   PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
2988   switch (mumps->id.ICNTL(18)) {
2989   case 0: /* centralized assembled matrix input */
2990     if (!mumps->myid) {
2991       mumps->id.nnz = mumps->nnz;
2992       mumps->id.irn = mumps->irn;
2993       mumps->id.jcn = mumps->jcn;
2994       if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
2995     }
2996     break;
2997   case 3: /* distributed assembled matrix input (size>1) */
2998     mumps->id.nnz_loc = mumps->nnz;
2999     mumps->id.irn_loc = mumps->irn;
3000     mumps->id.jcn_loc = mumps->jcn;
3001     if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
3002     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
3003       PetscCall(MatCreateVecs(A, NULL, &b));
3004       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
3005       PetscCall(VecDestroy(&b));
3006     }
3007     break;
3008   }
3009   PetscMUMPS_c(mumps);
3010   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
3011 
3012   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
3013   F->ops->solve             = MatSolve_MUMPS;
3014   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
3015   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
3016 
3017   mumps->matstruc = SAME_NONZERO_PATTERN;
3018   PetscFunctionReturn(PETSC_SUCCESS);
3019 }
3020 
3021 /* Note the PETSc r permutation and factor info are ignored */
3022 static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, PETSC_UNUSED IS r, const MatFactorInfo *info)
3023 {
3024   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
3025   Vec            b;
3026   const PetscInt M = A->rmap->N;
3027 
3028   PetscFunctionBegin;
3029   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
3030     /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
3031     PetscFunctionReturn(PETSC_SUCCESS);
3032   }
3033 
3034   /* Set MUMPS options from the options database */
3035   PetscCall(MatSetFromOptions_MUMPS(F, A));
3036 
3037   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
3038   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
3039 
3040   /* analysis phase */
3041   mumps->id.job = JOB_FACTSYMBOLIC;
3042   PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
3043   switch (mumps->id.ICNTL(18)) {
3044   case 0: /* centralized assembled matrix input */
3045     if (!mumps->myid) {
3046       mumps->id.nnz = mumps->nnz;
3047       mumps->id.irn = mumps->irn;
3048       mumps->id.jcn = mumps->jcn;
3049       if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
3050     }
3051     break;
3052   case 3: /* distributed assembled matrix input (size>1) */
3053     mumps->id.nnz_loc = mumps->nnz;
3054     mumps->id.irn_loc = mumps->irn;
3055     mumps->id.jcn_loc = mumps->jcn;
3056     if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
3057     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
3058       PetscCall(MatCreateVecs(A, NULL, &b));
3059       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
3060       PetscCall(VecDestroy(&b));
3061     }
3062     break;
3063   }
3064   PetscMUMPS_c(mumps);
3065   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
3066 
3067   F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
3068   F->ops->solve                 = MatSolve_MUMPS;
3069   F->ops->solvetranspose        = MatSolve_MUMPS;
3070   F->ops->matsolve              = MatMatSolve_MUMPS;
3071   F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
3072   F->ops->matsolvetranspose     = MatMatSolveTranspose_MUMPS;
3073 #if defined(PETSC_USE_COMPLEX)
3074   F->ops->getinertia = NULL;
3075 #else
3076   F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
3077 #endif
3078 
3079   mumps->matstruc = SAME_NONZERO_PATTERN;
3080   PetscFunctionReturn(PETSC_SUCCESS);
3081 }
3082 
/* ASCII viewer for a MUMPS factored matrix.

   With the ::ascii_info format it prints the MUMPS control parameters and the
   statistics available on the host (rank 0, INFOG/RINFOG); with
   ::ascii_info_detail it additionally prints the per-process RINFO()/INFO()
   statistics in synchronized (rank-ordered) fashion. */
static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer)
{
  PetscBool         isascii;
  PetscViewerFormat format;
  Mat_MUMPS        *mumps = (Mat_MUMPS *)A->data;

  PetscFunctionBegin;
  /* check if matrix is mumps type */
  if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  if (isascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n"));
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        /* control parameters (ICNTL/CNTL) currently in effect */
        PetscCall(PetscViewerASCIIPrintf(viewer, "  SYM (matrix type):                   %d\n", mumps->id.sym));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  PAR (host participation):            %d\n", mumps->id.par));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(1) (output for error):         %d\n", mumps->id.ICNTL(1)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(3) (output for global info):   %d\n", mumps->id.ICNTL(3)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(4) (level of printing):        %d\n", mumps->id.ICNTL(4)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(5) (input mat struct):         %d\n", mumps->id.ICNTL(5)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(6) (matrix prescaling):        %d\n", mumps->id.ICNTL(6)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(8) (scaling strategy):         %d\n", mumps->id.ICNTL(8)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(10) (max num of refinements):  %d\n", mumps->id.ICNTL(10)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(11) (error analysis):          %d\n", mumps->id.ICNTL(11)));
        if (mumps->id.ICNTL(11) > 0) {
          /* error-analysis statistics are only meaningful when ICNTL(11) was enabled for the solve */
          PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(4) (inf norm of input mat):        %g\n", (double)ID_RINFOG_GET(mumps->id, 4)));
          PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(5) (inf norm of solution):         %g\n", (double)ID_RINFOG_GET(mumps->id, 5)));
          PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(6) (inf norm of residual):         %g\n", (double)ID_RINFOG_GET(mumps->id, 6)));
          PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 7), (double)ID_RINFOG_GET(mumps->id, 8)));
          PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(9) (error estimate):               %g\n", (double)ID_RINFOG_GET(mumps->id, 9)));
          PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 10), (double)ID_RINFOG_GET(mumps->id, 11)));
        }
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(12) (efficiency control):                         %d\n", mumps->id.ICNTL(12)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(13) (sequential factorization of the root node):  %d\n", mumps->id.ICNTL(13)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(15) (compression of the input matrix):            %d\n", mumps->id.ICNTL(15)));
        /* ICNTL(16-17) not used */
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(18) (input mat struct):                           %d\n", mumps->id.ICNTL(18)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(19) (Schur complement info):                      %d\n", mumps->id.ICNTL(19)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(20) (RHS sparse pattern):                         %d\n", mumps->id.ICNTL(20)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(21) (solution struct):                            %d\n", mumps->id.ICNTL(21)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(22) (in-core/out-of-core facility):               %d\n", mumps->id.ICNTL(22)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23)));

        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(24) (detection of null pivot rows):               %d\n", mumps->id.ICNTL(24)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(25) (computation of a null space basis):          %d\n", mumps->id.ICNTL(25)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(26) (Schur options for RHS or solution):          %d\n", mumps->id.ICNTL(26)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(27) (blocking size for multiple RHS):             %d\n", mumps->id.ICNTL(27)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(28) (use parallel or sequential ordering):        %d\n", mumps->id.ICNTL(28)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(29) (parallel ordering):                          %d\n", mumps->id.ICNTL(29)));

        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(30) (user-specified set of entries in inv(A)):    %d\n", mumps->id.ICNTL(30)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(31) (factors is discarded in the solve phase):    %d\n", mumps->id.ICNTL(31)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(33) (compute determinant):                        %d\n", mumps->id.ICNTL(33)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(35) (activate BLR based factorization):           %d\n", mumps->id.ICNTL(35)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(36) (choice of BLR factorization variant):        %d\n", mumps->id.ICNTL(36)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(37) (compression of the contribution blocks):     %d\n", mumps->id.ICNTL(37)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(38) (estimated compression rate of LU factors):   %d\n", mumps->id.ICNTL(38)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(48) (multithreading with tree parallelism):       %d\n", mumps->id.ICNTL(48)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(49) (compact workarray at the end of factorization phase):%d\n", mumps->id.ICNTL(49)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(56) (postponing and rank-revealing factorization):%d\n", mumps->id.ICNTL(56)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(58) (options for symbolic factorization):         %d\n", mumps->id.ICNTL(58)));

        PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(1) (relative pivoting threshold):      %g\n", (double)ID_CNTL_GET(mumps->id, 1)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(2) (stopping criterion of refinement): %g\n", (double)ID_CNTL_GET(mumps->id, 2)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(3) (absolute pivoting threshold):      %g\n", (double)ID_CNTL_GET(mumps->id, 3)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(4) (value of static pivoting):         %g\n", (double)ID_CNTL_GET(mumps->id, 4)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(5) (fixation for null pivots):         %g\n", (double)ID_CNTL_GET(mumps->id, 5)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(7) (dropping parameter for BLR):       %g\n", (double)ID_CNTL_GET(mumps->id, 7)));

        /* information local to each processor; printed rank-by-rank via the
           synchronized printf, with a flush after each statistic so the
           output for different statistics does not interleave */
        PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis):\n"));
        PetscCall(PetscViewerASCIIPushSynchronized(viewer));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 1)));
        PetscCall(PetscViewerFlush(viewer));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 2)));
        PetscCall(PetscViewerFlush(viewer));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 3)));
        PetscCall(PetscViewerFlush(viewer));

        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(15)));
        PetscCall(PetscViewerFlush(viewer));

        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(16)));
        PetscCall(PetscViewerFlush(viewer));

        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(23)));
        PetscCall(PetscViewerFlush(viewer));

        /* user-requested extra INFO() entries (-mat_mumps_view_info); MUMPS INFO has 80 slots */
        if (mumps->ninfo && mumps->ninfo <= 80) {
          PetscInt i;
          for (i = 0; i < mumps->ninfo; i++) {
            PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(%" PetscInt_FMT "):\n", mumps->info[i]));
            PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i])));
            PetscCall(PetscViewerFlush(viewer));
          }
        }
        PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      } else PetscCall(PetscViewerASCIIPrintf(viewer, "  Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : ""));

      if (mumps->myid == 0) { /* information from the host */
        PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", (double)ID_RINFOG_GET(mumps->id, 1)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 2)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 3)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", (double)ID_RINFOG_GET(mumps->id, 12), (double)ID_RINFOG_GET(mumps->id, 13), mumps->id.INFOG(34)));

        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39)));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3237 
3238 static PetscErrorCode MatGetInfo_MUMPS(Mat A, PETSC_UNUSED MatInfoType flag, MatInfo *info)
3239 {
3240   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
3241 
3242   PetscFunctionBegin;
3243   info->block_size        = 1.0;
3244   info->nz_allocated      = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
3245   info->nz_used           = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
3246   info->nz_unneeded       = 0.0;
3247   info->assemblies        = 0.0;
3248   info->mallocs           = 0.0;
3249   info->memory            = 0.0;
3250   info->fill_ratio_given  = 0;
3251   info->fill_ratio_needed = 0;
3252   info->factor_mallocs    = 0;
3253   PetscFunctionReturn(PETSC_SUCCESS);
3254 }
3255 
/* Register the index set `is` as the Schur complement block of the factor F.

   Creates the sequential dense Schur complement matrix F->schur of size
   ISGetLocalSize(is), records its size/leading dimension in the MUMPS
   structure, and hands MUMPS the 1-based (Fortran) variable list
   id.listvar_schur. The mumps->id.schur[] storage itself is allocated later,
   once the working precision is known. */
static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
{
  Mat_MUMPS         *mumps = (Mat_MUMPS *)F->data;
  const PetscScalar *arr;
  const PetscInt    *idxs;
  PetscInt           size, i;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(is, &size));
  /* Schur complement matrix */
  PetscCall(MatDestroy(&F->schur));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
  /* NOTE(review): `arr` is fetched and restored without being used -- presumably
     to force allocation of the dense storage of F->schur here; confirm */
  PetscCall(MatDenseGetArrayRead(F->schur, &arr));
  // don't allocate mumps->id.schur[] now as its precision is yet to know
  PetscCall(PetscMUMPSIntCast(size, &mumps->id.size_schur));
  PetscCall(PetscMUMPSIntCast(size, &mumps->id.schur_lld));
  PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
  if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE)); /* sym == 1 is the SPD case */

  /* MUMPS expects Fortran style indices */
  PetscCall(PetscFree(mumps->id.listvar_schur));
  PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
  PetscCall(ISGetIndices(is, &idxs));
  for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &mumps->id.listvar_schur[i]));
  PetscCall(ISRestoreIndices(is, &idxs));
  /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
  if (mumps->id.icntl) mumps->id.ICNTL(26) = -1;
  else mumps->ICNTL26 = -1;
  PetscFunctionReturn(PETSC_SUCCESS);
}
3286 
/* Copy the Schur complement computed by MUMPS (mumps->id.schur) into a newly
   created sequential dense (column-major) matrix *S.

   The MUMPS storage scheme is selected by ICNTL(19):
     unsymmetric: 1 = full matrix stored by rows (transposed on copy),
                  otherwise full matrix stored by columns (direct copy);
     symmetric:   1 = lower triangle stored by rows, 2 = lower triangle stored
                  by columns (both mirrored into the upper triangle), 3 = full.
   The caller owns *S. */
static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
{
  Mat          St;
  Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
  PetscScalar *array;
  PetscInt     i, j, N = mumps->id.size_schur;

  PetscFunctionBegin;
  PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! Call MatFactorSetSchurIS() to enable it");
  PetscCall(MatCreate(PETSC_COMM_SELF, &St));
  PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
  PetscCall(MatSetType(St, MATDENSE));
  PetscCall(MatSetUp(St));
  PetscCall(MatDenseGetArray(St, &array));
  if (!mumps->sym) {                /* MUMPS always return a full matrix */
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
      for (i = 0; i < N; i++) {
        /* transpose row-major MUMPS data into the column-major dense array */
        for (j = 0; j < N; j++) array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
      }
    } else { /* stored by columns */
      PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array));
    }
  } else {                          /* either full or lower-triangular (not packed) */
    if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
      for (i = 0; i < N; i++) {
        /* mirror the lower triangle into the upper one (symmetric matrix) */
        for (j = i; j < N; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
      }
    } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
      PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array));
    } else { /* ICNTL(19) == 1 lower triangular stored by rows */
      for (i = 0; i < N; i++) {
        for (j = 0; j < i + 1; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
      }
    }
  }
  PetscCall(MatDenseRestoreArray(St, &array));
  *S = St;
  PetscFunctionReturn(PETSC_SUCCESS);
}
3326 
/* Set MUMPS ICNTL(icntl) = ival.

   If PetscMUMPS_c() has never been called (id.job == JOB_NULL) the pair is
   cached in mumps->ICNTL_pre and applied later at MUMPS initialization.
   Cache layout: ICNTL_pre[0] holds the number of cached pairs, followed by
   consecutive (icntl, ival) pairs; setting an already-cached index overwrites
   its value in place. */
static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) {                                            /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
    PetscMUMPSInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
    for (i = 0; i < nICNTL_pre; ++i)
      if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
    if (i == nICNTL_pre) {                             /* not already cached */
      /* grow the cache by one pair; the first allocation also creates the counter slot */
      if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
      else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
      mumps->ICNTL_pre[0]++;
    }
    mumps->ICNTL_pre[1 + 2 * i] = (PetscMUMPSInt)icntl;
    PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i));
  } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl)));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3346 
3347 static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
3348 {
3349   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3350 
3351   PetscFunctionBegin;
3352   if (mumps->id.job == JOB_NULL) {
3353     PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
3354     *ival = 0;
3355     for (i = 0; i < nICNTL_pre; ++i) {
3356       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i];
3357     }
3358   } else *ival = mumps->id.ICNTL(icntl);
3359   PetscFunctionReturn(PETSC_SUCCESS);
3360 }
3361 
/*@
  MatMumpsSetIcntl - Set MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameters:
+ F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
. icntl - index of MUMPS parameter array `ICNTL()`
- ival  - value of MUMPS `ICNTL(icntl)`

  Options Database Key:
. -mat_mumps_icntl_<icntl> <ival> - change the option numbered `icntl` to `ival`

  Level: beginner

  Note:
  Ignored if MUMPS is not installed or `F` is not a MUMPS matrix

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidLogicalCollectiveInt(F, icntl, 2);
  PetscValidLogicalCollectiveInt(F, ival, 3);
  /* ICNTL indices supported by this interface; keep in sync with MatMumpsGetIcntl() */
  PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 49 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
  PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3393 
/*@
  MatMumpsGetIcntl - Get MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameters:
+ F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
- icntl - index of MUMPS parameter array ICNTL()

  Output Parameter:
. ival - value of MUMPS ICNTL(icntl)

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidLogicalCollectiveInt(F, icntl, 2);
  PetscAssertPointer(ival, 3);
  /* same set of supported ICNTL indices as MatMumpsSetIcntl() */
  PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 49 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
  PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3421 
/* Set MUMPS CNTL(icntl) = val.

   Mirrors MatMumpsSetIcntl_MUMPS(): before MUMPS is initialized the pair is
   cached in mumps->CNTL_pre, a PetscReal array in which the index icntl is
   itself stored as a real; CNTL_pre[0] holds the number of cached pairs. */
static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) {
    PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
    for (i = 0; i < nCNTL_pre; ++i)
      if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; /* already cached? then overwrite below */
    if (i == nCNTL_pre) {
      /* grow the cache by one (icntl, val) pair; first allocation also creates the counter slot */
      if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre));
      else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre));
      mumps->CNTL_pre[0]++;
    }
    mumps->CNTL_pre[1 + 2 * i] = icntl;
    mumps->CNTL_pre[2 + 2 * i] = val;
  } else ID_CNTL_SET(mumps->id, icntl, val);
  PetscFunctionReturn(PETSC_SUCCESS);
}
3441 
3442 static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val)
3443 {
3444   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3445 
3446   PetscFunctionBegin;
3447   if (mumps->id.job == JOB_NULL) {
3448     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
3449     *val = 0.0;
3450     for (i = 0; i < nCNTL_pre; ++i) {
3451       if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i];
3452     }
3453   } else *val = ID_CNTL_GET(mumps->id, icntl);
3454   PetscFunctionReturn(PETSC_SUCCESS);
3455 }
3456 
/*@C
  MatMumpsSetOocTmpDir - Set MUMPS out-of-core `OOC_TMPDIR` <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameters:
+ F      - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`.
- tmpdir - temporary directory for out-of-core facility.

  Level: beginner

  Note:
  To make it effective, this routine must be called before the numeric factorization, i.e., `PCSetUp()`.
  If `ooc_tmpdir` is not set, MUMPS will also check the environment variable `MUMPS_OOC_TMPDIR`. But if neither was defined, it will use /tmp by default.

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetOocTmpDir`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsSetOocTmpDir(Mat F, const char *tmpdir)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(F, MAT_CLASSID, 1);
  PetscValidType(F, 1);
  /* copy (truncating if necessary) into the fixed-size ooc_tmpdir field of the MUMPS structure */
  PetscCall(PetscStrncpy(mumps->id.ooc_tmpdir, tmpdir, sizeof(((MUMPS_STRUC_C *)NULL)->ooc_tmpdir)));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3484 
/*@C
  MatMumpsGetOocTmpDir - Get MUMPS out-of-core `OOC_TMPDIR` <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameter:
. F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`.

  Output Parameter:
. tmpdir - temporary directory for out-of-core facility.

  Level: beginner

  Note:
  The returned string is read-only and user should not try to change it.

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetOocTmpDir`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsGetOocTmpDir(Mat F, const char **tmpdir)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(F, MAT_CLASSID, 1);
  PetscValidType(F, 1);
  /* returns a pointer into the MUMPS structure; no copy is made */
  if (tmpdir) *tmpdir = mumps->id.ooc_tmpdir;
  PetscFunctionReturn(PETSC_SUCCESS);
}
3513 
/*@
  MatMumpsSetCntl - Set MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameters:
+ F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
. icntl - index of MUMPS parameter array `CNTL()`
- val   - value of MUMPS `CNTL(icntl)`

  Options Database Key:
. -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to val

  Level: beginner

  Note:
  Ignored if MUMPS is not installed or `F` is not a MUMPS matrix

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidLogicalCollectiveInt(F, icntl, 2);
  PetscValidLogicalCollectiveReal(F, val, 3);
  /* only CNTL(1)-CNTL(7) are exposed through this interface */
  PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
  PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3545 
/*@
  MatMumpsGetCntl - Get MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameters:
+ F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
- icntl - index of MUMPS parameter array CNTL()

  Output Parameter:
. val - value of MUMPS CNTL(icntl)

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidLogicalCollectiveInt(F, icntl, 2);
  PetscAssertPointer(val, 3);
  /* same range of supported CNTL indices as MatMumpsSetCntl() */
  PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
  PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3573 
3574 static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info)
3575 {
3576   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3577 
3578   PetscFunctionBegin;
3579   *info = mumps->id.INFO(icntl);
3580   PetscFunctionReturn(PETSC_SUCCESS);
3581 }
3582 
3583 static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog)
3584 {
3585   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3586 
3587   PetscFunctionBegin;
3588   *infog = mumps->id.INFOG(icntl);
3589   PetscFunctionReturn(PETSC_SUCCESS);
3590 }
3591 
3592 static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo)
3593 {
3594   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3595 
3596   PetscFunctionBegin;
3597   *rinfo = ID_RINFO_GET(mumps->id, icntl);
3598   PetscFunctionReturn(PETSC_SUCCESS);
3599 }
3600 
3601 static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog)
3602 {
3603   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3604 
3605   PetscFunctionBegin;
3606   *rinfog = ID_RINFOG_GET(mumps->id, icntl);
3607   PetscFunctionReturn(PETSC_SUCCESS);
3608 }
3609 
3610 static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array)
3611 {
3612   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3613 
3614   PetscFunctionBegin;
3615   PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
3616   *size  = 0;
3617   *array = NULL;
3618   if (!mumps->myid) {
3619     *size = mumps->id.INFOG(28);
3620     PetscCall(PetscMalloc1(*size, array));
3621     for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1;
3622   }
3623   PetscFunctionReturn(PETSC_SUCCESS);
3624 }
3625 
/* Compute user-selected entries of inv(A) by feeding MUMPS a sparse right-hand side whose
   nonzero pattern selects the requested entries; results overwrite the values of spRHS. */
static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS)
{
  Mat          Bt = NULL, Btseq = NULL;
  PetscBool    flg;
  Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
  PetscScalar *aa;
  PetscInt     spnr, *ia, *ja, M, nrhs;

  PetscFunctionBegin;
  PetscAssertPointer(spRHS, 2);
  /* spRHS must be a transpose wrapper: the CSR of the wrapped matrix Bt serves as the
     column-oriented sparse RHS layout MUMPS expects */
  PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg));
  PetscCheck(flg, PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix");
  /* reject shells carrying scalings/shifts/axpys, which this routine cannot honor */
  PetscCall(MatShellGetScalingShifts(spRHS, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
  PetscCall(MatTransposeGetMat(spRHS, &Bt));

  /* ICNTL(30)=1: compute user-specified entries of the inverse */
  PetscCall(MatMumpsSetIcntl(F, 30, 1));

  if (mumps->petsc_size > 1) {
    /* NOTE(review): assumes Bt is MATMPIAIJ in parallel and uses only its local diagonal block — confirm callers guarantee this */
    Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data;
    Btseq         = b->A;
  } else {
    Btseq = Bt;
  }

  PetscCall(MatGetSize(spRHS, &M, &nrhs));
  mumps->id.nrhs = (PetscMUMPSInt)nrhs;
  PetscCall(PetscMUMPSIntCast(M, &mumps->id.lrhs));
  mumps->id.rhs = NULL; /* dense RHS unused; the sparse-RHS fields below are used instead */

  if (!mumps->myid) {
    /* only the host (rank 0) supplies the centralized sparse RHS to MUMPS */
    PetscCall(MatSeqAIJGetArray(Btseq, &aa));
    PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
    /* note the 1-based shift requested from MatGetRowIJ above, matching MUMPS's Fortran-style indexing */
    PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
    PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)Btseq->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
  } else {
    /* non-host ranks provide no RHS data and drop any buffer left from a previous solve */
    mumps->id.irhs_ptr    = NULL;
    mumps->id.irhs_sparse = NULL;
    mumps->id.nz_rhs      = 0;
    if (mumps->id.rhs_sparse_len) {
      PetscCall(PetscFree(mumps->id.rhs_sparse));
      mumps->id.rhs_sparse_len = 0;
    }
  }
  mumps->id.ICNTL(20) = 1; /* rhs is sparse */
  mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */

  /* solve phase */
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));

  if (!mumps->myid) {
    /* restore the host-side accessors taken before the solve */
    PetscCall(MatSeqAIJRestoreArray(Btseq, &aa));
    PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3685 
3686 /*@
3687   MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` <https://mumps-solver.org/index.php?page=doc>
3688 
3689   Logically Collective
3690 
3691   Input Parameter:
3692 . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3693 
3694   Output Parameter:
3695 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A`
3696 
3697   Level: beginner
3698 
3699 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`
3700 @*/
PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  /* inverse entries can only be requested from a factored matrix */
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  /* PetscUseMethod: errors if F was not produced by the MUMPS solver; spRHS is validated inside */
  PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3709 
3710 static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST)
3711 {
3712   Mat spRHS;
3713 
3714   PetscFunctionBegin;
3715   PetscCall(MatCreateTranspose(spRHST, &spRHS));
3716   PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS));
3717   PetscCall(MatDestroy(&spRHS));
3718   PetscFunctionReturn(PETSC_SUCCESS);
3719 }
3720 
3721 /*@
  MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix $A^T$ <https://mumps-solver.org/index.php?page=doc>
3723 
3724   Logically Collective
3725 
3726   Input Parameter:
3727 . F - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3728 
3729   Output Parameter:
. spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of $A^T$
3731 
3732   Level: beginner
3733 
3734 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()`
3735 @*/
PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST)
{
  PetscBool flg;

  PetscFunctionBegin;
  PetscValidType(F, 1);
  /* inverse entries can only be requested from a factored matrix */
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  /* spRHST supplies the requested pattern and receives the results, so it must be a concrete AIJ matrix */
  PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
  PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix");
  /* PetscUseMethod: errors if F was not produced by the MUMPS solver */
  PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3748 
/* Store user-supplied variable block structure (MUMPS NBLK/BLKPTR/BLKVAR) on the factor;
   ICNTL(15) itself is assigned later, in MatSetFromOptions_MUMPS(). */
static PetscErrorCode MatMumpsSetBlk_MUMPS(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[])
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (nblk) {
    PetscAssertPointer(blkptr, 4);
    PetscCall(PetscMUMPSIntCast(nblk, &mumps->id.nblk));
    /* replace any previously stored block pointer array, converting entries to MUMPS's integer type */
    PetscCall(PetscFree(mumps->id.blkptr));
    PetscCall(PetscMalloc1(nblk + 1, &mumps->id.blkptr));
    for (PetscInt i = 0; i < nblk + 1; ++i) PetscCall(PetscMUMPSIntCast(blkptr[i], mumps->id.blkptr + i));
    // mumps->id.icntl[] might have not been allocated, which is done in MatSetFromOptions_MUMPS(). So we don't assign ICNTL(15).
    // We use id.nblk and id.blkptr to know what values to set to ICNTL(15) in MatSetFromOptions_MUMPS().
    // mumps->id.ICNTL(15) = 1;
    if (blkvar) {
      /* blkvar is optional; when given, its length is the global row count of F */
      PetscCall(PetscFree(mumps->id.blkvar));
      PetscCall(PetscMalloc1(F->rmap->N, &mumps->id.blkvar));
      for (PetscInt i = 0; i < F->rmap->N; ++i) PetscCall(PetscMUMPSIntCast(blkvar[i], mumps->id.blkvar + i));
    }
    /* NOTE(review): when blkvar is NULL here, a blkvar array stored by an earlier call is kept — confirm that is intended */
  } else {
    /* nblk == 0 clears any previously supplied block structure */
    PetscCall(PetscFree(mumps->id.blkptr));
    PetscCall(PetscFree(mumps->id.blkvar));
    // mumps->id.ICNTL(15) = 0;
    mumps->id.nblk = 0;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3776 
3777 /*@
3778   MatMumpsSetBlk - Set user-specified variable block sizes to be used with `-mat_mumps_icntl_15 1`
3779 
3780   Not collective, only relevant on the first process of the MPI communicator
3781 
3782   Input Parameters:
3783 + F      - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3784 . nblk   - the number of blocks
3785 . blkvar - see MUMPS documentation, `blkvar(blkptr(iblk):blkptr(iblk+1)-1)`, (`iblk=1, nblk`) holds the variables associated to block `iblk`
3786 - blkptr - array starting at 1 and of size `nblk + 1` storing the prefix sum of all blocks
3787 
3788   Level: advanced
3789 
3790 .seealso: [](ch_matrices), `MATSOLVERMUMPS`, `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatSetVariableBlockSizes()`
3791 @*/
PetscErrorCode MatMumpsSetBlk(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[])
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  /* block structure only applies to a factored matrix */
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  /* PetscUseMethod: errors if F was not produced by the MUMPS solver; blkptr is validated inside */
  PetscUseMethod(F, "MatMumpsSetBlk_C", (Mat, PetscInt, const PetscInt[], const PetscInt[]), (F, nblk, blkvar, blkptr));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3800 
3801 /*@
3802   MatMumpsGetInfo - Get MUMPS parameter INFO() <https://mumps-solver.org/index.php?page=doc>
3803 
3804   Logically Collective
3805 
3806   Input Parameters:
3807 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3808 - icntl - index of MUMPS parameter array INFO()
3809 
3810   Output Parameter:
3811 . ival - value of MUMPS INFO(icntl)
3812 
3813   Level: beginner
3814 
3815 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3816 @*/
PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  /* INFO() only has meaning on a factored matrix */
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscAssertPointer(ival, 3);
  /* PetscUseMethod: errors if F was not produced by the MUMPS solver */
  PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3826 
3827 /*@
3828   MatMumpsGetInfog - Get MUMPS parameter INFOG() <https://mumps-solver.org/index.php?page=doc>
3829 
3830   Logically Collective
3831 
3832   Input Parameters:
3833 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3834 - icntl - index of MUMPS parameter array INFOG()
3835 
3836   Output Parameter:
3837 . ival - value of MUMPS INFOG(icntl)
3838 
3839   Level: beginner
3840 
3841 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3842 @*/
PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  /* INFOG() only has meaning on a factored matrix */
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscAssertPointer(ival, 3);
  /* PetscUseMethod: errors if F was not produced by the MUMPS solver */
  PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3852 
3853 /*@
3854   MatMumpsGetRinfo - Get MUMPS parameter RINFO() <https://mumps-solver.org/index.php?page=doc>
3855 
3856   Logically Collective
3857 
3858   Input Parameters:
3859 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3860 - icntl - index of MUMPS parameter array RINFO()
3861 
3862   Output Parameter:
3863 . val - value of MUMPS RINFO(icntl)
3864 
3865   Level: beginner
3866 
3867 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
3868 @*/
PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  /* RINFO() only has meaning on a factored matrix */
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscAssertPointer(val, 3);
  /* PetscUseMethod: errors if F was not produced by the MUMPS solver */
  PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3878 
3879 /*@
3880   MatMumpsGetRinfog - Get MUMPS parameter RINFOG() <https://mumps-solver.org/index.php?page=doc>
3881 
3882   Logically Collective
3883 
3884   Input Parameters:
3885 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3886 - icntl - index of MUMPS parameter array RINFOG()
3887 
3888   Output Parameter:
3889 . val - value of MUMPS RINFOG(icntl)
3890 
3891   Level: beginner
3892 
3893 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
3894 @*/
PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  /* RINFOG() only has meaning on a factored matrix */
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscAssertPointer(val, 3);
  /* PetscUseMethod: errors if F was not produced by the MUMPS solver */
  PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3904 
3905 /*@
3906   MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() <https://mumps-solver.org/index.php?page=doc>
3907 
3908   Logically Collective
3909 
3910   Input Parameter:
3911 . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3912 
3913   Output Parameters:
3914 + size  - local size of the array. The size of the array is non-zero only on MPI rank 0
3915 - array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible
3916           for freeing this array.
3917 
3918   Level: beginner
3919 
3920 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
3921 @*/
PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  /* null pivots are only known after factorization */
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscAssertPointer(size, 2);
  PetscAssertPointer(array, 3);
  /* PetscUseMethod: errors if F was not produced by the MUMPS solver; *array is allocated inside and owned by the caller */
  PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3932 
3933 /*MC
3934   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
3935   MPI distributed and sequential matrices via the external package MUMPS <https://mumps-solver.org/index.php?page=doc>
3936 
3937   Works with `MATAIJ` and `MATSBAIJ` matrices
3938 
3939   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
3940 
3941   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode.
3942   See details below.
3943 
3944   Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver
3945 
3946   Options Database Keys:
3947 +  -mat_mumps_icntl_1  - ICNTL(1): output stream for error messages
3948 .  -mat_mumps_icntl_2  - ICNTL(2): output stream for diagnostic printing, statistics, and warning
3949 .  -mat_mumps_icntl_3  - ICNTL(3): output stream for global information, collected on the host
3950 .  -mat_mumps_icntl_4  - ICNTL(4): level of printing (0 to 4)
3951 .  -mat_mumps_icntl_6  - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)
3952 .  -mat_mumps_icntl_7  - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
3953                           Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
3954 .  -mat_mumps_icntl_8  - ICNTL(8): scaling strategy (-2 to 8 or 77)
3955 .  -mat_mumps_icntl_10 - ICNTL(10): max num of refinements
3956 .  -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view)
3957 .  -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
3958 .  -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
3959 .  -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space
3960 .  -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format
3961 .  -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement
3962 .  -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
3963 .  -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
.  -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can be allocated per processor
3965 .  -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
3966 .  -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis
.  -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase when a Schur complement matrix has been computed
3968 .  -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
3969 .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
3970 .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
3971 .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
3972 .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
3973 .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
3974 .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
3975 .  -mat_mumps_icntl_37 - ICNTL(37): compression of the contribution blocks (CB)
3976 .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
3977 .  -mat_mumps_icntl_48 - ICNTL(48): multithreading with tree parallelism
3978 .  -mat_mumps_icntl_49 - ICNTL(49): compact workarray at the end of factorization phase
3979 .  -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization
3980 .  -mat_mumps_cntl_1   - CNTL(1): relative pivoting threshold
3981 .  -mat_mumps_cntl_2   - CNTL(2): stopping criterion of refinement
3982 .  -mat_mumps_cntl_3   - CNTL(3): absolute pivoting threshold
3983 .  -mat_mumps_cntl_4   - CNTL(4): value for static pivoting
3984 .  -mat_mumps_cntl_5   - CNTL(5): fixation for null pivots
3985 .  -mat_mumps_cntl_7   - CNTL(7): precision of the dropping parameter used during BLR factorization
3986 -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
3987                                     Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.
3988 
3989   Level: beginner
3990 
3991   Notes:
3992   MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at <https://mumps-solver.org/index.php?page=doc>) so using it will
3993   error if the matrix is Hermitian.
3994 
3995   When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
3996   `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.
3997 
3998   When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about
3999   the failure with
4000 .vb
4001           KSPGetPC(ksp,&pc);
4002           PCFactorGetMatrix(pc,&mat);
4003           MatMumpsGetInfo(mat,....);
4004           MatMumpsGetInfog(mat,....); etc.
4005 .ve
4006   Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message.
4007 
4008   MUMPS provides 64-bit integer support in two build modes:
4009   full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
4010   requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
4011 
4012   selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
4013   MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
4014   columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
4015   integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
4016 
  With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.
4018 
4019   Two modes to run MUMPS/PETSc with OpenMP
4020 .vb
4021    Set `OMP_NUM_THREADS` and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
4022    threads per rank, then you may use "export `OMP_NUM_THREADS` = 16 && mpirun -n 4 ./test".
4023 .ve
4024 
4025 .vb
4026    `-mat_mumps_use_omp_threads` [m] and run your code with as many MPI ranks as the number of cores. For example,
4027    if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
4028 .ve
4029 
4030    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
4031    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc`
4032    (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
4033    libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
   (PETSc will automatically try to utilize a threaded BLAS if `--with-openmp` is provided).
4035 
4036    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
4037    processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
4038    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
4039    are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set
4040    by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs.
4041    In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets,
4042    if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind
4043    MPI ranks to cores, then with `-mat_mumps_use_omp_threads` 16, a master rank (and threads it spawns) will use half cores in socket 0, and half
4044    cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the
4045    problem will not happen. Therefore, when you use `-mat_mumps_use_omp_threads`, you need to keep an eye on your MPI rank mapping and CPU binding.
4046    For example, with the Slurm job scheduler, one can use srun `--cpu-bind`=verbose -m block:block to map consecutive MPI ranks to sockets and
4047    examine the mapping result.
4048 
4049    PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts,
4050    for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc
4051    calls `omp_set_num_threads`(m) internally before calling MUMPS.
4052 
4053    See {cite}`heroux2011bi` and {cite}`gutierrez2017accommodating`
4054 
4055 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `MatMumpsSetBlk()`, `KSPGetPC()`, `PCFactorGetMatrix()`
4056 M*/
4057 
/* Report the solver package name for factors created by this interface */
static PetscErrorCode MatFactorGetSolverType_mumps(PETSC_UNUSED Mat A, MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERMUMPS;
  PetscFunctionReturn(PETSC_SUCCESS);
}
4064 
/* MatGetFactor for Seq and MPI AIJ matrices: create an (unfactored) MUMPS factor matrix B,
   wire up the MUMPS-specific methods, and select the triple-conversion routine for A's type. */
static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F)
{
  Mat         B;
  Mat_MUMPS  *mumps;
  PetscBool   isSeqAIJ, isDiag, isDense;
  PetscMPIInt size;

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  /* MUMPS does not support complex Hermitian (non-symmetric) Cholesky; return NULL so another solver can be chosen */
  if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
    PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
    *F = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
#endif
  /* Create the factorization matrix */
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
  PetscCall(MatSetUp(B));

  PetscCall(PetscNew(&mumps));

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  /* compose the query/set methods that the MatMumpsXxx() front ends dispatch to */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    /* choose the routine that converts A into the MUMPS triple (COO) format */
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
    else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
    else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
    PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
    mumps->sym = 0; /* MUMPS SYM=0: unsymmetric factorization */
  } else {
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
    B->factortype                  = MAT_FACTOR_CHOLESKY;
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
    else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
    else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
    PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
#if defined(PETSC_USE_COMPLEX)
    mumps->sym = 2; /* SYM=1 (SPD) is not used for complex matrices */
#else
    if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; /* SYM=1: symmetric positive definite */
    else mumps->sym = 2;                            /* SYM=2: general symmetric */
#endif
  }

  /* set solvertype */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void *)mumps;

  *F               = B;
  mumps->id.job    = JOB_NULL; /* no MUMPS job has been run yet; JOB_INIT happens at symbolic factorization */
  mumps->ICNTL_pre = NULL;
  mumps->CNTL_pre  = NULL;
  mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}
4154 
4155 /* MatGetFactor for Seq and MPI SBAIJ matrices */
4156 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, PETSC_UNUSED MatFactorType ftype, Mat *F)
4157 {
4158   Mat         B;
4159   Mat_MUMPS  *mumps;
4160   PetscBool   isSeqSBAIJ;
4161   PetscMPIInt size;
4162 
4163   PetscFunctionBegin;
4164 #if defined(PETSC_USE_COMPLEX)
4165   if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
4166     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
4167     *F = NULL;
4168     PetscFunctionReturn(PETSC_SUCCESS);
4169   }
4170 #endif
4171   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4172   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4173   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4174   PetscCall(MatSetUp(B));
4175 
4176   PetscCall(PetscNew(&mumps));
4177   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ));
4178   if (isSeqSBAIJ) {
4179     mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
4180   } else {
4181     mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
4182   }
4183 
4184   B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
4185   B->ops->view                   = MatView_MUMPS;
4186   B->ops->getinfo                = MatGetInfo_MUMPS;
4187 
4188   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4189   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4190   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4191   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4192   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4193   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4194   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4195   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4196   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4197   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4198   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4199   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4200   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
4201   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
4202   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));
4203 
4204   B->factortype = MAT_FACTOR_CHOLESKY;
4205 #if defined(PETSC_USE_COMPLEX)
4206   mumps->sym = 2;
4207 #else
4208   if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
4209   else mumps->sym = 2;
4210 #endif
4211 
4212   /* set solvertype */
4213   PetscCall(PetscFree(B->solvertype));
4214   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4215   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4216   if (size == 1) {
4217     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4218     B->canuseordering = PETSC_TRUE;
4219   }
4220   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
4221   B->ops->destroy = MatDestroy_MUMPS;
4222   B->data         = (void *)mumps;
4223 
4224   *F               = B;
4225   mumps->id.job    = JOB_NULL;
4226   mumps->ICNTL_pre = NULL;
4227   mumps->CNTL_pre  = NULL;
4228   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
4229   PetscFunctionReturn(PETSC_SUCCESS);
4230 }
4231 
4232 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F)
4233 {
4234   Mat         B;
4235   Mat_MUMPS  *mumps;
4236   PetscBool   isSeqBAIJ;
4237   PetscMPIInt size;
4238 
4239   PetscFunctionBegin;
4240   /* Create the factorization matrix */
4241   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ));
4242   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4243   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4244   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4245   PetscCall(MatSetUp(B));
4246 
4247   PetscCall(PetscNew(&mumps));
4248   PetscCheck(ftype == MAT_FACTOR_LU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
4249   B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
4250   B->factortype            = MAT_FACTOR_LU;
4251   if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
4252   else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
4253   mumps->sym = 0;
4254   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
4255 
4256   B->ops->view    = MatView_MUMPS;
4257   B->ops->getinfo = MatGetInfo_MUMPS;
4258 
4259   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4260   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4261   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4262   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4263   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4264   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4265   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4266   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4267   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4268   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4269   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4270   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4271   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
4272   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
4273   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));
4274 
4275   /* set solvertype */
4276   PetscCall(PetscFree(B->solvertype));
4277   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4278   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4279   if (size == 1) {
4280     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4281     B->canuseordering = PETSC_TRUE;
4282   }
4283   B->ops->destroy = MatDestroy_MUMPS;
4284   B->data         = (void *)mumps;
4285 
4286   *F               = B;
4287   mumps->id.job    = JOB_NULL;
4288   mumps->ICNTL_pre = NULL;
4289   mumps->CNTL_pre  = NULL;
4290   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
4291   PetscFunctionReturn(PETSC_SUCCESS);
4292 }
4293 
4294 /* MatGetFactor for Seq and MPI SELL matrices */
4295 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F)
4296 {
4297   Mat         B;
4298   Mat_MUMPS  *mumps;
4299   PetscBool   isSeqSELL;
4300   PetscMPIInt size;
4301 
4302   PetscFunctionBegin;
4303   /* Create the factorization matrix */
4304   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL));
4305   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4306   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4307   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4308   PetscCall(MatSetUp(B));
4309 
4310   PetscCall(PetscNew(&mumps));
4311 
4312   B->ops->view    = MatView_MUMPS;
4313   B->ops->getinfo = MatGetInfo_MUMPS;
4314 
4315   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4316   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4317   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4318   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4319   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4320   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4321   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4322   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4323   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4324   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4325   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4326   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4327 
4328   PetscCheck(ftype == MAT_FACTOR_LU, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
4329   B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
4330   B->factortype            = MAT_FACTOR_LU;
4331   PetscCheck(isSeqSELL, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
4332   mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
4333   mumps->sym              = 0;
4334   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
4335 
4336   /* set solvertype */
4337   PetscCall(PetscFree(B->solvertype));
4338   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4339   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4340   if (size == 1) {
4341     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization  */
4342     B->canuseordering = PETSC_TRUE;
4343   }
4344   B->ops->destroy = MatDestroy_MUMPS;
4345   B->data         = (void *)mumps;
4346 
4347   *F               = B;
4348   mumps->id.job    = JOB_NULL;
4349   mumps->ICNTL_pre = NULL;
4350   mumps->CNTL_pre  = NULL;
4351   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
4352   PetscFunctionReturn(PETSC_SUCCESS);
4353 }
4354 
/* MatGetFactor for MATNEST matrices: vets the nest structure and block types first, and
   returns *F = NULL (without error) when this configuration cannot be handled by MUMPS */
static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F)
{
  Mat         B, **mats;
  Mat_MUMPS  *mumps;
  PetscInt    nr, nc;
  PetscMPIInt size;
  PetscBool   flg = PETSC_TRUE;

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  /* MUMPS provides no LDL^H (Hermitian) factorization; signal "unsupported" with a NULL factor */
  if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
    PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
    *F = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
#endif

  /* Return if some condition is not satisfied */
  *F = NULL;
  PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
  if (ftype == MAT_FACTOR_CHOLESKY) {
    IS       *rows, *cols;
    PetscInt *m, *M;

    /* Cholesky needs a square nest whose row and column index sets coincide block by block */
    PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc);
    PetscCall(PetscMalloc2(nr, &rows, nc, &cols));
    PetscCall(MatNestGetISs(A, rows, cols));
    /* Require rows[r] == cols[r] (same entries, same order); stop scanning at first mismatch */
    for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg));
    if (!flg) {
      PetscCall(PetscFree2(rows, cols));
      PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n"));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    /* The [min,max] index ranges of the row ISs must be pairwise disjoint */
    PetscCall(PetscMalloc2(nr, &m, nr, &M));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r]));
    for (PetscInt r = 0; flg && r < nr; r++)
      for (PetscInt k = r + 1; flg && k < nr; k++)
        if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE; /* ranges intersect */
    PetscCall(PetscFree2(m, M));
    PetscCall(PetscFree2(rows, cols));
    if (!flg) {
      PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n"));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  /* Every relevant block must be of a type MatConvertToTriples_nest_xaij can handle */
  for (PetscInt r = 0; r < nr; r++) {
    for (PetscInt c = 0; c < nc; c++) {
      Mat       sub = mats[r][c];
      PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense;

      /* For Cholesky only the upper triangular part of the nest is inspected */
      if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue;
      /* NOTE(review): presumably unwraps a MATTRANSPOSEVIRTUAL so the underlying matrix type is tested — confirm */
      PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
      PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
      PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
      if (ftype == MAT_FACTOR_CHOLESKY) {
        if (r == c) {
          /* SBAIJ blocks are only acceptable on the diagonal of the nest */
          if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag && !isDense) {
            PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
            flg = PETSC_FALSE;
          }
        } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
          PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
          flg = PETSC_FALSE;
        }
      } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
        PetscCall(PetscInfo(sub, "MAT_FACTOR_LU not supported for block of type %s.\n", ((PetscObject)sub)->type_name));
        flg = PETSC_FALSE;
      }
    }
  }
  if (!flg) PetscFunctionReturn(PETSC_SUCCESS); /* *F is still NULL: unsupported configuration */

  /* Create the factorization matrix */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
  PetscCall(MatSetUp(B));

  PetscCall(PetscNew(&mumps));

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  /* Expose the MUMPS-specific query/tuning interface on the factor matrix */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    mumps->sym               = 0; /* SYM=0: unsymmetric mode */
  } else {
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
    B->factortype                  = MAT_FACTOR_CHOLESKY;
#if defined(PETSC_USE_COMPLEX)
    mumps->sym = 2; /* complex: always general symmetric (SYM=2); no SPD mode */
#else
    if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; /* SYM=1: SPD */
    else mumps->sym = 2;                            /* SYM=2: general symmetric */
#endif
  }
  mumps->ConvertToTriples = MatConvertToTriples_nest_xaij;
  PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype]));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }

  /* set solvertype */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void *)mumps;

  *F               = B;
  mumps->id.job    = JOB_NULL;
  mumps->ICNTL_pre = NULL;
  mumps->CNTL_pre  = NULL;
  mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}
4498 
4499 PETSC_INTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
4500 {
4501   PetscFunctionBegin;
4502   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4503   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4504   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
4505   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
4506   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
4507   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4508   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4509   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
4510   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
4511   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
4512   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps));
4513   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4514   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4515   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4516   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4517   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4518   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4519   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps));
4520   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps));
4521   PetscFunctionReturn(PETSC_SUCCESS);
4522 }
4523