xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision 7cd49bdee00a26e142bff6e7df7c3fb9209fa795)
1 /*
2     Provides an interface to the MUMPS sparse solver
3 */
4 #include <petscpkg_version.h>
5 #include <petscsf.h>
6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
8 #include <../src/mat/impls/sell/mpi/mpisell.h>
9 #include <petsc/private/vecimpl.h>
10 
11 #define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")"
12 
13 EXTERN_C_BEGIN
14 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
15   #include <cmumps_c.h>
16   #include <zmumps_c.h>
17   #include <smumps_c.h>
18   #include <dmumps_c.h>
19 #else
20   #if defined(PETSC_USE_COMPLEX)
21     #if defined(PETSC_USE_REAL_SINGLE)
22       #include <cmumps_c.h>
23       #define MUMPS_c       cmumps_c
24       #define MUMPS_STRUC_C CMUMPS_STRUC_C
25       #define MumpsScalar   CMUMPS_COMPLEX
26     #else
27       #include <zmumps_c.h>
28       #define MUMPS_c       zmumps_c
29       #define MUMPS_STRUC_C ZMUMPS_STRUC_C
30       #define MumpsScalar   ZMUMPS_COMPLEX
31     #endif
32   #else
33     #if defined(PETSC_USE_REAL_SINGLE)
34       #include <smumps_c.h>
35       #define MUMPS_c       smumps_c
36       #define MUMPS_STRUC_C SMUMPS_STRUC_C
37       #define MumpsScalar   SMUMPS_REAL
38     #else
39       #include <dmumps_c.h>
40       #define MUMPS_c       dmumps_c
41       #define MUMPS_STRUC_C DMUMPS_STRUC_C
42       #define MumpsScalar   DMUMPS_REAL
43     #endif
44   #endif
45 #endif
46 EXTERN_C_END
47 
/* Values for the MUMPS <job> parameter, selecting which phase MUMPS runs on the next call */
#define JOB_INIT         -1
#define JOB_NULL         0
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC  2
#define JOB_SOLVE        3
#define JOB_END          -2
54 
/* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
   number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
   naming convention in PetscMPIInt, PetscBLASInt etc.
*/
typedef MUMPS_INT PetscMUMPSInt;

/* Reject MUMPS builds configured in full 64-bit mode (64-bit MUMPS_INT), which PETSc does not support */
#if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
  #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
    #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
  #endif
#else
  #if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used to build MUMPS in full 64-bit mode */
    #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
  #endif
#endif
70 
#define MPIU_MUMPSINT       MPI_INT
#define PETSC_MUMPS_INT_MAX 2147483647
/* Written as (-2147483647 - 1) because the literal -2147483648 is unary minus applied to 2147483648,
   which does not fit in int and therefore has type long (the same idiom <limits.h> uses for INT_MIN) */
#define PETSC_MUMPS_INT_MIN (-2147483647 - 1)
74 
75 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/
76 static inline PetscErrorCode PetscMUMPSIntCast(PetscCount a, PetscMUMPSInt *b)
77 {
78   PetscFunctionBegin;
79 #if PetscDefined(USE_64BIT_INDICES)
80   PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
81 #endif
82   *b = (PetscMUMPSInt)a;
83   PetscFunctionReturn(PETSC_SUCCESS);
84 }
85 
86 /* Put these utility routines here since they are only used in this file */
87 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
88 {
89   PetscInt  myval;
90   PetscBool myset;
91 
92   PetscFunctionBegin;
93   /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
94   PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
95   if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
96   if (set) *set = myset;
97   PetscFunctionReturn(PETSC_SUCCESS);
98 }
99 #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)
100 
101 // An abstract type for specific MUMPS types {S,D,C,Z}MUMPS_STRUC_C.
102 //
103 // With the abstract (outer) type, we can write shared code. We call MUMPS through a type-to-be-determined inner field within the abstract type.
104 // Before/after calling MUMPS, we need to copy in/out fields between the outer and the inner, which seems expensive. But note that the large fixed size
// arrays within the types are directly linked. At the end, we only need to copy ~20 integers/pointers, which is doable. See PreMumpsCall()/PostMumpsCall().
106 //
107 // Not all fields in the specific types are exposed in the abstract type. We only need those used by the PETSc/MUMPS interface.
// Notably, DMUMPS_COMPLEX* and DMUMPS_REAL* fields are now declared as void *. Their type will be determined by the actual precision to be used.
109 // Also note that we added some *_len fields not in specific types to track sizes of those MumpsScalar buffers.
typedef struct {
  PetscPrecision precision;   // precision used by MUMPS
  void          *internal_id; // the data structure passed to MUMPS, whose actual type {S,D,C,Z}MUMPS_STRUC_C is to be decided by precision and PETSc's use of complex

  // aliased fields from internal_id, so that we can use XMUMPS_STRUC_C to write shared code across different precisions.
  MUMPS_INT  sym, par, job;
  MUMPS_INT  comm_fortran; /* Fortran communicator */
  MUMPS_INT *icntl;
  void      *cntl; // MumpsReal, fixed size array
  MUMPS_INT  n;
  MUMPS_INT  nblk;

  /* Assembled entry */
  MUMPS_INT8 nnz;
  MUMPS_INT *irn;
  MUMPS_INT *jcn;
  void      *a; // MumpsScalar, centralized input
  PetscCount a_len;

  /* Distributed entry */
  MUMPS_INT8 nnz_loc;
  MUMPS_INT *irn_loc;
  MUMPS_INT *jcn_loc;
  void      *a_loc; // MumpsScalar, distributed input
  PetscCount a_loc_len;

  /* Matrix by blocks */
  MUMPS_INT *blkptr;
  MUMPS_INT *blkvar;

  /* Ordering, if given by user */
  MUMPS_INT *perm_in;

  /* RHS, solution, output data and statistics */
  void      *rhs, *redrhs, *rhs_sparse, *sol_loc, *rhs_loc;                 // MumpsScalar buffers
  PetscCount rhs_len, redrhs_len, rhs_sparse_len, sol_loc_len, rhs_loc_len; // length of buffers (in MumpsScalar) IF allocated in a different precision than PetscScalar

  MUMPS_INT *irhs_sparse, *irhs_ptr, *isol_loc, *irhs_loc;
  MUMPS_INT  nrhs, lrhs, lredrhs, nz_rhs, lsol_loc, nloc_rhs, lrhs_loc;
  // MUMPS_INT  nsol_loc; // introduced in MUMPS-5.7, but PETSc doesn't use it; would cause compile errors with the widely used 5.6. If you add it, must also update PreMumpsCall() and guard this with #if PETSC_PKG_MUMPS_VERSION_GE(5, 7, 0)
  MUMPS_INT  schur_lld;
  MUMPS_INT *info, *infog;   // fixed size array
  void      *rinfo, *rinfog; // MumpsReal, fixed size array

  /* Null space */
  MUMPS_INT *pivnul_list; // allocated by MUMPS!
  MUMPS_INT *mapping;     // allocated by MUMPS!

  /* Schur */
  MUMPS_INT  size_schur;
  MUMPS_INT *listvar_schur;
  void      *schur; // MumpsScalar
  PetscCount schur_len;

  /* For out-of-core */
  char *ooc_tmpdir; // fixed size array
  char *ooc_prefix; // fixed size array
} XMUMPS_STRUC_C;
168 
// Allocate a fresh inner struct of the given concrete type and wire it to the outer struct.
// Note: fixed-size arrays (icntl, cntl, info, ...) live inside the inner struct; redirect the outer's pointers to them.
#define AllocatInternalID(MUMPS_STRUC_T, outer) \
  do { \
    MUMPS_STRUC_T *inner; \
    PetscCall(PetscNew(&inner)); \
    outer->icntl      = inner->icntl; \
    outer->cntl       = inner->cntl; \
    outer->info       = inner->info; \
    outer->infog      = inner->infog; \
    outer->rinfo      = inner->rinfo; \
    outer->rinfog     = inner->rinfog; \
    outer->ooc_tmpdir = inner->ooc_tmpdir; \
    outer->ooc_prefix = inner->ooc_prefix; \
    /* these three fields should never change after init */ \
    inner->comm_fortran = outer->comm_fortran; \
    inner->par          = outer->par; \
    inner->sym          = outer->sym; \
    outer->internal_id  = inner; \
  } while (0)
188 
// Allocate the internal [SDCZ]MUMPS_STRUC_C ID data structure in the given <precision>, and link fields of the outer and the inner
static inline PetscErrorCode MatMumpsAllocateInternalID(XMUMPS_STRUC_C *outer, PetscPrecision precision)
{
  PetscFunctionBegin;
  outer->precision = precision;
#if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
  /* with mixed-precision support the concrete inner type is chosen at runtime from <precision> */
  #if defined(PETSC_USE_COMPLEX)
  if (precision == PETSC_PRECISION_SINGLE) AllocatInternalID(CMUMPS_STRUC_C, outer);
  else AllocatInternalID(ZMUMPS_STRUC_C, outer);
  #else
  if (precision == PETSC_PRECISION_SINGLE) AllocatInternalID(SMUMPS_STRUC_C, outer);
  else AllocatInternalID(DMUMPS_STRUC_C, outer);
  #endif
#else
  /* otherwise there is a single compile-time MUMPS_STRUC_C matching PetscScalar */
  AllocatInternalID(MUMPS_STRUC_C, outer);
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
207 
// Free the MumpsScalar buffers hanging off the inner struct. Only used when MumpsScalar differs from
// PetscScalar (mixed precision); otherwise these pointers alias PETSc-owned arrays and must not be freed here.
#define FreeInternalIDFields(MUMPS_STRUC_T, outer) \
  do { \
    MUMPS_STRUC_T *inner = (MUMPS_STRUC_T *)(outer)->internal_id; \
    PetscCall(PetscFree(inner->a)); \
    PetscCall(PetscFree(inner->a_loc)); \
    PetscCall(PetscFree(inner->redrhs)); \
    PetscCall(PetscFree(inner->rhs)); \
    PetscCall(PetscFree(inner->rhs_sparse)); \
    PetscCall(PetscFree(inner->rhs_loc)); \
    PetscCall(PetscFree(inner->sol_loc)); \
    PetscCall(PetscFree(inner->schur)); \
  } while (0)
220 
// Destroy the inner [SDCZ]MUMPS_STRUC_C (if any), including its mixed-precision scratch buffers
static inline PetscErrorCode MatMumpsFreeInternalID(XMUMPS_STRUC_C *outer)
{
  PetscFunctionBegin;
  if (outer->internal_id) { // sometimes, the inner is never created before we destroy the outer
#if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
    const PetscPrecision mumps_precision = outer->precision;
    if (mumps_precision != PETSC_SCALAR_PRECISION) { // Free internal buffers if we used mixed precision
  #if defined(PETSC_USE_COMPLEX)
      if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(CMUMPS_STRUC_C, outer);
      else FreeInternalIDFields(ZMUMPS_STRUC_C, outer);
  #else
      if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(SMUMPS_STRUC_C, outer);
      else FreeInternalIDFields(DMUMPS_STRUC_C, outer);
  #endif
    }
#endif
    PetscCall(PetscFree(outer->internal_id));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
241 
242 // Make a companion MumpsScalar array (with a given PetscScalar array), to hold at least <n> MumpsScalars in the given <precision> and return the address at <ma>.
243 // <convert> indicates if we need to convert PetscScalars to MumpsScalars after allocating the MumpsScalar array.
244 // (For bravity, we use <ma> for array address and <m> for its length in MumpsScalar, though in code they should be <*ma> and <*m>)
245 // If <ma> already points to a buffer/array, on input <m> should be its length. Note the buffer might be freed if it is not big enough for this request.
246 //
247 // The returned array is a companion, so how it is created depends on if PetscScalar and MumpsScalar are the same.
248 // 1) If they are different, a separate array will be made and its length and address will be provided at <m> and <ma> on output.
249 // 2) Otherwise, <pa> will be returned in <ma>, and <m> will be zero on output.
250 //
251 //
252 //   Input parameters:
253 // + convert   - whether to do PetscScalar to MumpsScalar conversion
254 // . n         - length of the PetscScalar array
255 // . pa        - [n]], points to the PetscScalar array
256 // . precision - precision of MumpsScalar
257 // . m         - on input, length of an existing MumpsScalar array <ma> if any, otherwise *m is just zero.
258 // - ma        - on input, an existing MumpsScalar array if any.
259 //
260 //   Output parameters:
261 // + m  - length of the MumpsScalar buffer at <ma> if MumpsScalar is different from PetscScalar, otherwise 0
262 // . ma - the MumpsScalar array, which could be an alias of <pa> when the two types are the same.
263 //
264 //   Note:
265 //    New memory, if allocated, is done via PetscMalloc1(), and is owned by caller.
266 static PetscErrorCode MatMumpsMakeMumpsScalarArray(PetscBool convert, PetscCount n, const PetscScalar *pa, PetscPrecision precision, PetscCount *m, void **ma)
267 {
268   PetscFunctionBegin;
269 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
270   const PetscPrecision mumps_precision = precision;
271   PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unsupported precicison (%d). Must be single or double", (int)precision);
272   #if defined(PETSC_USE_COMPLEX)
273   if (mumps_precision != PETSC_SCALAR_PRECISION) {
274     if (mumps_precision == PETSC_PRECISION_SINGLE) {
275       if (*m < n) {
276         PetscCall(PetscFree(*ma));
277         PetscCall(PetscMalloc1(n, (CMUMPS_COMPLEX **)ma));
278         *m = n;
279       }
280       if (convert) {
281         CMUMPS_COMPLEX *b = *(CMUMPS_COMPLEX **)ma;
282         for (PetscCount i = 0; i < n; i++) {
283           b[i].r = PetscRealPart(pa[i]);
284           b[i].i = PetscImaginaryPart(pa[i]);
285         }
286       }
287     } else {
288       if (*m < n) {
289         PetscCall(PetscFree(*ma));
290         PetscCall(PetscMalloc1(n, (ZMUMPS_COMPLEX **)ma));
291         *m = n;
292       }
293       if (convert) {
294         ZMUMPS_COMPLEX *b = *(ZMUMPS_COMPLEX **)ma;
295         for (PetscCount i = 0; i < n; i++) {
296           b[i].r = PetscRealPart(pa[i]);
297           b[i].i = PetscImaginaryPart(pa[i]);
298         }
299       }
300     }
301   }
302   #else
303   if (mumps_precision != PETSC_SCALAR_PRECISION) {
304     if (mumps_precision == PETSC_PRECISION_SINGLE) {
305       if (*m < n) {
306         PetscCall(PetscFree(*ma));
307         PetscCall(PetscMalloc1(n, (SMUMPS_REAL **)ma));
308         *m = n;
309       }
310       if (convert) {
311         SMUMPS_REAL *b = *(SMUMPS_REAL **)ma;
312         for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
313       }
314     } else {
315       if (*m < n) {
316         PetscCall(PetscFree(*ma));
317         PetscCall(PetscMalloc1(n, (DMUMPS_REAL **)ma));
318         *m = n;
319       }
320       if (convert) {
321         DMUMPS_REAL *b = *(DMUMPS_REAL **)ma;
322         for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
323       }
324     }
325   }
326   #endif
327   else
328 #endif
329   {
330     if (*m != 0) PetscCall(PetscFree(*ma)); // free existing buffer if any
331     *ma = (void *)pa;                       // same precision, make them alias
332     *m  = 0;
333   }
334   PetscFunctionReturn(PETSC_SUCCESS);
335 }
336 
337 // Cast a MumpsScalar array <ma[n]> in <mumps_precision> to a PetscScalar array at address <pa>.
338 //
339 // 1) If the two types are different, cast array elements.
340 // 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op.
341 static PetscErrorCode MatMumpsCastMumpsScalarArray(PetscCount n, PetscPrecision mumps_precision, const void *ma, PetscScalar *pa)
342 {
343   PetscFunctionBegin;
344 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
345   if (mumps_precision != PETSC_SCALAR_PRECISION) {
346   #if defined(PETSC_USE_COMPLEX)
347     if (mumps_precision == PETSC_PRECISION_SINGLE) {
348       PetscReal         *a = (PetscReal *)pa;
349       const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma;
350       for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i];
351     } else {
352       PetscReal         *a = (PetscReal *)pa;
353       const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma;
354       for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i];
355     }
356   #else
357     if (mumps_precision == PETSC_PRECISION_SINGLE) {
358       const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma;
359       for (PetscCount i = 0; i < n; i++) pa[i] = b[i];
360     } else {
361       const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma;
362       for (PetscCount i = 0; i < n; i++) pa[i] = b[i];
363     }
364   #endif
365   } else
366 #endif
367     PetscCall(PetscArraycpy((PetscScalar *)pa, (PetscScalar *)ma, n));
368   PetscFunctionReturn(PETSC_SUCCESS);
369 }
370 
371 // Cast a PetscScalar array <pa[n]> to a MumpsScalar array in the given <mumps_precision> at address <ma>.
372 //
373 // 1) If the two types are different, cast array elements.
374 // 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op.
375 static PetscErrorCode MatMumpsCastPetscScalarArray(PetscCount n, const PetscScalar *pa, PetscPrecision mumps_precision, const void *ma)
376 {
377   PetscFunctionBegin;
378 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
379   if (mumps_precision != PETSC_SCALAR_PRECISION) {
380   #if defined(PETSC_USE_COMPLEX)
381     if (mumps_precision == PETSC_PRECISION_SINGLE) {
382       CMUMPS_COMPLEX *b = (CMUMPS_COMPLEX *)ma;
383       for (PetscCount i = 0; i < n; i++) {
384         b[i].r = PetscRealPart(pa[i]);
385         b[i].i = PetscImaginaryPart(pa[i]);
386       }
387     } else {
388       ZMUMPS_COMPLEX *b = (ZMUMPS_COMPLEX *)ma;
389       for (PetscCount i = 0; i < n; i++) {
390         b[i].r = PetscRealPart(pa[i]);
391         b[i].i = PetscImaginaryPart(pa[i]);
392       }
393     }
394   #else
395     if (mumps_precision == PETSC_PRECISION_SINGLE) {
396       SMUMPS_REAL *b = (SMUMPS_REAL *)ma;
397       for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
398     } else {
399       DMUMPS_REAL *b = (DMUMPS_REAL *)ma;
400       for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
401     }
402   #endif
403   } else
404 #endif
405     PetscCall(PetscArraycpy((PetscScalar *)ma, (PetscScalar *)pa, n));
406   PetscFunctionReturn(PETSC_SUCCESS);
407 }
408 
409 static inline MPI_Datatype MPIU_MUMPSREAL(const XMUMPS_STRUC_C *id)
410 {
411   return id->precision == PETSC_PRECISION_DOUBLE ? MPI_DOUBLE : MPI_FLOAT;
412 }
413 
// Copy input/control fields from the outer (abstract) struct into the inner {S,D,C,Z}MUMPS_STRUC_C
// right before invoking MUMPS; void * MumpsScalar pointers are cast to the concrete scalar type <mumpsscalar>.
// Fixed-size arrays (icntl, cntl, info, ...) are already aliased at allocation time and need no copy here.
#define PreMumpsCall(inner, outer, mumpsscalar) \
  do { \
    inner->job           = outer->job; \
    inner->n             = outer->n; \
    inner->nblk          = outer->nblk; \
    inner->nnz           = outer->nnz; \
    inner->irn           = outer->irn; \
    inner->jcn           = outer->jcn; \
    inner->a             = (mumpsscalar *)outer->a; \
    inner->nnz_loc       = outer->nnz_loc; \
    inner->irn_loc       = outer->irn_loc; \
    inner->jcn_loc       = outer->jcn_loc; \
    inner->a_loc         = (mumpsscalar *)outer->a_loc; \
    inner->blkptr        = outer->blkptr; \
    inner->blkvar        = outer->blkvar; \
    inner->perm_in       = outer->perm_in; \
    inner->rhs           = (mumpsscalar *)outer->rhs; \
    inner->redrhs        = (mumpsscalar *)outer->redrhs; \
    inner->rhs_sparse    = (mumpsscalar *)outer->rhs_sparse; \
    inner->sol_loc       = (mumpsscalar *)outer->sol_loc; \
    inner->rhs_loc       = (mumpsscalar *)outer->rhs_loc; \
    inner->irhs_sparse   = outer->irhs_sparse; \
    inner->irhs_ptr      = outer->irhs_ptr; \
    inner->isol_loc      = outer->isol_loc; \
    inner->irhs_loc      = outer->irhs_loc; \
    inner->nrhs          = outer->nrhs; \
    inner->lrhs          = outer->lrhs; \
    inner->lredrhs       = outer->lredrhs; \
    inner->nz_rhs        = outer->nz_rhs; \
    inner->lsol_loc      = outer->lsol_loc; \
    inner->nloc_rhs      = outer->nloc_rhs; \
    inner->lrhs_loc      = outer->lrhs_loc; \
    inner->schur_lld     = outer->schur_lld; \
    inner->size_schur    = outer->size_schur; \
    inner->listvar_schur = outer->listvar_schur; \
    inner->schur         = (mumpsscalar *)outer->schur; \
  } while (0)
451 
// Copy back, after a MUMPS call, the pointers MUMPS itself allocates so the outer struct can see them
#define PostMumpsCall(inner, outer) \
  do { \
    outer->pivnul_list = inner->pivnul_list; \
    outer->mapping     = inner->mapping; \
  } while (0)
457 
// Entry for PETSc to call mumps: syncs the outer struct to the inner one, invokes the precision-matching
// [sdcz]mumps_c() routine, then syncs MUMPS-allocated pointers back (see PreMumpsCall()/PostMumpsCall())
static inline PetscErrorCode PetscCallMumps_Private(XMUMPS_STRUC_C *outer)
{
  PetscFunctionBegin;
#if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
  /* dispatch at runtime on the precision recorded in the outer struct */
  #if defined(PETSC_USE_COMPLEX)
  if (outer->precision == PETSC_PRECISION_SINGLE) {
    CMUMPS_STRUC_C *inner = (CMUMPS_STRUC_C *)outer->internal_id;
    PreMumpsCall(inner, outer, CMUMPS_COMPLEX);
    PetscStackCallExternalVoid("cmumps_c", cmumps_c(inner));
    PostMumpsCall(inner, outer);
  } else {
    ZMUMPS_STRUC_C *inner = (ZMUMPS_STRUC_C *)outer->internal_id;
    PreMumpsCall(inner, outer, ZMUMPS_COMPLEX);
    PetscStackCallExternalVoid("zmumps_c", zmumps_c(inner));
    PostMumpsCall(inner, outer);
  }
  #else
  if (outer->precision == PETSC_PRECISION_SINGLE) {
    SMUMPS_STRUC_C *inner = (SMUMPS_STRUC_C *)outer->internal_id;
    PreMumpsCall(inner, outer, SMUMPS_REAL);
    PetscStackCallExternalVoid("smumps_c", smumps_c(inner));
    PostMumpsCall(inner, outer);
  } else {
    DMUMPS_STRUC_C *inner = (DMUMPS_STRUC_C *)outer->internal_id;
    PreMumpsCall(inner, outer, DMUMPS_REAL);
    PetscStackCallExternalVoid("dmumps_c", dmumps_c(inner));
    PostMumpsCall(inner, outer);
  }
  #endif
#else
  /* single-precision-mode build: MUMPS_c/MUMPS_STRUC_C/MumpsScalar are fixed at compile time */
  MUMPS_STRUC_C *inner = (MUMPS_STRUC_C *)outer->internal_id;
  PreMumpsCall(inner, outer, MumpsScalar);
  PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(inner));
  PostMumpsCall(inner, outer);
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
496 
/* macros s.t. indices match MUMPS documentation (MUMPS numbers ICNTL/INFO/... entries from 1) */
#define ICNTL(I) icntl[(I) - 1]
#define INFOG(I) infog[(I) - 1]
#define INFO(I)  info[(I) - 1]

// Get a value from a MumpsScalar array, which is the <F> field in the struct of MUMPS_STRUC_C. The value is convertible to PetscScalar. Note no minus 1 on I!
// The void * field is reinterpreted according to the runtime precision stored in (ID).precision.
#if defined(PETSC_USE_COMPLEX)
  #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((CMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((CMUMPS_COMPLEX *)(ID).F)[I].i : ((ZMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((ZMUMPS_COMPLEX *)(ID).F)[I].i)
#else
  #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).F)[I] : ((double *)(ID).F)[I])
#endif

// Get a value from MumpsReal arrays (1-based index I, as in the MUMPS documentation). The value is convertible to PetscReal.
#define ID_CNTL_GET(ID, I)   ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).cntl)[(I) - 1] : ((double *)(ID).cntl)[(I) - 1])
#define ID_RINFOG_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfog)[(I) - 1] : ((double *)(ID).rinfog)[(I) - 1])
#define ID_RINFO_GET(ID, I)  ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfo)[(I) - 1] : ((double *)(ID).rinfo)[(I) - 1])

// Set the I-th entry of the MumpsReal array id.cntl[] with a PetscReal <VAL>
#define ID_CNTL_SET(ID, I, VAL) \
  do { \
    if ((ID).precision == PETSC_PRECISION_SINGLE) ((float *)(ID).cntl)[(I) - 1] = (VAL); \
    else ((double *)(ID).cntl)[(I) - 1] = (VAL); \
  } while (0)
520 
/* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on.
   In both build modes the call is bracketed by PetscFPTrapPush(PETSC_FP_TRAP_OFF)/PetscFPTrapPop() so floating point exceptions inside MUMPS do not trap. */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  #define PetscMUMPS_c(mumps) \
    do { \
      if (mumps->use_petsc_omp_support) { \
        if (mumps->is_omp_master) { \
          PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
          PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
          PetscCall(PetscCallMumps_Private(&mumps->id)); \
          PetscCall(PetscFPTrapPop()); \
          PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
        } \
        PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
        /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific      \
         to processes, so we only Bcast info[1], an error code and leave others (since they do not have   \
         an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82.                   \
         omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \
      */ \
        SMUMPS_STRUC_C tmp; /* All MUMPS_STRUC_C types have same lengths on these info arrays */ \
        PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(tmp.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(tmp.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfog), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfo), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \
      } else { \
        PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
        PetscCall(PetscCallMumps_Private(&mumps->id)); \
        PetscCall(PetscFPTrapPop()); \
      } \
    } while (0)
#else
  #define PetscMUMPS_c(mumps) \
    do { \
      PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
      PetscCall(PetscCallMumps_Private(&mumps->id)); \
      PetscCall(PetscFPTrapPop()); \
    } while (0)
#endif
558 
/* Private context (stored in Mat->data) for a MUMPS factorization: the abstract MUMPS handle <id> plus PETSc-side buffers and bookkeeping */
typedef struct Mat_MUMPS Mat_MUMPS;
struct Mat_MUMPS {
  XMUMPS_STRUC_C id; // abstract MUMPS handle; see XMUMPS_STRUC_C above

  MatStructure   matstruc;
  PetscMPIInt    myid, petsc_size;
  PetscMUMPSInt *irn, *jcn;       /* the (i,j,v) triplets passed to mumps. */
  PetscScalar   *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
  PetscCount     nnz;             /* number of nonzeros. The type is called selective 64-bit in mumps */
  PetscMUMPSInt  sym;
  MPI_Comm       mumps_comm;
  PetscMUMPSInt *ICNTL_pre;
  PetscReal     *CNTL_pre;
  PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
  PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
  PetscMUMPSInt  ICNTL26;
  PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt    *rhs_nrow, max_nrhs;
  PetscMPIInt *rhs_recvcounts, *rhs_disps;
  PetscScalar *rhs_loc, *rhs_recvbuf;
#endif
  Vec            b_seq, x_seq;
  PetscInt       ninfo, *info; /* which INFO to display */
  PetscInt       sizeredrhs;
  PetscScalar   *schur_sol;
  PetscInt       schur_sizesol;
  PetscScalar   *redrhs;              // buffer in PetscScalar in case MumpsScalar is in a different precision
  PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
  PetscCount     cur_ilen, cur_jlen;  /* current len of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);

  /* Support for MATNEST */
  PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
  PetscCount  *nest_vals_start;
  PetscScalar *nest_vals;

  /* stuff used by petsc/mumps OpenMP support*/
  PetscBool    use_petsc_omp_support;
  PetscOmpCtrl omp_ctrl;             /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */
  MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is PETSc matrix's comm */
  PetscCount  *recvcount;            /* a collection of nnz on omp_master */
  PetscMPIInt  tag, omp_comm_size;
  PetscBool    is_omp_master; /* is this rank the master of omp_comm */
  MPI_Request *reqs;
};
606 
/* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
   Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
 */
static PetscErrorCode PetscMUMPSIntCSRCast(PETSC_UNUSED Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
{
  PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscCount since mumps only uses PetscMUMPSInt for rhs */

  PetscFunctionBegin;
#if defined(PETSC_USE_64BIT_INDICES)
  {
    PetscInt i;
    /* with 64-bit PetscInt, each index must be down-cast into the reusable 32-bit work arrays,
       which are grown (never shrunk) to fit the current ia/ja */
    if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
      PetscCall(PetscFree(mumps->ia_alloc));
      PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
      mumps->cur_ilen = nrow + 1;
    }
    if (nnz > mumps->cur_jlen) {
      PetscCall(PetscFree(mumps->ja_alloc));
      PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
      mumps->cur_jlen = nnz;
    }
    for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &mumps->ia_alloc[i]));
    for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &mumps->ja_alloc[i]));
    *ia_mumps = mumps->ia_alloc;
    *ja_mumps = mumps->ja_alloc;
  }
#else
  /* 32-bit PetscInt has the same size as PetscMUMPSInt, so the input arrays can be used directly */
  *ia_mumps = ia;
  *ja_mumps = ja;
#endif
  PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
  PetscFunctionReturn(PETSC_SUCCESS);
}
640 
// Release Schur-complement related storage and reset the corresponding MUMPS settings
static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
{
  PetscFunctionBegin;
  PetscCall(PetscFree(mumps->id.listvar_schur));
  PetscCall(PetscFree(mumps->redrhs)); // if needed, id.redrhs will be freed in MatMumpsFreeInternalID()
  PetscCall(PetscFree(mumps->schur_sol));
  mumps->id.size_schur = 0;
  mumps->id.schur_lld  = 0;
  if (mumps->id.internal_id) mumps->id.ICNTL(19) = 0; // sometimes, the inner id has not been built yet, in which case icntl is not aliased
  PetscFunctionReturn(PETSC_SUCCESS);
}
652 
/* solve with rhs in mumps->id.redrhs and return in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS           *mumps = (Mat_MUMPS *)F->data;
  Mat                  S, B, X; // solve S*X = B; all three matrices are dense
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;
  const PetscScalar   *xarray;

  PetscFunctionBegin;
  PetscCall(MatFactorFactorizeSchurComplement(F));
  PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
  // bring the (possibly lower precision) MUMPS redrhs into the PetscScalar buffer mumps->redrhs
  PetscCall(MatMumpsCastMumpsScalarArray(mumps->sizeredrhs, mumps->id.precision, mumps->id.redrhs, mumps->redrhs));

  // B wraps mumps->redrhs without copying; its type is matched to S so the solves run on the same backend
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &B));
  PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  PetscCall(MatBindToCPU(B, S->boundtocpu));
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    // X wraps the same host array (mumps->redrhs) as B, so the solve lands in place
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatMatSolveTranspose(S, B, X));
    } else {
      PetscCall(MatMatSolve(S, B, X));
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    // S holds the inverted Schur complement, so the solve is a dense multiply into mumps->schur_sol
    sizesol = mumps->id.nrhs * mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { // grow the solution buffer on demand
      PetscCall(PetscFree(mumps->schur_sol));
      PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
      mumps->schur_sizesol = sizesol;
    }
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    PetscCall(MatProductCreateWithMat(S, B, NULL, X));
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
    } else {
      PetscCall(MatProductSetType(X, MATPRODUCT_AB));
    }
    PetscCall(MatProductSetFromOptions(X));
    PetscCall(MatProductSymbolic(X));
    PetscCall(MatProductNumeric(X));

    PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN)); // put the solution back into B (i.e. mumps->redrhs)
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
  }
  // MUST get the array from X (not B), though they share the same host array. We can only guarantee X has the correct data on device.
  PetscCall(MatDenseGetArrayRead(X, &xarray)); // xarray should be mumps->redrhs, but using MatDenseGetArrayRead is safer with GPUs.
  PetscCall(MatMumpsCastPetscScalarArray(mumps->sizeredrhs, xarray, mumps->id.precision, mumps->id.redrhs));
  PetscCall(MatDenseRestoreArrayRead(X, &xarray));
  PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
  PetscCall(MatDestroy(&B));
  PetscCall(MatDestroy(&X));
  PetscFunctionReturn(PETSC_SUCCESS);
}
721 
/* MatMumpsHandleSchur_Private - drive the two halves of a solve when a Schur complement is in use

   expansion == PETSC_FALSE (condensation step): make sure the MUMPS internal array id.redrhs,
   which will receive the reduced right-hand sides, is allocated and large enough for id.nrhs columns.
   expansion == PETSC_TRUE (expansion step): solve the Schur system (MatMumpsSolveSchur_Private()
   leaves the solution in id.redrhs), then call MUMPS with ICNTL(26) = 2 to expand the reduced
   solution to the full one.
   Does nothing when no Schur complement has been requested (ICNTL(19) == 0). */
static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      mumps->id.lredrhs = mumps->id.size_schur;
      mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
      /* a nonzero id.redrhs_len indicates id.redrhs owns a separate allocation (distinct from mumps->redrhs) */
      if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs));
      PetscCall(PetscFree(mumps->redrhs));
      PetscCall(PetscMalloc1(mumps->sizeredrhs, &mumps->redrhs));
      PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, mumps->sizeredrhs, mumps->redrhs, mumps->id.precision, &mumps->id.redrhs_len, &mumps->id.redrhs));
    }
  } else {                                    /* prepare for the expansion step */
    PetscCall(MatMumpsSolveSchur_Private(F)); /* solve Schur complement, put solution in id.redrhs (this has to be done by the MUMPS user, so basically us) */
    mumps->id.ICNTL(26) = 2;                  /* expansion phase */
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
    if (mumps->id.nrhs > 1) {
      if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs));
      PetscCall(PetscFree(mumps->redrhs));
      mumps->id.redrhs_len = 0;
      mumps->id.lredrhs    = 0;
      mumps->sizeredrhs    = 0;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
759 
760 /*
761   MatConvertToTriples_A_B - convert PETSc matrix to triples: row[nz], col[nz], val[nz]
762 
763   input:
    A       - matrix in aij, baij, or sbaij format
765     shift   - 0: C style output triple; 1: Fortran style output triple.
766     reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
767               MAT_REUSE_MATRIX:   only the values in v array are updated
768   output:
769     nnz     - dim of r, c, and v (number of local nonzero entries of A)
770     r, c, v - row and col index, matrix values (matrix triples)
771 
  The returned values r, c, and sometimes v are obtained in a single PetscMalloc() and are later freed in
  MatDestroy_MUMPS() with PetscFree(mumps->irn). This is not ideal: because v is only SOMETIMES part of mumps->irn,
  the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
775 
776  */
777 
778 static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
779 {
780   const PetscScalar *av;
781   const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
782   PetscCount         nz, rnz, k;
783   PetscMUMPSInt     *row, *col;
784   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
785 
786   PetscFunctionBegin;
787   PetscCall(MatSeqAIJGetArrayRead(A, &av));
788   if (reuse == MAT_INITIAL_MATRIX) {
789     nz = aa->nz;
790     ai = aa->i;
791     aj = aa->j;
792     PetscCall(PetscMalloc2(nz, &row, nz, &col));
793     for (PetscCount i = k = 0; i < M; i++) {
794       rnz = ai[i + 1] - ai[i];
795       ajj = aj + ai[i];
796       for (PetscCount j = 0; j < rnz; j++) {
797         PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
798         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
799         k++;
800       }
801     }
802     mumps->val = (PetscScalar *)av;
803     mumps->irn = row;
804     mumps->jcn = col;
805     mumps->nnz = nz;
806   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqaij_seqaij(), so one needs to copy the memory */
807   else mumps->val = (PetscScalar *)av;                                           /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
808   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
809   PetscFunctionReturn(PETSC_SUCCESS);
810 }
811 
/* Convert a SeqSELL matrix to COO triples for MUMPS.
   NOTE(review): the row index is reconstructed as 8 * i + r with r cycling through 0..7
   ((r + 1) & 0x07), i.e., a slice height of 8 is assumed — confirm this matches the SELL
   slice height in use. */
static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  PetscCount     nz, i, j, k, r;
  Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
  PetscMUMPSInt *row, *col;

  PetscFunctionBegin;
  nz = a->sliidx[a->totalslices]; /* sliidx[totalslices] is the total number of stored entries */
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (i = k = 0; i < a->totalslices; i++) {
      /* entries of a slice are interleaved across its rows; r tracks the row within the slice */
      for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
    }
    for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
    mumps->val = a->val;
  } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, a->val, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsell_seqaij(), so one needs to copy the memory */
  else mumps->val = a->val;                                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  PetscFunctionReturn(PETSC_SUCCESS);
}
834 
835 static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
836 {
837   Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
838   const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
839   PetscCount      M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m;
840   PetscInt        bs;
841   PetscMUMPSInt  *row, *col;
842 
843   PetscFunctionBegin;
844   if (reuse == MAT_INITIAL_MATRIX) {
845     PetscCall(MatGetBlockSize(A, &bs));
846     M  = A->rmap->N / bs;
847     ai = aa->i;
848     aj = aa->j;
849     PetscCall(PetscMalloc2(nz, &row, nz, &col));
850     for (i = 0; i < M; i++) {
851       ajj = aj + ai[i];
852       rnz = ai[i + 1] - ai[i];
853       for (k = 0; k < rnz; k++) {
854         for (j = 0; j < bs; j++) {
855           for (m = 0; m < bs; m++) {
856             PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
857             PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
858             idx++;
859           }
860         }
861       }
862     }
863     mumps->irn = row;
864     mumps->jcn = col;
865     mumps->nnz = nz;
866     mumps->val = aa->a;
867   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqbaij_seqaij(), so one needs to copy the memory */
868   else mumps->val = aa->a;                                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
869   PetscFunctionReturn(PETSC_SUCCESS);
870 }
871 
872 static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
873 {
874   const PetscInt *ai, *aj, *ajj;
875   PetscInt        bs;
876   PetscCount      nz, rnz, i, j, k, m;
877   PetscMUMPSInt  *row, *col;
878   PetscScalar    *val;
879   Mat_SeqSBAIJ   *aa  = (Mat_SeqSBAIJ *)A->data;
880   const PetscInt  bs2 = aa->bs2, mbs = aa->mbs;
881 #if defined(PETSC_USE_COMPLEX)
882   PetscBool isset, hermitian;
883 #endif
884 
885   PetscFunctionBegin;
886 #if defined(PETSC_USE_COMPLEX)
887   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
888   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
889 #endif
890   ai = aa->i;
891   aj = aa->j;
892   PetscCall(MatGetBlockSize(A, &bs));
893   if (reuse == MAT_INITIAL_MATRIX) {
894     const PetscCount alloc_size = aa->nz * bs2;
895 
896     PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col));
897     if (bs > 1) {
898       PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc));
899       mumps->val = mumps->val_alloc;
900     } else {
901       mumps->val = aa->a;
902     }
903     mumps->irn = row;
904     mumps->jcn = col;
905   } else {
906     row = mumps->irn;
907     col = mumps->jcn;
908   }
909   val = mumps->val;
910 
911   nz = 0;
912   if (bs > 1) {
913     for (i = 0; i < mbs; i++) {
914       rnz = ai[i + 1] - ai[i];
915       ajj = aj + ai[i];
916       for (j = 0; j < rnz; j++) {
917         for (k = 0; k < bs; k++) {
918           for (m = 0; m < bs; m++) {
919             if (ajj[j] > i || k >= m) {
920               if (reuse == MAT_INITIAL_MATRIX) {
921                 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
922                 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
923               }
924               val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
925             }
926           }
927         }
928       }
929     }
930   } else if (reuse == MAT_INITIAL_MATRIX) {
931     for (i = 0; i < mbs; i++) {
932       rnz = ai[i + 1] - ai[i];
933       ajj = aj + ai[i];
934       for (j = 0; j < rnz; j++) {
935         PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
936         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
937         nz++;
938       }
939     }
940     PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscCount_FMT " != %" PetscInt_FMT, nz, aa->nz);
941   } else if (mumps->nest_vals)
942     PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX, MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsbaij_seqsbaij(), so one needs to copy the memory */
943   else mumps->val = aa->a;                               /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
944   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
945   PetscFunctionReturn(PETSC_SUCCESS);
946 }
947 
948 static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
949 {
950   const PetscInt    *ai, *aj, *ajj, *adiag, M = A->rmap->n;
951   PetscCount         nz, rnz, i, j;
952   const PetscScalar *av, *v1;
953   PetscScalar       *val;
954   PetscMUMPSInt     *row, *col;
955   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
956   PetscBool          diagDense;
957 #if defined(PETSC_USE_COMPLEX)
958   PetscBool hermitian, isset;
959 #endif
960 
961   PetscFunctionBegin;
962 #if defined(PETSC_USE_COMPLEX)
963   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
964   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
965 #endif
966   PetscCall(MatSeqAIJGetArrayRead(A, &av));
967   ai = aa->i;
968   aj = aa->j;
969   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, &diagDense));
970   if (reuse == MAT_INITIAL_MATRIX) {
971     /* count nz in the upper triangular part of A */
972     nz = 0;
973     if (!diagDense) {
974       for (i = 0; i < M; i++) {
975         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
976           for (j = ai[i]; j < ai[i + 1]; j++) {
977             if (aj[j] < i) continue;
978             nz++;
979           }
980         } else {
981           nz += ai[i + 1] - adiag[i];
982         }
983       }
984     } else {
985       for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
986     }
987     PetscCall(PetscMalloc2(nz, &row, nz, &col));
988     PetscCall(PetscMalloc1(nz, &val));
989     mumps->nnz = nz;
990     mumps->irn = row;
991     mumps->jcn = col;
992     mumps->val = mumps->val_alloc = val;
993 
994     nz = 0;
995     if (!diagDense) {
996       for (i = 0; i < M; i++) {
997         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
998           for (j = ai[i]; j < ai[i + 1]; j++) {
999             if (aj[j] < i) continue;
1000             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
1001             PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
1002             val[nz] = av[j];
1003             nz++;
1004           }
1005         } else {
1006           rnz = ai[i + 1] - adiag[i];
1007           ajj = aj + adiag[i];
1008           v1  = av + adiag[i];
1009           for (j = 0; j < rnz; j++) {
1010             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
1011             PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
1012             val[nz++] = v1[j];
1013           }
1014         }
1015       }
1016     } else {
1017       for (i = 0; i < M; i++) {
1018         rnz = ai[i + 1] - adiag[i];
1019         ajj = aj + adiag[i];
1020         v1  = av + adiag[i];
1021         for (j = 0; j < rnz; j++) {
1022           PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
1023           PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
1024           val[nz++] = v1[j];
1025         }
1026       }
1027     }
1028   } else {
1029     nz  = 0;
1030     val = mumps->val;
1031     if (!diagDense) {
1032       for (i = 0; i < M; i++) {
1033         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
1034           for (j = ai[i]; j < ai[i + 1]; j++) {
1035             if (aj[j] < i) continue;
1036             val[nz++] = av[j];
1037           }
1038         } else {
1039           rnz = ai[i + 1] - adiag[i];
1040           v1  = av + adiag[i];
1041           for (j = 0; j < rnz; j++) val[nz++] = v1[j];
1042         }
1043       }
1044     } else {
1045       for (i = 0; i < M; i++) {
1046         rnz = ai[i + 1] - adiag[i];
1047         v1  = av + adiag[i];
1048         for (j = 0; j < rnz; j++) val[nz++] = v1[j];
1049       }
1050     }
1051   }
1052   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
1053   PetscFunctionReturn(PETSC_SUCCESS);
1054 }
1055 
1056 static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1057 {
1058   const PetscInt    *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
1059   PetscInt           bs;
1060   PetscCount         rstart, nz, i, j, k, m, jj, irow, countA, countB;
1061   PetscMUMPSInt     *row, *col;
1062   const PetscScalar *av, *bv, *v1, *v2;
1063   PetscScalar       *val;
1064   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)A->data;
1065   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ *)mat->A->data;
1066   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)mat->B->data;
1067   const PetscInt     bs2 = aa->bs2, mbs = aa->mbs;
1068 #if defined(PETSC_USE_COMPLEX)
1069   PetscBool hermitian, isset;
1070 #endif
1071 
1072   PetscFunctionBegin;
1073 #if defined(PETSC_USE_COMPLEX)
1074   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
1075   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
1076 #endif
1077   PetscCall(MatGetBlockSize(A, &bs));
1078   rstart = A->rmap->rstart;
1079   ai     = aa->i;
1080   aj     = aa->j;
1081   bi     = bb->i;
1082   bj     = bb->j;
1083   av     = aa->a;
1084   bv     = bb->a;
1085 
1086   garray = mat->garray;
1087 
1088   if (reuse == MAT_INITIAL_MATRIX) {
1089     nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
1090     PetscCall(PetscMalloc2(nz, &row, nz, &col));
1091     PetscCall(PetscMalloc1(nz, &val));
1092     /* can not decide the exact mumps->nnz now because of the SBAIJ */
1093     mumps->irn = row;
1094     mumps->jcn = col;
1095     mumps->val = mumps->val_alloc = val;
1096   } else {
1097     val = mumps->val;
1098   }
1099 
1100   jj   = 0;
1101   irow = rstart;
1102   for (i = 0; i < mbs; i++) {
1103     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
1104     countA = ai[i + 1] - ai[i];
1105     countB = bi[i + 1] - bi[i];
1106     bjj    = bj + bi[i];
1107     v1     = av + ai[i] * bs2;
1108     v2     = bv + bi[i] * bs2;
1109 
1110     if (bs > 1) {
1111       /* A-part */
1112       for (j = 0; j < countA; j++) {
1113         for (k = 0; k < bs; k++) {
1114           for (m = 0; m < bs; m++) {
1115             if (rstart + ajj[j] * bs > irow || k >= m) {
1116               if (reuse == MAT_INITIAL_MATRIX) {
1117                 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
1118                 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
1119               }
1120               val[jj++] = v1[j * bs2 + m + k * bs];
1121             }
1122           }
1123         }
1124       }
1125 
1126       /* B-part */
1127       for (j = 0; j < countB; j++) {
1128         for (k = 0; k < bs; k++) {
1129           for (m = 0; m < bs; m++) {
1130             if (reuse == MAT_INITIAL_MATRIX) {
1131               PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
1132               PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
1133             }
1134             val[jj++] = v2[j * bs2 + m + k * bs];
1135           }
1136         }
1137       }
1138     } else {
1139       /* A-part */
1140       for (j = 0; j < countA; j++) {
1141         if (reuse == MAT_INITIAL_MATRIX) {
1142           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1143           PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
1144         }
1145         val[jj++] = v1[j];
1146       }
1147 
1148       /* B-part */
1149       for (j = 0; j < countB; j++) {
1150         if (reuse == MAT_INITIAL_MATRIX) {
1151           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1152           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
1153         }
1154         val[jj++] = v2[j];
1155       }
1156     }
1157     irow += bs;
1158   }
1159   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj;
1160   PetscFunctionReturn(PETSC_SUCCESS);
1161 }
1162 
1163 static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1164 {
1165   const PetscInt    *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
1166   PetscCount         rstart, cstart, nz, i, j, jj, irow, countA, countB;
1167   PetscMUMPSInt     *row, *col;
1168   const PetscScalar *av, *bv, *v1, *v2;
1169   PetscScalar       *val;
1170   Mat                Ad, Ao;
1171   Mat_SeqAIJ        *aa;
1172   Mat_SeqAIJ        *bb;
1173 
1174   PetscFunctionBegin;
1175   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
1176   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
1177   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
1178 
1179   aa = (Mat_SeqAIJ *)Ad->data;
1180   bb = (Mat_SeqAIJ *)Ao->data;
1181   ai = aa->i;
1182   aj = aa->j;
1183   bi = bb->i;
1184   bj = bb->j;
1185 
1186   rstart = A->rmap->rstart;
1187   cstart = A->cmap->rstart;
1188 
1189   if (reuse == MAT_INITIAL_MATRIX) {
1190     nz = (PetscCount)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
1191     PetscCall(PetscMalloc2(nz, &row, nz, &col));
1192     PetscCall(PetscMalloc1(nz, &val));
1193     mumps->nnz = nz;
1194     mumps->irn = row;
1195     mumps->jcn = col;
1196     mumps->val = mumps->val_alloc = val;
1197   } else {
1198     val = mumps->val;
1199   }
1200 
1201   jj   = 0;
1202   irow = rstart;
1203   for (i = 0; i < m; i++) {
1204     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
1205     countA = ai[i + 1] - ai[i];
1206     countB = bi[i + 1] - bi[i];
1207     bjj    = bj + bi[i];
1208     v1     = av + ai[i];
1209     v2     = bv + bi[i];
1210 
1211     /* A-part */
1212     for (j = 0; j < countA; j++) {
1213       if (reuse == MAT_INITIAL_MATRIX) {
1214         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1215         PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj]));
1216       }
1217       val[jj++] = v1[j];
1218     }
1219 
1220     /* B-part */
1221     for (j = 0; j < countB; j++) {
1222       if (reuse == MAT_INITIAL_MATRIX) {
1223         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1224         PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
1225       }
1226       val[jj++] = v2[j];
1227     }
1228     irow++;
1229   }
1230   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
1231   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
1232   PetscFunctionReturn(PETSC_SUCCESS);
1233 }
1234 
/* Convert an MPIBAIJ matrix to distributed COO triples with global indices for MUMPS,
   expanding every bs x bs block. For each block row, the diagonal-block (A) entries are
   emitted before the off-diagonal-block (B) entries; blocks are traversed column-by-column
   (column j, then row n within the block), matching the column-major block storage. */
static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ *)mat->A->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)mat->B->data;
  const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
  const PetscInt    *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart;
  const PetscInt     bs2 = mat->bs2;
  PetscInt           bs;
  PetscCount         nz, i, j, k, n, jj, irow, countA, countB, idx;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
  PetscScalar       *val;

  PetscFunctionBegin;
  PetscCall(MatGetBlockSize(A, &bs));
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = bs2 * (aa->nz + bb->nz); /* every scalar entry of every block becomes one triple */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart; /* global index of the first row of the current block row */
  for (i = 0; i < mbs; i++) {
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    ajj    = aj + ai[i];
    bjj    = bj + bi[i];
    v1     = av + bs2 * ai[i]; /* values of this block row in the diagonal part */
    v2     = bv + bs2 * bi[i]; /* values of this block row in the off-diagonal part */

    idx = 0;
    /* A-part */
    for (k = 0; k < countA; k++) {
      for (j = 0; j < bs; j++) {
        for (n = 0; n < bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
            PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj]));
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part */
    for (k = 0; k < countB; k++) {
      for (j = 0; j < bs; j++) {
        for (n = 0; n < bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
            PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1304 
1305 static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1306 {
1307   const PetscInt    *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
1308   PetscCount         rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
1309   PetscMUMPSInt     *row, *col;
1310   const PetscScalar *av, *bv, *v1, *v2;
1311   PetscScalar       *val;
1312   Mat                Ad, Ao;
1313   Mat_SeqAIJ        *aa;
1314   Mat_SeqAIJ        *bb;
1315 #if defined(PETSC_USE_COMPLEX)
1316   PetscBool hermitian, isset;
1317 #endif
1318 
1319   PetscFunctionBegin;
1320 #if defined(PETSC_USE_COMPLEX)
1321   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
1322   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
1323 #endif
1324   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
1325   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
1326   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
1327 
1328   aa = (Mat_SeqAIJ *)Ad->data;
1329   bb = (Mat_SeqAIJ *)Ao->data;
1330   ai = aa->i;
1331   aj = aa->j;
1332   bi = bb->i;
1333   bj = bb->j;
1334   PetscCall(MatGetDiagonalMarkers_SeqAIJ(Ad, &adiag, NULL));
1335   rstart = A->rmap->rstart;
1336 
1337   if (reuse == MAT_INITIAL_MATRIX) {
1338     nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
1339     nzb = 0; /* num of upper triangular entries in mat->B */
1340     for (i = 0; i < m; i++) {
1341       nza += (ai[i + 1] - adiag[i]);
1342       countB = bi[i + 1] - bi[i];
1343       bjj    = bj + bi[i];
1344       for (j = 0; j < countB; j++) {
1345         if (garray[bjj[j]] > rstart) nzb++;
1346       }
1347     }
1348 
1349     nz = nza + nzb; /* total nz of upper triangular part of mat */
1350     PetscCall(PetscMalloc2(nz, &row, nz, &col));
1351     PetscCall(PetscMalloc1(nz, &val));
1352     mumps->nnz = nz;
1353     mumps->irn = row;
1354     mumps->jcn = col;
1355     mumps->val = mumps->val_alloc = val;
1356   } else {
1357     val = mumps->val;
1358   }
1359 
1360   jj   = 0;
1361   irow = rstart;
1362   for (i = 0; i < m; i++) {
1363     ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
1364     v1     = av + adiag[i];
1365     countA = ai[i + 1] - adiag[i];
1366     countB = bi[i + 1] - bi[i];
1367     bjj    = bj + bi[i];
1368     v2     = bv + bi[i];
1369 
1370     /* A-part */
1371     for (j = 0; j < countA; j++) {
1372       if (reuse == MAT_INITIAL_MATRIX) {
1373         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1374         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
1375       }
1376       val[jj++] = v1[j];
1377     }
1378 
1379     /* B-part */
1380     for (j = 0; j < countB; j++) {
1381       if (garray[bjj[j]] > rstart) {
1382         if (reuse == MAT_INITIAL_MATRIX) {
1383           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1384           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
1385         }
1386         val[jj++] = v2[j];
1387       }
1388     }
1389     irow++;
1390   }
1391   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
1392   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
1393   PetscFunctionReturn(PETSC_SUCCESS);
1394 }
1395 
1396 static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1397 {
1398   const PetscScalar *av;
1399   const PetscInt     M = A->rmap->n;
1400   PetscCount         i;
1401   PetscMUMPSInt     *row, *col;
1402   Vec                v;
1403 
1404   PetscFunctionBegin;
1405   PetscCall(MatDiagonalGetDiagonal(A, &v));
1406   PetscCall(VecGetArrayRead(v, &av));
1407   if (reuse == MAT_INITIAL_MATRIX) {
1408     PetscCall(PetscMalloc2(M, &row, M, &col));
1409     for (i = 0; i < M; i++) {
1410       PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
1411       col[i] = row[i];
1412     }
1413     mumps->val = (PetscScalar *)av;
1414     mumps->irn = row;
1415     mumps->jcn = col;
1416     mumps->nnz = M;
1417   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, M)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_diagonal_xaij(), so one needs to copy the memory */
1418   else mumps->val = (PetscScalar *)av;                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
1419   PetscCall(VecRestoreArrayRead(v, &av));
1420   PetscFunctionReturn(PETSC_SUCCESS);
1421 }
1422 
/* Convert a dense matrix to COO triples (every entry of the local m x N block is emitted).
   When the leading dimension equals the local row count, mumps->val aliases the dense storage
   directly; otherwise each column is compacted into a separately allocated contiguous buffer.
   NOTE(review): shift is intentionally unused here, so the emitted indices are 0-based —
   presumably adjusted by (or acceptable to) the caller; confirm against the call sites. */
static PetscErrorCode MatConvertToTriples_dense_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  PetscScalar   *v;
  const PetscInt m = A->rmap->n, N = A->cmap->N;
  PetscInt       lda;
  PetscCount     i, j;
  PetscMUMPSInt *row, *col;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatDenseGetLDA(A, &lda));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc2(m * N, &row, m * N, &col));
    /* first column: global row indices, column index 0 */
    for (i = 0; i < m; i++) {
      col[i] = 0;
      PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
    }
    /* remaining columns reuse the row indices of the first column */
    for (j = 1; j < N; j++) {
      for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(j, col + i + m * j));
      PetscCall(PetscArraycpy(row + m * j, row + m * (j - 1), m));
    }
    if (lda == m) mumps->val = v; /* storage is contiguous: alias it directly */
    else {
      /* lda > m: compact each column into a contiguous buffer owned by mumps */
      PetscCall(PetscMalloc1(m * N, &mumps->val));
      mumps->val_alloc = mumps->val;
      for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = m * N;
  } else {
    /* MAT_REUSE_MATRIX: alias when possible, otherwise recopy the columns */
    if (lda == m && !mumps->nest_vals) mumps->val = v;
    else {
      for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
    }
  }
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1462 
1463 // If the input Mat (sub) is either MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL, this function gets the parent Mat until it is not a
1464 // MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL itself and returns the appropriate shift, scaling, and whether the parent Mat should be conjugated
1465 // and its rows and columns permuted
1466 // TODO FIXME: this should not be in this file and should instead be refactored where the same logic applies, e.g., MatAXPY_Dense_Nest()
/* MatGetTranspose_TransposeVirtual - peel off nested (Hermitian-)transpose layers

   sub       - in/out: on exit points to the innermost parent Mat that is itself neither
               MATTRANSPOSEVIRTUAL nor MATHERMITIANTRANSPOSEVIRTUAL
   conjugate - in/out (optional): toggled once per Hermitian-transpose layer encountered
   vshift    - in/out (optional, used together with vscale): accumulated scalar shift
   vscale    - in/out (optional): accumulated scalar scaling
   swap      - in/out (optional): toggled once per layer; indicates whether rows and columns
               of the parent must be permuted */
static PetscErrorCode MatGetTranspose_TransposeVirtual(Mat *sub, PetscBool *conjugate, PetscScalar *vshift, PetscScalar *vscale, PetscBool *swap)
{
  Mat         A;
  PetscScalar s[2]; // s[0] = shift, s[1] = scaling of the current virtual-transpose layer
  PetscBool   isTrans, isHTrans, compare;

  PetscFunctionBegin;
  do {
    PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATTRANSPOSEVIRTUAL, &isTrans));
    if (isTrans) {
      PetscCall(MatTransposeGetMat(*sub, &A));
      isHTrans = PETSC_FALSE;
    } else {
      PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
      if (isHTrans) PetscCall(MatHermitianTransposeGetMat(*sub, &A));
    }
    compare = (PetscBool)(isTrans || isHTrans);
    if (compare) {
      if (vshift && vscale) {
        PetscCall(MatShellGetScalingShifts(*sub, s, s + 1, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
        // fold this layer's shift/scale into the accumulated coefficients; when a Hermitian
        // transpose is pending (*conjugate set), the layer's coefficients enter conjugated
        if (!*conjugate) {
          *vshift += s[0] * *vscale;
          *vscale *= s[1];
        } else {
          *vshift += PetscConj(s[0]) * *vscale;
          *vscale *= PetscConj(s[1]);
        }
      }
      if (swap) *swap = (PetscBool)!*swap;
      if (isHTrans && conjugate) *conjugate = (PetscBool)!*conjugate;
      *sub = A; // descend into the parent and loop again in case it is also a virtual transpose
    }
  } while (compare);
  PetscFunctionReturn(PETSC_SUCCESS);
}
1502 
// Convert a MATNEST matrix into the global COO triples (irn/jcn/val) layout consumed by MUMPS.
// MAT_INITIAL_MATRIX: selects a per-block converter, allocates the concatenated COO arrays, maps each
// block's local indices into the global nested numbering (communicating off-process column indices
// through a PetscSF per block), and optionally builds id.blkptr for ICNTL(15).
// MAT_REUSE_MATRIX: only refreshes the numerical values block by block using the stored offsets.
// shift selects 0-based (0) or 1-based (1) output indices.
static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat     **mats;
  PetscInt  nr, nc;
  PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE; /* symmetric factorization: assemble only the upper-triangular blocks */

  PetscFunctionBegin;
  PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscMUMPSInt *irns, *jcns;
    PetscScalar   *vals;
    PetscCount     totnnz, cumnnz, maxnnz;
    PetscInt      *pjcns_w, Mbs = 0; /* Mbs counts the total number of (size/bs) blocks for ICNTL(15) */
    IS            *rows, *cols;
    PetscInt     **rows_idx, **cols_idx;

    /* First pass: choose a converter per block and estimate the total/maximum local nonzeros */
    cumnnz = 0;
    maxnnz = 0;
    PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples));
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        Mat sub = mats[r][c];

        mumps->nest_convert_to_triples[r * nc + c] = NULL;
        if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */
        if (sub) {
          PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL;
          PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense;
          MatInfo   info;

          /* strip any (Hermitian-)transpose wrapper so the type tests below see the parent Mat */
          PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
          PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
          PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));

          if (chol) {
            if (r == c) {
              /* diagonal block of a symmetric factorization: emit only its upper triangle (sbaij-style) */
              if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij;
              else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij;
              else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij;
              else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij;
              else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
              else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
            } else {
              /* strictly upper off-diagonal block: emit all of its entries */
              if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
              else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
              else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
              else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
              else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
              else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
            }
          } else {
            if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
            else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
            else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
            else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
            else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
            else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
          }
          PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name);
          mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples;
          PetscCall(MatGetInfo(sub, MAT_LOCAL, &info));
          cumnnz += (PetscCount)info.nz_used; /* can be overestimated for Cholesky */
          maxnnz = PetscMax(maxnnz, info.nz_used);
        }
      }
    }

    /* Allocate total COO */
    totnnz = cumnnz;
    PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns));
    PetscCall(PetscMalloc1(totnnz, &vals));

    /* Handle rows and column maps
       We directly map rows and use an SF for the columns */
    PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx));
    PetscCall(MatNestGetISs(A, rows, cols));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r]));
    for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c]));
    /* with 64-bit PetscInt a separate PetscInt workspace is needed, since mumps->jcn is PetscMUMPSInt */
    if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w));
    else (void)maxnnz;

    /* Second pass: convert each block and remap its triples into the global nested numbering */
    cumnnz = 0;
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        Mat             sub    = mats[r][c];
        const PetscInt *ridx   = rows_idx[r];
        const PetscInt *cidx   = cols_idx[c];
        PetscScalar     vscale = 1.0, vshift = 0.0;
        PetscInt        rst, size, bs;
        PetscSF         csf;
        PetscBool       conjugate = PETSC_FALSE, swap = PETSC_FALSE;
        PetscLayout     cmap;
        PetscInt        innz;

        mumps->nest_vals_start[r * nc + c] = cumnnz;
        if (c == r) {
          /* accumulate the global block count Mbs for the ICNTL(15) blkptr built below */
          PetscCall(ISGetSize(rows[r], &size));
          if (!mumps->nest_convert_to_triples[r * nc + c]) {
            /* NOTE(review): this inner c deliberately shadows the outer loop index */
            for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row
          }
          PetscCall(MatGetBlockSize(sub, &bs));
          Mbs += size / bs;
        }
        if (!mumps->nest_convert_to_triples[r * nc + c]) continue;

        /* Extract inner blocks if needed */
        PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, &swap));
        PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell");

        /* Get column layout to map off-process columns */
        PetscCall(MatGetLayouts(sub, NULL, &cmap));

        /* Get row start to map on-process rows */
        PetscCall(MatGetOwnershipRange(sub, &rst, NULL));

        /* Directly use the mumps datastructure and use C ordering for now */
        PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps));

        /* Swap the role of rows and columns indices for transposed blocks
           since we need values with global final ordering */
        if (swap) {
          cidx = rows_idx[r];
          ridx = cols_idx[c];
        }

        /* Communicate column indices
           This could have been done with a single SF but it would have complicated the code a lot.
           But since we do it only once, we pay the price of setting up an SF for each block */
        if (PetscDefined(USE_64BIT_INDICES)) {
          for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k];
        } else pjcns_w = (PetscInt *)mumps->jcn; /* This cast is needed only to silence warnings for 64bit integers builds */
        PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf));
        PetscCall(PetscIntCast(mumps->nnz, &innz));
        PetscCall(PetscSFSetGraphLayout(csf, cmap, innz, NULL, PETSC_OWN_POINTER, pjcns_w));
        PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); /* pjcns_w now holds global nested column indices */
        PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
        PetscCall(PetscSFDestroy(&csf));

        /* Import indices: use direct map for rows and mapped indices for columns */
        if (swap) {
          for (PetscInt k = 0; k < mumps->nnz; k++) {
            PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k]));
            PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k]));
          }
        } else {
          for (PetscInt k = 0; k < mumps->nnz; k++) {
            PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k]));
            PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k]));
          }
        }

        /* Import values to full COO */
        if (conjugate) { /* conjugate the entries */
          PetscScalar *v = vals + cumnnz;
          for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * PetscConj(mumps->val[k]);
        } else if (vscale != 1.0) {
          PetscScalar *v = vals + cumnnz;
          for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * mumps->val[k];
        } else PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz));

        /* Shift new starting point and sanity check */
        cumnnz += mumps->nnz;
        /* NOTE(review): the condition is <= while the message reads "!=": the message only fires on overflow (>) */
        PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);

        /* Free scratch memory */
        PetscCall(PetscFree2(mumps->irn, mumps->jcn));
        PetscCall(PetscFree(mumps->val_alloc));
        mumps->val = NULL;
        mumps->nnz = 0;
      }
    }
    if (mumps->id.ICNTL(15) == 1) { /* user requested the block-format feature: build the 1-based blkptr on rank 0 */
      if (Mbs != A->rmap->N) {
        PetscMPIInt rank, size;

        PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
        PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
        if (rank == 0) {
          /* NOTE(review): this local shift shadows the function parameter; here it counts emitted blocks */
          PetscInt shift = 0;

          PetscCall(PetscMUMPSIntCast(Mbs, &mumps->id.nblk));
          PetscCall(PetscFree(mumps->id.blkptr));
          PetscCall(PetscMalloc1(Mbs + 1, &mumps->id.blkptr));
          mumps->id.blkptr[0] = 1;
          /* walk process by process, then nest-row by nest-row, appending each owner's blocks of size bs */
          for (PetscInt i = 0; i < size; ++i) {
            for (PetscInt r = 0; r < nr; r++) {
              Mat             sub = mats[r][r];
              const PetscInt *ranges;
              PetscInt        bs;

              for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row
              PetscCall(MatGetOwnershipRanges(sub, &ranges));
              PetscCall(MatGetBlockSize(sub, &bs));
              for (PetscInt j = 0, start = mumps->id.blkptr[shift] + bs; j < ranges[i + 1] - ranges[i]; j += bs) PetscCall(PetscMUMPSIntCast(start + j, mumps->id.blkptr + shift + j / bs + 1));
              shift += (ranges[i + 1] - ranges[i]) / bs;
            }
          }
        }
      } else mumps->id.ICNTL(15) = 0; /* every block has size 1: the feature brings nothing, disable it */
    }
    if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r]));
    for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c]));
    PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx));
    if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);
    mumps->nest_vals_start[nr * nc] = cumnnz;

    /* Set pointers for final MUMPS data structure */
    mumps->nest_vals = vals;
    mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */
    mumps->val       = vals;
    mumps->irn       = irns;
    mumps->jcn       = jcns;
    mumps->nnz       = cumnnz;
  } else {
    /* MAT_REUSE_MATRIX: refresh only the numerical values; the block converters write into
       mumps->val, which is pointed in turn at each block's slice of nest_vals */
    PetscScalar *oval = mumps->nest_vals;
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        PetscBool   conjugate = PETSC_FALSE;
        Mat         sub       = mats[r][c];
        PetscScalar vscale = 1.0, vshift = 0.0;
        PetscInt    midx = r * nc + c;

        if (!mumps->nest_convert_to_triples[midx]) continue;
        PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, NULL));
        PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell");
        mumps->val = oval + mumps->nest_vals_start[midx];
        PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps));
        if (conjugate) {
          PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
          for (PetscCount k = 0; k < nnz; k++) mumps->val[k] = vscale * PetscConj(mumps->val[k]);
        } else if (vscale != 1.0) {
          PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
          for (PetscCount k = 0; k < nnz; k++) mumps->val[k] *= vscale;
        }
      }
    }
    mumps->val = oval; /* restore the pointer to the start of the concatenated values */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1750 
// Destroy the MUMPS factor Mat: free all PETSc-side scratch data, terminate the MUMPS instance
// (JOB_END) if it was ever initialized, release the duplicated communicator, and clear the
// composed query/set functions. The teardown order matters: JOB_END must run before
// MatMumpsFreeInternalID(), and the communicator is released only inside the same guard.
static PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;

  PetscFunctionBegin;
  /* solution index map, RHS/solution scatters and the sequential work vectors */
  PetscCall(PetscFree(mumps->id.isol_loc));
  PetscCall(VecScatterDestroy(&mumps->scat_rhs));
  PetscCall(VecScatterDestroy(&mumps->scat_sol));
  PetscCall(VecDestroy(&mumps->b_seq));
  PetscCall(VecDestroy(&mumps->x_seq));
  /* user-provided ordering/block data handed to MUMPS */
  PetscCall(PetscFree(mumps->id.perm_in));
  PetscCall(PetscFree(mumps->id.blkvar));
  PetscCall(PetscFree(mumps->id.blkptr));
  /* COO triples and auxiliary buffers */
  PetscCall(PetscFree2(mumps->irn, mumps->jcn));
  PetscCall(PetscFree(mumps->val_alloc));
  PetscCall(PetscFree(mumps->info));
  PetscCall(PetscFree(mumps->ICNTL_pre));
  PetscCall(PetscFree(mumps->CNTL_pre));
  PetscCall(MatMumpsResetSchur_Private(mumps));
  if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
    mumps->id.job = JOB_END;
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
    if (mumps->mumps_comm != MPI_COMM_NULL) {
      /* the communicator was either duplicated (OMP path) or borrowed from the PETSc comm pool */
      if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
      else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
    }
  }
  PetscCall(MatMumpsFreeInternalID(&mumps->id));
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  if (mumps->use_petsc_omp_support) {
    /* OMP-support-only buffers allocated in MatMumpsSetUpDistRHSInfo() and the OMP controller */
    PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
    PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
    PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
  }
#endif
  PetscCall(PetscFree(mumps->ia_alloc));
  PetscCall(PetscFree(mumps->ja_alloc));
  PetscCall(PetscFree(mumps->recvcount));
  PetscCall(PetscFree(mumps->reqs));
  PetscCall(PetscFree(mumps->irhs_loc));
  PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples));
  PetscCall(PetscFree(mumps->nest_vals));
  PetscCall(PetscFree(A->data));

  /* clear composed functions */
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetBlk_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1814 
1815 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */
static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
{
  Mat_MUMPS        *mumps   = (Mat_MUMPS *)A->data;
  const PetscMPIInt ompsize = mumps->omp_comm_size;
  PetscInt          i, m, M, rstart;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &M, NULL));
  PetscCall(MatGetLocalSize(A, &m, NULL));
  PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
  if (ompsize == 1) {
    /* No OMP aggregation: each rank hands MUMPS its own contiguous rows directly */
    if (!mumps->irhs_loc) { /* build the (cached) 1-based row index list lazily */
      mumps->nloc_rhs = (PetscMUMPSInt)m;
      PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
      PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
      for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(rstart + i + 1, &mumps->irhs_loc[i])); /* use 1-based indices */
    }
    PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, m * nrhs, array, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc));
  } else {
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
    /* OMP path: the omp-master rank gathers the RHS rows of its whole omp group and presents
       them to MUMPS as one local block */
    const PetscInt *ranges;
    PetscMPIInt     j, k, sendcount, *petsc_ranks, *omp_ranks;
    MPI_Group       petsc_group, omp_group;
    PetscScalar    *recvbuf = NULL;

    if (mumps->is_omp_master) {
      /* Lazily initialize the omp stuff for distributed rhs */
      if (!mumps->irhs_loc) {
        PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
        PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
        /* translate omp-comm ranks to petsc-comm ranks to look up each member's row range */
        PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
        PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
        for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
        PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));

        /* Populate mumps->irhs_loc[], rhs_nrow[] */
        mumps->nloc_rhs = 0;
        PetscCall(MatGetOwnershipRanges(A, &ranges));
        for (j = 0; j < ompsize; j++) {
          mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
          mumps->nloc_rhs += mumps->rhs_nrow[j];
        }
        PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
        for (j = k = 0; j < ompsize; j++) {
          for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) PetscCall(PetscMUMPSIntCast(i + 1, &mumps->irhs_loc[k])); /* uses 1-based indices */
        }

        PetscCall(PetscFree2(omp_ranks, petsc_ranks));
        PetscCallMPI(MPI_Group_free(&petsc_group));
        PetscCallMPI(MPI_Group_free(&omp_group));
      }

      /* Realloc buffers when current nrhs is bigger than what we have met */
      if (nrhs > mumps->max_nrhs) {
        PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
        PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
        mumps->max_nrhs = nrhs;
      }

      /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
      for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
      mumps->rhs_disps[0] = 0;
      for (j = 1; j < ompsize; j++) {
        mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
        PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
      }
      recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
    }

    PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
    PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));

    if (mumps->is_omp_master) {
      if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
        /* the gather interleaves per-rank column blocks; re-lay them out so each RHS column is
           contiguous with leading dimension nloc_rhs, as set through id.lrhs_loc below */
        PetscScalar *dst, *dstbase = mumps->rhs_loc;
        for (j = 0; j < ompsize; j++) {
          const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
          dst                    = dstbase;
          for (i = 0; i < nrhs; i++) {
            PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
            src += mumps->rhs_nrow[j];
            dst += mumps->nloc_rhs;
          }
          dstbase += mumps->rhs_nrow[j];
        }
      }
      PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nloc_rhs * nrhs, mumps->rhs_loc, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc));
    }
#endif /* PETSC_HAVE_OPENMP_SUPPORT */
  }
  /* publish the RHS description to MUMPS: count, local rows, leading dimension, 1-based indices */
  mumps->id.nrhs     = (PetscMUMPSInt)nrhs;
  mumps->id.nloc_rhs = (PetscMUMPSInt)mumps->nloc_rhs;
  mumps->id.lrhs_loc = mumps->nloc_rhs;
  mumps->id.irhs_loc = mumps->irhs_loc;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1912 
// Solve A x = b using the computed MUMPS factors. Handles the three RHS layouts (distributed
// ICNTL(20)=10, centralized on rank 0, and the uniprocessor case), the optional Schur-complement
// condensation/expansion, and the scatter of MUMPS's distributed solution back into the PETSc Vec.
static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
{
  Mat_MUMPS         *mumps  = (Mat_MUMPS *)A->data;
  const PetscScalar *barray = NULL;
  PetscScalar       *array;
  IS                 is_iden, is_petsc;
  PetscInt           i;
  PetscBool          second_solve = PETSC_FALSE;
  static PetscBool   cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM "
                                   "Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",
                                   &cite1));
  PetscCall(PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel "
                                   "Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",
                                   &cite2));

  /* if factorization failed, flag x (e.g. with infinities) and bail out instead of calling MUMPS */
  PetscCall(VecFlag(x, A->factorerrortype));
  if (A->factorerrortype) {
    PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  mumps->id.nrhs = 1;
  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS, need to set rhs_loc[], irhs_loc[] */
      PetscCall(VecGetArrayRead(b, &barray)); /* restored after the solve, see below */
      PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, barray));
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential b_seq vector*/
      PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
      if (!mumps->myid) { /* only rank 0 hands the centralized RHS to MUMPS */
        PetscCall(VecGetArray(mumps->b_seq, &array));
        PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->b_seq->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
      }
    }
  } else { /* petsc_size == 1, use MUMPS's dense centralized RHS feature, so that we don't need to bother with isol_loc[] to get the solution */
    mumps->id.ICNTL(20) = 0;
    PetscCall(VecCopy(b, x)); /* MUMPS overwrites the RHS with the solution in place, so solve in x */
    PetscCall(VecGetArray(x, &array));
    PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, x->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
  }

  /*
     handle condensation step of Schur complement (if any)
     We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
     According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase
     Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S
  */
  if (mumps->id.size_schur > 0) {
    PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
    if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
      second_solve = PETSC_TRUE;
      PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs
      mumps->id.ICNTL(26) = 1;                                /* condensation phase */
    } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
  }

  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps); // reduced solve, put solution in id.redrhs
  PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));

  /* handle expansion step of Schur complement (if any) */
  if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
  else if (mumps->id.ICNTL(26) == 1) { // condense the right hand side
    PetscCall(MatMumpsSolveSchur_Private(A));
    /* scatter the Schur solution back into x; listvar_schur holds 1-based indices */
    for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1] = ID_FIELD_GET(mumps->id, redrhs, i);
  }

  if (mumps->petsc_size > 1) { /* convert mumps distributed solution to PETSc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of ilsol_loc may change (not its size, lsol_loc), recreates scat_sol */
      PetscCall(VecScatterDestroy(&mumps->scat_sol));
    }
    if (!mumps->scat_sol) { /* create scatter scat_sol */
      PetscInt *isol2_loc = NULL;
      PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */
      PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc));
      for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1;                        /* change Fortran style to C style */
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */
      PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol));
      PetscCall(ISDestroy(&is_iden));
      PetscCall(ISDestroy(&is_petsc));
      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
    }

    /* copy/cast the MUMPS-precision local solution into x_seq, then scatter to the parallel x */
    PetscScalar *xarray;
    PetscCall(VecGetArray(mumps->x_seq, &xarray));
    PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.lsol_loc, mumps->id.precision, mumps->id.sol_loc, xarray));
    PetscCall(VecRestoreArray(mumps->x_seq, &xarray));
    PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));

    if (mumps->ICNTL20 == 10) { // distributed RHS
      PetscCall(VecRestoreArrayRead(b, &barray));
    } else if (!mumps->myid) { // centralized RHS
      PetscCall(VecRestoreArray(mumps->b_seq, &array));
    }
  } else {
    // id.rhs has the solution in mumps precision
    PetscCall(MatMumpsCastMumpsScalarArray(x->map->n, mumps->id.precision, mumps->id.rhs, array));
    PetscCall(VecRestoreArray(x, &array));
  }

  /* INFO(28) reports the solve entry count; a negative value encodes the count in millions (MUMPS convention) */
  PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2024 
2025 static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x)
2026 {
2027   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
2028   const PetscMUMPSInt value = mumps->id.ICNTL(9);
2029 
2030   PetscFunctionBegin;
2031   mumps->id.ICNTL(9) = 0;
2032   PetscCall(MatSolve_MUMPS(A, b, x));
2033   mumps->id.ICNTL(9) = value;
2034   PetscFunctionReturn(PETSC_SUCCESS);
2035 }
2036 
/* Solve A * X = B (or A^T * X = B, depending on ICNTL(9)) with multiple right-hand sides.

   X must be MATDENSE. B is either MATDENSE, or a MATTRANSPOSEVIRTUAL wrapping an AIJ
   matrix Bt: MUMPS wants a sparse RHS in compressed COLUMN storage, so callers pass
   Bt = (actual RHS)^T and Bt's CSR arrays serve as the RHS's CSC arrays; see
   MatMatTransposeSolve_MUMPS(). The uniprocessor case is handled in-place and returns
   early; the parallel case centralizes/distributes the RHS as required by MUMPS and
   scatters the distributed solution (sol_loc/isol_loc) back into X. */
static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X)
{
  Mat                Bt = NULL;
  PetscBool          denseX, denseB, flg, flgT;
  Mat_MUMPS         *mumps = (Mat_MUMPS *)A->data;
  PetscInt           i, nrhs, M, nrhsM;
  PetscScalar       *array;
  const PetscScalar *barray;
  PetscInt           lsol_loc, nlsol_loc, *idxx, iidx = 0;
  PetscMUMPSInt     *isol_loc, *isol_loc_save;
  PetscScalar       *sol_loc;
  void              *sol_loc_save;
  PetscCount         sol_loc_len_save;
  IS                 is_to, is_from;
  PetscInt           k, proc, j, m, myrstart;
  const PetscInt    *rstart;
  Vec                v_mpi, msol_loc;
  VecScatter         scat_sol;
  Vec                b_seq;
  VecScatter         scat_rhs;
  PetscScalar       *aa;
  PetscInt           spnr, *ia, *ja;
  Mat_MPIAIJ        *b = NULL;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL));
  PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");

  PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL));

  if (denseB) {
    PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution");
    mumps->id.ICNTL(20) = 0; /* dense RHS */
  } else {                   /* sparse B */
    PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices");
    PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT));
    PetscCheck(flgT, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix");
    /* reject any scaling/shift attached to the shell: we use the raw wrapped matrix below */
    PetscCall(MatShellGetScalingShifts(B, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
    /* input B is transpose of actual RHS matrix,
     because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
    PetscCall(MatTransposeGetMat(B, &Bt));
    mumps->id.ICNTL(20) = 1; /* sparse RHS */
  }

  PetscCall(MatGetSize(B, &M, &nrhs));
  PetscCall(PetscIntMultError(nrhs, M, &nrhsM)); /* overflow-checked nrhs*M = total number of entries in B (and X) */
  mumps->id.nrhs = (PetscMUMPSInt)nrhs;
  mumps->id.lrhs = (PetscMUMPSInt)M;

  if (mumps->petsc_size == 1) { // handle this easy case specially and return early
    PetscScalar *aa;
    PetscInt     spnr, *ia, *ja;
    PetscBool    second_solve = PETSC_FALSE;

    PetscCall(MatDenseGetArray(X, &array));
    if (denseB) {
      /* copy B to X */
      PetscCall(MatDenseGetArrayRead(B, &barray));
      PetscCall(PetscArraycpy(array, barray, nrhsM));
      PetscCall(MatDenseRestoreArrayRead(B, &barray));
    } else { /* sparse B */
      /* hand Bt's CSR (= RHS's CSC) directly to MUMPS as the sparse RHS */
      PetscCall(MatSeqAIJGetArray(Bt, &aa));
      PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
      PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
      PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->id.nz_rhs, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
    }
    /* id.rhs doubles as input (dense RHS) and output (solution) buffer for MUMPS */
    PetscCall(MatMumpsMakeMumpsScalarArray(denseB, nrhsM, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));

    /* handle condensation step of Schur complement (if any) */
    if (mumps->id.size_schur > 0) {
      if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
        second_solve = PETSC_TRUE;
        PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs
        mumps->id.ICNTL(26) = 1;                                /* condensation phase, i.e, to solve id.redrhs */
      } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
    }

    mumps->id.job = JOB_SOLVE;
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));

    /* handle expansion step of Schur complement (if any) */
    if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
    else if (mumps->id.ICNTL(26) == 1) { // condense the right hand side
      PetscCall(MatMumpsSolveSchur_Private(A));
      /* copy redrhs values back into X at the Schur variable positions (1-based listvar_schur) */
      for (j = 0; j < nrhs; ++j)
        for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1 + j * M] = ID_FIELD_GET(mumps->id, redrhs, i + j * mumps->id.lredrhs);
    }

    if (!denseB) { /* sparse B, restore ia, ja */
      PetscCall(MatSeqAIJRestoreArray(Bt, &aa));
      PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
    }

    // no matter dense B or sparse B, solution is in id.rhs; convert it to array of X.
    PetscCall(MatMumpsCastMumpsScalarArray(nrhsM, mumps->id.precision, mumps->id.rhs, array));
    PetscCall(MatDenseRestoreArray(X, &array));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* parallel case: MUMPS requires rhs B to be centralized on the host! */
  PetscCheck(!mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");

  /* create msol_loc to hold mumps local solution */
  isol_loc_save         = mumps->id.isol_loc; /* save these, as we want to reuse them in MatSolve() */
  sol_loc_save          = mumps->id.sol_loc;
  sol_loc_len_save      = mumps->id.sol_loc_len;
  mumps->id.isol_loc    = NULL; // an init state
  mumps->id.sol_loc     = NULL;
  mumps->id.sol_loc_len = 0;

  lsol_loc = mumps->id.lsol_loc;
  PetscCall(PetscIntMultError(nrhs, lsol_loc, &nlsol_loc)); /* length of sol_loc */
  PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc));
  PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, nlsol_loc, sol_loc, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc));
  mumps->id.isol_loc = isol_loc;

  /* msol_loc wraps sol_loc[] so the distributed solution can be VecScatter'ed later */
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc));

  if (denseB) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      PetscCall(MatDenseGetArrayRead(B, &barray));
      PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, barray)); // put barray to rhs_loc
      PetscCall(MatDenseRestoreArrayRead(B, &barray));
      PetscCall(MatGetLocalSize(B, &m, NULL));
      PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, NULL, &v_mpi)); // will scatter the solution to v_mpi, which wraps X
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
      /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in
        very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank
        0, re-arrange B into desired order, which is a local operation.
      */

      /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
      /* wrap dense rhs matrix B into a vector v_mpi */
      PetscCall(MatGetLocalSize(B, &m, NULL));
      PetscCall(MatDenseGetArrayRead(B, &barray));
      PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, barray, &v_mpi));
      PetscCall(MatDenseRestoreArrayRead(B, &barray));

      /* scatter v_mpi to b_seq in proc[0]. With ICNTL(20) = 0, MUMPS requires rhs to be centralized on the host! */
      if (!mumps->myid) {
        PetscInt *idx;
        /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
        PetscCall(PetscMalloc1(nrhsM, &idx));
        PetscCall(MatGetOwnershipRanges(B, &rstart));
        /* walk ranks, then columns, then that rank's rows, so b_seq ends up column-major as MUMPS expects */
        for (proc = 0, k = 0; proc < mumps->petsc_size; proc++) {
          for (j = 0; j < nrhs; j++) {
            for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i;
          }
        }

        PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhsM, &b_seq));
        PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhsM, idx, PETSC_OWN_POINTER, &is_to));
        PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhsM, 0, 1, &is_from));
      } else {
        /* non-host ranks contribute nothing to the centralized RHS */
        PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq));
        PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to));
        PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from));
      }

      PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs));
      PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(ISDestroy(&is_to));
      PetscCall(ISDestroy(&is_from));
      PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));

      if (!mumps->myid) { /* define rhs on the host */
        PetscCall(VecGetArrayRead(b_seq, &barray));
        PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, nrhsM, barray, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
        PetscCall(VecRestoreArrayRead(b_seq, &barray));
      }
    }
  } else { /* sparse B */
    b = (Mat_MPIAIJ *)Bt->data;

    /* wrap dense X into a vector v_mpi */
    PetscCall(MatGetLocalSize(X, &m, NULL));
    PetscCall(MatDenseGetArrayRead(X, &barray));
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhsM, barray, &v_mpi));
    PetscCall(MatDenseRestoreArrayRead(X, &barray));

    if (!mumps->myid) {
      /* only the host supplies the sparse RHS: use the diagonal block of Bt (assumed to hold all of it on rank 0) */
      PetscCall(MatSeqAIJGetArray(b->A, &aa));
      PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
      PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
      PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)b->A->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
    } else {
      mumps->id.irhs_ptr    = NULL;
      mumps->id.irhs_sparse = NULL;
      mumps->id.nz_rhs      = 0;
      if (mumps->id.rhs_sparse_len) {
        /* free a leftover conversion buffer from a previous solve */
        PetscCall(PetscFree(mumps->id.rhs_sparse));
        mumps->id.rhs_sparse_len = 0;
      }
    }
  }

  /* solve phase */
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));

  /* scatter mumps distributed solution to PETSc vector v_mpi, which shares local arrays with solution matrix X */
  PetscCall(MatDenseGetArray(X, &array));
  PetscCall(VecPlaceArray(v_mpi, array));

  /* create scatter scat_sol */
  PetscCall(MatGetOwnershipRanges(X, &rstart));
  /* iidx: index for scatter mumps solution to PETSc X */

  PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from));
  PetscCall(PetscMalloc1(nlsol_loc, &idxx));
  for (i = 0; i < lsol_loc; i++) {
    isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */

    /* find the owning rank of global row isol_loc[i] to compute its position in v_mpi's per-rank layout */
    for (proc = 0; proc < mumps->petsc_size; proc++) {
      if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) {
        myrstart = rstart[proc];
        k        = isol_loc[i] - myrstart;          /* local index on 1st column of PETSc vector X */
        iidx     = k + myrstart * nrhs;             /* maps mumps isol_loc[i] to PETSc index in X */
        m        = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */
        break;
      }
    }

    for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m;
  }
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to));
  PetscCall(MatMumpsCastMumpsScalarArray(nlsol_loc, mumps->id.precision, mumps->id.sol_loc, sol_loc)); // Vec msol_loc is created with sol_loc[]
  PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol));
  PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(ISDestroy(&is_from));
  PetscCall(ISDestroy(&is_to));
  PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(MatDenseRestoreArray(X, &array));

  if (mumps->id.sol_loc_len) { // in case we allocated intermediate buffers
    mumps->id.sol_loc_len = 0;
    PetscCall(PetscFree(mumps->id.sol_loc));
  }

  // restore old values
  mumps->id.sol_loc     = sol_loc_save;
  mumps->id.sol_loc_len = sol_loc_len_save;
  mumps->id.isol_loc    = isol_loc_save;

  PetscCall(PetscFree2(sol_loc, isol_loc));
  PetscCall(PetscFree(idxx));
  PetscCall(VecDestroy(&msol_loc));
  PetscCall(VecDestroy(&v_mpi));
  if (!denseB) {
    if (!mumps->myid) {
      b = (Mat_MPIAIJ *)Bt->data;
      PetscCall(MatSeqAIJRestoreArray(b->A, &aa));
      PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
    }
  } else {
    if (mumps->ICNTL20 == 0) {
      /* b_seq/scat_rhs only exist on the centralized-RHS path */
      PetscCall(VecDestroy(&b_seq));
      PetscCall(VecScatterDestroy(&scat_rhs));
    }
  }
  PetscCall(VecScatterDestroy(&scat_sol));
  PetscCall(PetscLogFlops(nrhs * PetscMax(0, 2.0 * (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2309 
2310 static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X)
2311 {
2312   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
2313   const PetscMUMPSInt value = mumps->id.ICNTL(9);
2314 
2315   PetscFunctionBegin;
2316   mumps->id.ICNTL(9) = 0;
2317   PetscCall(MatMatSolve_MUMPS(A, B, X));
2318   mumps->id.ICNTL(9) = value;
2319   PetscFunctionReturn(PETSC_SUCCESS);
2320 }
2321 
2322 static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X)
2323 {
2324   PetscBool flg;
2325   Mat       B;
2326 
2327   PetscFunctionBegin;
2328   PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
2329   PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix");
2330 
2331   /* Create B=Bt^T that uses Bt's data structure */
2332   PetscCall(MatCreateTranspose(Bt, &B));
2333 
2334   PetscCall(MatMatSolve_MUMPS(A, B, X));
2335   PetscCall(MatDestroy(&B));
2336   PetscFunctionReturn(PETSC_SUCCESS);
2337 }
2338 
2339 #if !defined(PETSC_USE_COMPLEX)
2340 /*
2341   input:
2342    F:        numeric factor
2343   output:
2344    nneg:     total number of negative pivots
2345    nzero:    total number of zero pivots
2346    npos:     (global dimension of F) - nneg - nzero
2347 */
2348 static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
2349 {
2350   Mat_MUMPS  *mumps = (Mat_MUMPS *)F->data;
2351   PetscMPIInt size;
2352 
2353   PetscFunctionBegin;
2354   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size));
2355   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
2356   PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13));
2357 
2358   if (nneg) *nneg = mumps->id.INFOG(12);
2359   if (nzero || npos) {
2360     PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
2361     if (nzero) *nzero = mumps->id.INFOG(28);
2362     if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
2363   }
2364   PetscFunctionReturn(PETSC_SUCCESS);
2365 }
2366 #endif
2367 
/* Gather the COO triples (irn/jcn/val) from all ranks of an OpenMP sub-communicator
   onto its master rank, which is the only rank that calls MUMPS in that mode.

   With reuse == MAT_INITIAL_MATRIX the (re)allocated master-side arrays receive
   irn, jcn and val; with MAT_REUSE_MATRIX the index pattern is unchanged and only
   val is re-gathered. MPI_Gatherv cannot be used because counts may exceed 2^31-1,
   so each rank's message is sliced into chunks of at most PETSC_MPI_INT_MAX and
   sent/received with nonblocking point-to-point calls sharing one tag.
   No-op when the OpenMP sub-communicator has a single rank. */
static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps)
{
  PetscMPIInt    nreqs;
  PetscMUMPSInt *irn, *jcn;
  PetscMPIInt    count;
  PetscCount     totnnz, remain;
  const PetscInt osize = mumps->omp_comm_size;
  PetscScalar   *val;

  PetscFunctionBegin;
  if (osize > 1) {
    if (reuse == MAT_INITIAL_MATRIX) {
      /* master first gathers counts of nonzeros to receive */
      if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount));
      PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm));

      /* Then each computes number of send/recvs */
      if (mumps->is_omp_master) {
        /* Start from 1 since self communication is not done in MPI */
        nreqs = 0;
        for (PetscMPIInt i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
      } else {
        nreqs = (PetscMPIInt)(((mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX));
      }
      PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */

      /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others.
         MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz
         might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size
         is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
       */
      nreqs = 0; /* counter for actual send/recvs */
      if (mumps->is_omp_master) {
        totnnz = 0;

        for (PetscMPIInt i = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
        PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn));
        PetscCall(PetscMalloc1(totnnz, &val));

        /* Self communication */
        PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz));
        PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz));
        PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz));

        /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
        PetscCall(PetscFree2(mumps->irn, mumps->jcn));
        PetscCall(PetscFree(mumps->val_alloc));
        mumps->nnz = totnnz;
        mumps->irn = irn;
        mumps->jcn = jcn;
        mumps->val = mumps->val_alloc = val;

        irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
        jcn += mumps->recvcount[0];
        val += mumps->recvcount[0];

        /* Remote communication */
        for (PetscMPIInt i = 1; i < osize; i++) {
          /* slice rank i's contribution into <= PETSC_MPI_INT_MAX chunks; pointers advance as chunks land */
          count  = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
          remain = mumps->recvcount[i] - count;
          while (count > 0) {
            PetscCallMPI(MPIU_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
            PetscCallMPI(MPIU_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
            PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
            irn += count;
            jcn += count;
            val += count;
            count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
            remain -= count;
          }
        }
      } else {
        /* non-master: send own triples to the master, sliced the same way */
        irn    = mumps->irn;
        jcn    = mumps->jcn;
        val    = mumps->val;
        count  = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
        remain = mumps->nnz - count;
        while (count > 0) {
          PetscCallMPI(MPIU_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
          PetscCallMPI(MPIU_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
          PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
          irn += count;
          jcn += count;
          val += count;
          count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
          remain -= count;
        }
      }
    } else {
      /* MAT_REUSE_MATRIX: indices unchanged; only re-gather the numerical values */
      nreqs = 0;
      if (mumps->is_omp_master) {
        val = mumps->val + mumps->recvcount[0];
        for (PetscMPIInt i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */
          count  = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
          remain = mumps->recvcount[i] - count;
          while (count > 0) {
            PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
            val += count;
            count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
            remain -= count;
          }
        }
      } else {
        val    = mumps->val;
        count  = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
        remain = mumps->nnz - count;
        while (count > 0) {
          PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
          val += count;
          count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
          remain -= count;
        }
      }
    }
    PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE));
    mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2487 
2488 static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info)
2489 {
2490   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2491 
2492   PetscFunctionBegin;
2493   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) {
2494     if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2495     PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2496     PetscFunctionReturn(PETSC_SUCCESS);
2497   }
2498 
2499   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
2500   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));
2501 
2502   /* numerical factorization phase */
2503   mumps->id.job = JOB_FACTNUMERIC;
2504   if (!mumps->id.ICNTL(18)) { /* A is centralized */
2505     if (!mumps->myid) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
2506   } else {
2507     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
2508   }
2509 
2510   if (F->schur) {
2511     const PetscScalar *array;
2512     MUMPS_INT          size = mumps->id.size_schur;
2513     PetscCall(MatDenseGetArrayRead(F->schur, &array));
2514     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, size * size, array, mumps->id.precision, &mumps->id.schur_len, &mumps->id.schur));
2515     PetscCall(MatDenseRestoreArrayRead(F->schur, &array));
2516   }
2517 
2518   PetscMUMPS_c(mumps);
2519   if (mumps->id.INFOG(1) < 0) {
2520     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
2521     if (mumps->id.INFOG(1) == -10) {
2522       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2523       F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
2524     } else if (mumps->id.INFOG(1) == -13) {
2525       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2526       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2527     } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
2528       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2529       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2530     } else {
2531       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2532       F->factorerrortype = MAT_FACTOR_OTHER;
2533     }
2534   }
2535   PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.INFOG(16));
2536 
2537   F->assembled = PETSC_TRUE;
2538 
2539   if (F->schur) { /* reset Schur status to unfactored */
2540 #if defined(PETSC_HAVE_CUDA)
2541     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
2542 #endif
2543     PetscScalar *array;
2544     PetscCall(MatDenseGetArray(F->schur, &array));
2545     PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.size_schur * mumps->id.size_schur, mumps->id.precision, mumps->id.schur, array));
2546     PetscCall(MatDenseRestoreArray(F->schur, &array));
2547     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2548       mumps->id.ICNTL(19) = 2;
2549       PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
2550     }
2551     PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
2552   }
2553 
2554   /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
2555   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
2556 
2557   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
2558   // MUMPS userguide: ISOL_loc should be allocated by the user between the factorization and the
2559   // solve phases. On exit from the solve phase, ISOL_loc(i) contains the index of the variables for
2560   // which the solution (in SOL_loc) is available on the local processor.
2561   // If successive calls to the solve phase (JOB= 3) are performed for a given matrix, ISOL_loc will
2562   // normally have the same contents for each of these calls. The only exception is the case of
2563   // unsymmetric matrices (SYM=1) when the transpose option is changed (see ICNTL(9)) and non
2564   // symmetric row/column exchanges (see ICNTL(6)) have occurred before the solve phase.
2565   if (mumps->petsc_size > 1) {
2566     PetscInt     lsol_loc;
2567     PetscScalar *array;
2568 
2569     /* distributed solution; Create x_seq=sol_loc for repeated use */
2570     if (mumps->x_seq) {
2571       PetscCall(VecScatterDestroy(&mumps->scat_sol));
2572       PetscCall(PetscFree(mumps->id.isol_loc));
2573       PetscCall(VecDestroy(&mumps->x_seq));
2574     }
2575     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
2576     PetscCall(PetscMalloc1(lsol_loc, &mumps->id.isol_loc));
2577     PetscCall(VecCreateSeq(PETSC_COMM_SELF, lsol_loc, &mumps->x_seq));
2578     PetscCall(VecGetArray(mumps->x_seq, &array));
2579     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, lsol_loc, array, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc));
2580     PetscCall(VecRestoreArray(mumps->x_seq, &array));
2581     mumps->id.lsol_loc = (PetscMUMPSInt)lsol_loc;
2582   }
2583   PetscCall(PetscLogFlops((double)ID_RINFO_GET(mumps->id, 2)));
2584   PetscFunctionReturn(PETSC_SUCCESS);
2585 }
2586 
2587 /* Sets MUMPS options from the options database */
2588 static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A)
2589 {
2590   Mat_MUMPS    *mumps = (Mat_MUMPS *)F->data;
2591   PetscReal     cntl;
2592   PetscMUMPSInt icntl = 0, size, *listvar_schur;
2593   PetscInt      info[80], i, ninfo = 80, rbs, cbs;
2594   PetscBool     flg   = PETSC_FALSE;
2595   PetscBool     schur = mumps->id.icntl ? (PetscBool)(mumps->id.ICNTL(26) == -1) : (PetscBool)(mumps->ICNTL26 == -1);
2596   void         *arr;
2597 
2598   PetscFunctionBegin;
2599   PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat");
2600   if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */
2601     PetscPrecision precision  = PetscDefined(USE_REAL_SINGLE) ? PETSC_PRECISION_SINGLE : PETSC_PRECISION_DOUBLE;
2602     PetscInt       nthreads   = 0;
2603     PetscInt       nCNTL_pre  = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2604     PetscInt       nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
2605     PetscMUMPSInt  nblk, *blkvar, *blkptr;
2606 
2607     mumps->petsc_comm = PetscObjectComm((PetscObject)A);
2608     PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size));
2609     PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */
2610 
2611     PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support));
2612     if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
2613     /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */
2614     PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL));
2615     if (mumps->use_petsc_omp_support) {
2616       PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2617 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
2618       PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl));
2619       PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master));
2620 #else
2621       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",
2622               ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2623 #endif
2624     } else {
2625       mumps->omp_comm      = PETSC_COMM_SELF;
2626       mumps->mumps_comm    = mumps->petsc_comm;
2627       mumps->is_omp_master = PETSC_TRUE;
2628     }
2629     PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size));
2630     mumps->reqs = NULL;
2631     mumps->tag  = 0;
2632 
2633     if (mumps->mumps_comm != MPI_COMM_NULL) {
2634       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) {
2635         /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
2636         MPI_Comm comm;
2637         PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm));
2638         mumps->mumps_comm = comm;
2639       } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm));
2640     }
2641 
2642     mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
2643     mumps->id.job          = JOB_INIT;
2644     mumps->id.par          = 1; /* host participates factorizaton and solve */
2645     mumps->id.sym          = mumps->sym;
2646 
2647     size          = mumps->id.size_schur;
2648     arr           = mumps->id.schur;
2649     listvar_schur = mumps->id.listvar_schur;
2650     nblk          = mumps->id.nblk;
2651     blkvar        = mumps->id.blkvar;
2652     blkptr        = mumps->id.blkptr;
2653     if (PetscDefined(USE_DEBUG)) {
2654       for (PetscInt i = 0; i < size; i++)
2655         PetscCheck(listvar_schur[i] - 1 >= 0 && listvar_schur[i] - 1 < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid Schur index at position %" PetscInt_FMT "! %" PetscInt_FMT " must be in [0, %" PetscInt_FMT ")", i, (PetscInt)listvar_schur[i] - 1,
2656                    A->rmap->N);
2657     }
2658 
2659     PetscCall(PetscOptionsEnum("-pc_precision", "Precision used by MUMPS", "MATSOLVERMUMPS", PetscPrecisionTypes, (PetscEnum)precision, (PetscEnum *)&precision, NULL));
2660     PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "MUMPS does not support %s precision", PetscPrecisionTypes[precision]);
2661     PetscCheck(precision == PETSC_SCALAR_PRECISION || PetscDefined(HAVE_MUMPS_MIXED_PRECISION), PetscObjectComm((PetscObject)F), PETSC_ERR_USER, "Your MUMPS library does not support mixed precision, but which is needed with your specified PetscScalar");
2662     PetscCall(MatMumpsAllocateInternalID(&mumps->id, precision));
2663 
2664     PetscMUMPS_c(mumps);
2665     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2666 
2667     /* set PETSc-MUMPS default options - override MUMPS default */
2668     mumps->id.ICNTL(3) = 0;
2669     mumps->id.ICNTL(4) = 0;
2670     if (mumps->petsc_size == 1) {
2671       mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
2672       mumps->id.ICNTL(7)  = 7; /* automatic choice of ordering done by the package */
2673     } else {
2674       mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
2675       mumps->id.ICNTL(21) = 1; /* distributed solution */
2676     }
2677     if (nblk && blkptr) {
2678       mumps->id.ICNTL(15) = 1;
2679       mumps->id.nblk      = nblk;
2680       mumps->id.blkvar    = blkvar;
2681       mumps->id.blkptr    = blkptr;
2682     } else mumps->id.ICNTL(15) = 0;
2683 
2684     /* restore cached ICNTL and CNTL values */
2685     for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl];
2686     for (icntl = 0; icntl < nCNTL_pre; ++icntl) ID_CNTL_SET(mumps->id, (PetscInt)mumps->CNTL_pre[1 + 2 * icntl], mumps->CNTL_pre[2 + 2 * icntl]);
2687 
2688     PetscCall(PetscFree(mumps->ICNTL_pre));
2689     PetscCall(PetscFree(mumps->CNTL_pre));
2690 
2691     if (schur) {
2692       mumps->id.size_schur    = size;
2693       mumps->id.schur_lld     = size;
2694       mumps->id.schur         = arr;
2695       mumps->id.listvar_schur = listvar_schur;
2696       if (mumps->petsc_size > 1) {
2697         PetscBool gs; /* gs is false if any rank other than root has non-empty IS */
2698 
2699         mumps->id.ICNTL(19) = 1;                                                                            /* MUMPS returns Schur centralized on the host */
2700         gs                  = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
2701         PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPI_C_BOOL, MPI_LAND, mumps->petsc_comm));
2702         PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc");
2703       } else {
2704         if (F->factortype == MAT_FACTOR_LU) {
2705           mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
2706         } else {
2707           mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
2708         }
2709       }
2710       mumps->id.ICNTL(26) = -1;
2711     }
2712 
2713     /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
2714        For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
2715      */
2716     PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm));
2717     PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm));
2718 
2719     mumps->scat_rhs = NULL;
2720     mumps->scat_sol = NULL;
2721   }
2722   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg));
2723   if (flg) mumps->id.ICNTL(1) = icntl;
2724   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg));
2725   if (flg) mumps->id.ICNTL(2) = icntl;
2726   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg));
2727   if (flg) mumps->id.ICNTL(3) = icntl;
2728 
2729   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg));
2730   if (flg) mumps->id.ICNTL(4) = icntl;
2731   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */
2732 
2733   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg));
2734   if (flg) mumps->id.ICNTL(6) = icntl;
2735 
2736   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg));
2737   if (flg) {
2738     PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
2739     mumps->id.ICNTL(7) = icntl;
2740   }
2741 
2742   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL));
2743   /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */
2744   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL));
2745   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL));
2746   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL));
2747   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL));
2748   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL));
2749   PetscCall(MatGetBlockSizes(A, &rbs, &cbs));
2750   if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = (PetscMUMPSInt)-rbs;
2751   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg));
2752   if (flg) {
2753     if (mumps->id.ICNTL(15) < 0) PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes");
2754     else if (mumps->id.ICNTL(15) > 0) {
2755       const PetscInt *bsizes;
2756       PetscInt        nblocks, p, *blkptr = NULL;
2757       PetscMPIInt    *recvcounts, *displs, n;
2758       PetscMPIInt     rank, size = 0;
2759 
2760       PetscCall(MatGetVariableBlockSizes(A, &nblocks, &bsizes));
2761       flg = PETSC_TRUE;
2762       for (p = 0; p < nblocks; ++p) {
2763         if (bsizes[p] > 1) break;
2764       }
2765       if (p == nblocks) flg = PETSC_FALSE;
2766       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &flg, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
2767       if (flg) { // if at least one process supplies variable block sizes and they are not all set to 1
2768         PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2769         if (rank == 0) PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2770         PetscCall(PetscCalloc2(size, &recvcounts, size + 1, &displs));
2771         PetscCall(PetscMPIIntCast(nblocks, &n));
2772         PetscCallMPI(MPI_Gather(&n, 1, MPI_INT, recvcounts, 1, MPI_INT, 0, PetscObjectComm((PetscObject)A)));
2773         for (PetscInt p = 0; p < size; ++p) displs[p + 1] = displs[p] + recvcounts[p];
2774         PetscCall(PetscMalloc1(displs[size] + 1, &blkptr));
2775         PetscCallMPI(MPI_Bcast(displs + size, 1, MPIU_INT, 0, PetscObjectComm((PetscObject)A)));
2776         PetscCallMPI(MPI_Gatherv(bsizes, n, MPIU_INT, blkptr + 1, recvcounts, displs, MPIU_INT, 0, PetscObjectComm((PetscObject)A)));
2777         if (rank == 0) {
2778           blkptr[0] = 1;
2779           for (PetscInt p = 0; p < n; ++p) blkptr[p + 1] += blkptr[p];
2780           PetscCall(MatMumpsSetBlk(F, displs[size], NULL, blkptr));
2781         }
2782         PetscCall(PetscFree2(recvcounts, displs));
2783         PetscCall(PetscFree(blkptr));
2784       }
2785     }
2786   }
2787   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL));
2788   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */
2789     PetscCall(MatDestroy(&F->schur));
2790     PetscCall(MatMumpsResetSchur_Private(mumps));
2791   }
2792 
2793   /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
2794      and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
2795      and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
2796      This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
2797      see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
2798      In short, we could not use distributed RHS until with MPICH v4.0b1 or we enabled a workaround in mumps-5.6.2+
2799    */
2800   mumps->ICNTL20 = 10; /* Distributed dense RHS, by default */
2801 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (PetscDefined(HAVE_MPICH) && MPICH_NUMVERSION < 40000101) || PetscDefined(HAVE_MSMPI)
2802   mumps->ICNTL20 = 0; /* Centralized dense RHS, if need be */
2803 #endif
2804   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg));
2805   PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20);
2806 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
2807   PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0");
2808 #endif
2809   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */
2810 
2811   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL));
2812   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL));
2813   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL));
2814   if (mumps->id.ICNTL(24)) mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */
2815 
2816   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL));
2817   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL));
2818   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL));
2819   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL));
2820   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL));
2821   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
2822   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL));
2823   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL));  -- not supported by PETSc API */
2824   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
2825   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
2826   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
2827   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_37", "ICNTL(37): compression of the contribution blocks (CB)", "None", mumps->id.ICNTL(37), &mumps->id.ICNTL(37), NULL));
2828   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));
2829   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_48", "ICNTL(48): multithreading with tree parallelism", "None", mumps->id.ICNTL(48), &mumps->id.ICNTL(48), NULL));
2830   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_49", "ICNTL(49): compact workarray at the end of factorization phase", "None", mumps->id.ICNTL(49), &mumps->id.ICNTL(49), NULL));
2831   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_56", "ICNTL(56): postponing and rank-revealing factorization", "None", mumps->id.ICNTL(56), &mumps->id.ICNTL(56), NULL));
2832   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL));
2833 
2834   PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 1), &cntl, &flg));
2835   if (flg) ID_CNTL_SET(mumps->id, 1, cntl);
2836   PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", (PetscReal)ID_CNTL_GET(mumps->id, 2), &cntl, &flg));
2837   if (flg) ID_CNTL_SET(mumps->id, 2, cntl);
2838   PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 3), &cntl, &flg));
2839   if (flg) ID_CNTL_SET(mumps->id, 3, cntl);
2840   PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", (PetscReal)ID_CNTL_GET(mumps->id, 4), &cntl, &flg));
2841   if (flg) ID_CNTL_SET(mumps->id, 4, cntl);
2842   PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", (PetscReal)ID_CNTL_GET(mumps->id, 5), &cntl, &flg));
2843   if (flg) ID_CNTL_SET(mumps->id, 5, cntl);
2844   PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", (PetscReal)ID_CNTL_GET(mumps->id, 7), &cntl, &flg));
2845   if (flg) ID_CNTL_SET(mumps->id, 7, cntl);
2846 
2847   PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));
2848 
2849   PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL));
2850   if (ninfo) {
2851     PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must <= 80", ninfo);
2852     PetscCall(PetscMalloc1(ninfo, &mumps->info));
2853     mumps->ninfo = ninfo;
2854     for (i = 0; i < ninfo; i++) {
2855       PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must between 1 and 80", ninfo);
2856       mumps->info[i] = info[i];
2857     }
2858   }
2859   PetscOptionsEnd();
2860   PetscFunctionReturn(PETSC_SUCCESS);
2861 }
2862 
2863 static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info, Mat_MUMPS *mumps)
2864 {
2865   PetscFunctionBegin;
2866   if (mumps->id.INFOG(1) < 0) {
2867     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2868     if (mumps->id.INFOG(1) == -6) {
2869       PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2870       F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
2871     } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
2872       PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2873       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2874     } else {
2875       PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2876       F->factorerrortype = MAT_FACTOR_OTHER;
2877     }
2878   }
2879   if (!mumps->id.n) F->factorerrortype = MAT_FACTOR_NOERROR;
2880   PetscFunctionReturn(PETSC_SUCCESS);
2881 }
2882 
2883 static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, PETSC_UNUSED IS c, const MatFactorInfo *info)
2884 {
2885   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2886   Vec            b;
2887   const PetscInt M = A->rmap->N;
2888 
2889   PetscFunctionBegin;
2890   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2891     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
2892     PetscFunctionReturn(PETSC_SUCCESS);
2893   }
2894 
2895   /* Set MUMPS options from the options database */
2896   PetscCall(MatSetFromOptions_MUMPS(F, A));
2897 
2898   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2899   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2900 
2901   /* analysis phase */
2902   mumps->id.job = JOB_FACTSYMBOLIC;
2903   PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
2904   switch (mumps->id.ICNTL(18)) {
2905   case 0: /* centralized assembled matrix input */
2906     if (!mumps->myid) {
2907       mumps->id.nnz = mumps->nnz;
2908       mumps->id.irn = mumps->irn;
2909       mumps->id.jcn = mumps->jcn;
2910       if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
2911       if (r && mumps->id.ICNTL(7) == 7) {
2912         mumps->id.ICNTL(7) = 1;
2913         if (!mumps->myid) {
2914           const PetscInt *idx;
2915           PetscInt        i;
2916 
2917           PetscCall(PetscMalloc1(M, &mumps->id.perm_in));
2918           PetscCall(ISGetIndices(r, &idx));
2919           for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &mumps->id.perm_in[i])); /* perm_in[]: start from 1, not 0! */
2920           PetscCall(ISRestoreIndices(r, &idx));
2921         }
2922       }
2923     }
2924     break;
2925   case 3: /* distributed assembled matrix input (size>1) */
2926     mumps->id.nnz_loc = mumps->nnz;
2927     mumps->id.irn_loc = mumps->irn;
2928     mumps->id.jcn_loc = mumps->jcn;
2929     if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
2930     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2931       PetscCall(MatCreateVecs(A, NULL, &b));
2932       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2933       PetscCall(VecDestroy(&b));
2934     }
2935     break;
2936   }
2937   PetscMUMPS_c(mumps);
2938   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2939 
2940   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2941   F->ops->solve             = MatSolve_MUMPS;
2942   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2943   F->ops->matsolve          = MatMatSolve_MUMPS;
2944   F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2945   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2946 
2947   mumps->matstruc = SAME_NONZERO_PATTERN;
2948   PetscFunctionReturn(PETSC_SUCCESS);
2949 }
2950 
2951 /* Note the PETSc r and c permutations are ignored */
2952 static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, PETSC_UNUSED IS r, PETSC_UNUSED IS c, const MatFactorInfo *info)
2953 {
2954   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2955   Vec            b;
2956   const PetscInt M = A->rmap->N;
2957 
2958   PetscFunctionBegin;
2959   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2960     /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
2961     PetscFunctionReturn(PETSC_SUCCESS);
2962   }
2963 
2964   /* Set MUMPS options from the options database */
2965   PetscCall(MatSetFromOptions_MUMPS(F, A));
2966 
2967   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2968   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2969 
2970   /* analysis phase */
2971   mumps->id.job = JOB_FACTSYMBOLIC;
2972   PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
2973   switch (mumps->id.ICNTL(18)) {
2974   case 0: /* centralized assembled matrix input */
2975     if (!mumps->myid) {
2976       mumps->id.nnz = mumps->nnz;
2977       mumps->id.irn = mumps->irn;
2978       mumps->id.jcn = mumps->jcn;
2979       if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
2980     }
2981     break;
2982   case 3: /* distributed assembled matrix input (size>1) */
2983     mumps->id.nnz_loc = mumps->nnz;
2984     mumps->id.irn_loc = mumps->irn;
2985     mumps->id.jcn_loc = mumps->jcn;
2986     if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
2987     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2988       PetscCall(MatCreateVecs(A, NULL, &b));
2989       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2990       PetscCall(VecDestroy(&b));
2991     }
2992     break;
2993   }
2994   PetscMUMPS_c(mumps);
2995   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2996 
2997   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2998   F->ops->solve             = MatSolve_MUMPS;
2999   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
3000   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
3001 
3002   mumps->matstruc = SAME_NONZERO_PATTERN;
3003   PetscFunctionReturn(PETSC_SUCCESS);
3004 }
3005 
3006 /* Note the PETSc r permutation and factor info are ignored */
3007 static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, PETSC_UNUSED IS r, const MatFactorInfo *info)
3008 {
3009   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
3010   Vec            b;
3011   const PetscInt M = A->rmap->N;
3012 
3013   PetscFunctionBegin;
3014   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
3015     /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
3016     PetscFunctionReturn(PETSC_SUCCESS);
3017   }
3018 
3019   /* Set MUMPS options from the options database */
3020   PetscCall(MatSetFromOptions_MUMPS(F, A));
3021 
3022   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
3023   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
3024 
3025   /* analysis phase */
3026   mumps->id.job = JOB_FACTSYMBOLIC;
3027   PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
3028   switch (mumps->id.ICNTL(18)) {
3029   case 0: /* centralized assembled matrix input */
3030     if (!mumps->myid) {
3031       mumps->id.nnz = mumps->nnz;
3032       mumps->id.irn = mumps->irn;
3033       mumps->id.jcn = mumps->jcn;
3034       if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
3035     }
3036     break;
3037   case 3: /* distributed assembled matrix input (size>1) */
3038     mumps->id.nnz_loc = mumps->nnz;
3039     mumps->id.irn_loc = mumps->irn;
3040     mumps->id.jcn_loc = mumps->jcn;
3041     if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
3042     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
3043       PetscCall(MatCreateVecs(A, NULL, &b));
3044       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
3045       PetscCall(VecDestroy(&b));
3046     }
3047     break;
3048   }
3049   PetscMUMPS_c(mumps);
3050   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
3051 
3052   F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
3053   F->ops->solve                 = MatSolve_MUMPS;
3054   F->ops->solvetranspose        = MatSolve_MUMPS;
3055   F->ops->matsolve              = MatMatSolve_MUMPS;
3056   F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
3057   F->ops->matsolvetranspose     = MatMatSolveTranspose_MUMPS;
3058 #if defined(PETSC_USE_COMPLEX)
3059   F->ops->getinertia = NULL;
3060 #else
3061   F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
3062 #endif
3063 
3064   mumps->matstruc = SAME_NONZERO_PATTERN;
3065   PetscFunctionReturn(PETSC_SUCCESS);
3066 }
3067 
3068 static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer)
3069 {
3070   PetscBool         isascii;
3071   PetscViewerFormat format;
3072   Mat_MUMPS        *mumps = (Mat_MUMPS *)A->data;
3073 
3074   PetscFunctionBegin;
3075   /* check if matrix is mumps type */
3076   if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS);
3077 
3078   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
3079   if (isascii) {
3080     PetscCall(PetscViewerGetFormat(viewer, &format));
3081     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
3082       PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n"));
3083       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
3084         PetscCall(PetscViewerASCIIPrintf(viewer, "  SYM (matrix type):                   %d\n", mumps->id.sym));
3085         PetscCall(PetscViewerASCIIPrintf(viewer, "  PAR (host participation):            %d\n", mumps->id.par));
3086         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(1) (output for error):         %d\n", mumps->id.ICNTL(1)));
3087         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2)));
3088         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(3) (output for global info):   %d\n", mumps->id.ICNTL(3)));
3089         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(4) (level of printing):        %d\n", mumps->id.ICNTL(4)));
3090         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(5) (input mat struct):         %d\n", mumps->id.ICNTL(5)));
3091         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(6) (matrix prescaling):        %d\n", mumps->id.ICNTL(6)));
3092         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7)));
3093         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(8) (scaling strategy):         %d\n", mumps->id.ICNTL(8)));
3094         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(10) (max num of refinements):  %d\n", mumps->id.ICNTL(10)));
3095         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(11) (error analysis):          %d\n", mumps->id.ICNTL(11)));
3096         if (mumps->id.ICNTL(11) > 0) {
3097           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(4) (inf norm of input mat):        %g\n", (double)ID_RINFOG_GET(mumps->id, 4)));
3098           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(5) (inf norm of solution):         %g\n", (double)ID_RINFOG_GET(mumps->id, 5)));
3099           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(6) (inf norm of residual):         %g\n", (double)ID_RINFOG_GET(mumps->id, 6)));
3100           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 7), (double)ID_RINFOG_GET(mumps->id, 8)));
3101           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(9) (error estimate):               %g\n", (double)ID_RINFOG_GET(mumps->id, 9)));
3102           PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 10), (double)ID_RINFOG_GET(mumps->id, 11)));
3103         }
3104         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(12) (efficiency control):                         %d\n", mumps->id.ICNTL(12)));
3105         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(13) (sequential factorization of the root node):  %d\n", mumps->id.ICNTL(13)));
3106         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14)));
3107         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(15) (compression of the input matrix):            %d\n", mumps->id.ICNTL(15)));
3108         /* ICNTL(16) and ICNTL(17) not used */
3109         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(18) (input mat struct):                           %d\n", mumps->id.ICNTL(18)));
3110         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(19) (Schur complement info):                      %d\n", mumps->id.ICNTL(19)));
3111         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(20) (RHS sparse pattern):                         %d\n", mumps->id.ICNTL(20)));
3112         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(21) (solution struct):                            %d\n", mumps->id.ICNTL(21)));
3113         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(22) (in-core/out-of-core facility):               %d\n", mumps->id.ICNTL(22)));
3114         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23)));
3115 
3116         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(24) (detection of null pivot rows):               %d\n", mumps->id.ICNTL(24)));
3117         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(25) (computation of a null space basis):          %d\n", mumps->id.ICNTL(25)));
3118         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(26) (Schur options for RHS or solution):          %d\n", mumps->id.ICNTL(26)));
3119         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(27) (blocking size for multiple RHS):             %d\n", mumps->id.ICNTL(27)));
3120         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(28) (use parallel or sequential ordering):        %d\n", mumps->id.ICNTL(28)));
3121         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(29) (parallel ordering):                          %d\n", mumps->id.ICNTL(29)));
3122 
3123         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(30) (user-specified set of entries in inv(A)):    %d\n", mumps->id.ICNTL(30)));
3124         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(31) (factors is discarded in the solve phase):    %d\n", mumps->id.ICNTL(31)));
3125         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(33) (compute determinant):                        %d\n", mumps->id.ICNTL(33)));
3126         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(35) (activate BLR based factorization):           %d\n", mumps->id.ICNTL(35)));
3127         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(36) (choice of BLR factorization variant):        %d\n", mumps->id.ICNTL(36)));
3128         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(37) (compression of the contribution blocks):     %d\n", mumps->id.ICNTL(37)));
3129         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(38) (estimated compression rate of LU factors):   %d\n", mumps->id.ICNTL(38)));
3130         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(48) (multithreading with tree parallelism):       %d\n", mumps->id.ICNTL(48)));
3131         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(49) (compact workarray at the end of factorization phase):%d\n", mumps->id.ICNTL(49)));
3132         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(56) (postponing and rank-revealing factorization):%d\n", mumps->id.ICNTL(56)));
3133         PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(58) (options for symbolic factorization):         %d\n", mumps->id.ICNTL(58)));
3134 
3135         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(1) (relative pivoting threshold):      %g\n", (double)ID_CNTL_GET(mumps->id, 1)));
3136         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(2) (stopping criterion of refinement): %g\n", (double)ID_CNTL_GET(mumps->id, 2)));
3137         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(3) (absolute pivoting threshold):      %g\n", (double)ID_CNTL_GET(mumps->id, 3)));
3138         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(4) (value of static pivoting):         %g\n", (double)ID_CNTL_GET(mumps->id, 4)));
3139         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(5) (fixation for null pivots):         %g\n", (double)ID_CNTL_GET(mumps->id, 5)));
3140         PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(7) (dropping parameter for BLR):       %g\n", (double)ID_CNTL_GET(mumps->id, 7)));
3141 
3142         /* information local to each processor */
3143         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis):\n"));
3144         PetscCall(PetscViewerASCIIPushSynchronized(viewer));
3145         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 1)));
3146         PetscCall(PetscViewerFlush(viewer));
3147         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization):\n"));
3148         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 2)));
3149         PetscCall(PetscViewerFlush(viewer));
3150         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization):\n"));
3151         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 3)));
3152         PetscCall(PetscViewerFlush(viewer));
3153 
3154         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n"));
3155         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(15)));
3156         PetscCall(PetscViewerFlush(viewer));
3157 
3158         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n"));
3159         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(16)));
3160         PetscCall(PetscViewerFlush(viewer));
3161 
3162         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization):\n"));
3163         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(23)));
3164         PetscCall(PetscViewerFlush(viewer));
3165 
3166         if (mumps->ninfo && mumps->ninfo <= 80) {
3167           PetscInt i;
3168           for (i = 0; i < mumps->ninfo; i++) {
3169             PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(%" PetscInt_FMT "):\n", mumps->info[i]));
3170             PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i])));
3171             PetscCall(PetscViewerFlush(viewer));
3172           }
3173         }
3174         PetscCall(PetscViewerASCIIPopSynchronized(viewer));
3175       } else PetscCall(PetscViewerASCIIPrintf(viewer, "  Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : ""));
3176 
3177       if (mumps->myid == 0) { /* information from the host */
3178         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", (double)ID_RINFOG_GET(mumps->id, 1)));
3179         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 2)));
3180         PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 3)));
3181         PetscCall(PetscViewerASCIIPrintf(viewer, "  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", (double)ID_RINFOG_GET(mumps->id, 12), (double)ID_RINFOG_GET(mumps->id, 13), mumps->id.INFOG(34)));
3182 
3183         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3)));
3184         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4)));
3185         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5)));
3186         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6)));
3187         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7)));
3188         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8)));
3189         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9)));
3190         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10)));
3191         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11)));
3192         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12)));
3193         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13)));
3194         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14)));
3195         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15)));
3196         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16)));
3197         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17)));
3198         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18)));
3199         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19)));
3200         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20)));
3201         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21)));
3202         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22)));
3203         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23)));
3204         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24)));
3205         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25)));
3206         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28)));
3207         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29)));
3208         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31)));
3209         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32)));
3210         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33)));
3211         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34)));
3212         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35)));
3213         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36)));
3214         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37)));
3215         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38)));
3216         PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39)));
3217       }
3218     }
3219   }
3220   PetscFunctionReturn(PETSC_SUCCESS);
3221 }
3222 
3223 static PetscErrorCode MatGetInfo_MUMPS(Mat A, PETSC_UNUSED MatInfoType flag, MatInfo *info)
3224 {
3225   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
3226 
3227   PetscFunctionBegin;
3228   info->block_size        = 1.0;
3229   info->nz_allocated      = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
3230   info->nz_used           = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
3231   info->nz_unneeded       = 0.0;
3232   info->assemblies        = 0.0;
3233   info->mallocs           = 0.0;
3234   info->memory            = 0.0;
3235   info->fill_ratio_given  = 0;
3236   info->fill_ratio_needed = 0;
3237   info->factor_mallocs    = 0;
3238   PetscFunctionReturn(PETSC_SUCCESS);
3239 }
3240 
/* Select the set of variables (given by 'is') whose Schur complement MUMPS should compute;
   allocates the dense matrix F->schur that will receive it */
static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
{
  Mat_MUMPS         *mumps = (Mat_MUMPS *)F->data;
  const PetscScalar *arr;
  const PetscInt    *idxs;
  PetscInt           size, i;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(is, &size));
  /* Schur complement matrix: square dense matrix of order 'size' */
  PetscCall(MatDestroy(&F->schur));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
  /* NOTE(review): the get/restore pair below presumably forces allocation of the array backing F->schur -- confirm */
  PetscCall(MatDenseGetArrayRead(F->schur, &arr));
  // don't allocate mumps->id.schur[] now as its precision is yet to know
  PetscCall(PetscMUMPSIntCast(size, &mumps->id.size_schur));
  PetscCall(PetscMUMPSIntCast(size, &mumps->id.schur_lld));
  PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
  if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE)); /* sym == 1: SPD factorization was requested */

  /* MUMPS expects Fortran style indices: copy the IS with a 0-based -> 1-based shift */
  PetscCall(PetscFree(mumps->id.listvar_schur));
  PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
  PetscCall(ISGetIndices(is, &idxs));
  for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &mumps->id.listvar_schur[i]));
  PetscCall(ISRestoreIndices(is, &idxs));
  /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
  if (mumps->id.icntl) mumps->id.ICNTL(26) = -1; /* icntl array already allocated: set directly */
  else mumps->ICNTL26 = -1;                      /* otherwise cache the value until MUMPS is initialized */
  PetscFunctionReturn(PETSC_SUCCESS);
}
3271 
/* Copy the Schur complement computed by MUMPS into a new sequential dense matrix *S,
   converting from MUMPS' storage scheme (selected by ICNTL(19) and the symmetry of F)
   to full column-major storage */
static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
{
  Mat          St;
  Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
  PetscScalar *array;
  PetscInt     i, j, N = mumps->id.size_schur;

  PetscFunctionBegin;
  PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! Call MatFactorSetSchurIS() to enable it");
  PetscCall(MatCreate(PETSC_COMM_SELF, &St));
  PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
  PetscCall(MatSetType(St, MATDENSE));
  PetscCall(MatSetUp(St));
  PetscCall(MatDenseGetArray(St, &array));
  if (!mumps->sym) {                /* MUMPS always return a full matrix */
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows: transpose while copying into column-major 'array' */
      for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
      }
    } else { /* stored by columns: layouts match, convert precision in bulk */
      PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array));
    }
  } else {                          /* either full or lower-triangular (not packed) */
    if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns: mirror into both triangles */
      for (i = 0; i < N; i++) {
        for (j = i; j < N; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
      }
    } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
      PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array));
    } else { /* ICNTL(19) == 1 lower triangular stored by rows: mirror into both triangles */
      for (i = 0; i < N; i++) {
        for (j = 0; j < i + 1; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
      }
    }
  }
  PetscCall(MatDenseRestoreArray(St, &array));
  *S = St;
  PetscFunctionReturn(PETSC_SUCCESS);
}
3311 
/* Set ICNTL(icntl) = ival; before MUMPS is initialized the pair is cached in
   mumps->ICNTL_pre, whose layout is: ICNTL_pre[0] = number of cached pairs,
   followed by (icntl, ival) pairs */
static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) {                                            /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
    PetscMUMPSInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
    for (i = 0; i < nICNTL_pre; ++i)
      if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
    if (i == nICNTL_pre) {                             /* not already cached: grow by one pair (3 = count slot + one pair when starting empty) */
      if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
      else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
      mumps->ICNTL_pre[0]++;
    }
    mumps->ICNTL_pre[1 + 2 * i] = (PetscMUMPSInt)icntl;
    PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i));
  } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl))); /* MUMPS initialized: write into its control array directly */
  PetscFunctionReturn(PETSC_SUCCESS);
}
3331 
3332 static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
3333 {
3334   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3335 
3336   PetscFunctionBegin;
3337   if (mumps->id.job == JOB_NULL) {
3338     PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
3339     *ival = 0;
3340     for (i = 0; i < nICNTL_pre; ++i) {
3341       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i];
3342     }
3343   } else *ival = mumps->id.ICNTL(icntl);
3344   PetscFunctionReturn(PETSC_SUCCESS);
3345 }
3346 
3347 /*@
3348   MatMumpsSetIcntl - Set MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>
3349 
3350   Logically Collective
3351 
3352   Input Parameters:
3353 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3354 . icntl - index of MUMPS parameter array `ICNTL()`
3355 - ival  - value of MUMPS `ICNTL(icntl)`
3356 
3357   Options Database Key:
3358 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered `icntl` to `ival`
3359 
3360   Level: beginner
3361 
3362   Note:
3363   Ignored if MUMPS is not installed or `F` is not a MUMPS matrix
3364 
3365 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3366 @*/
3367 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival)
3368 {
3369   PetscFunctionBegin;
3370   PetscValidType(F, 1);
3371   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3372   PetscValidLogicalCollectiveInt(F, icntl, 2);
3373   PetscValidLogicalCollectiveInt(F, ival, 3);
3374   PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 49 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
3375   PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
3376   PetscFunctionReturn(PETSC_SUCCESS);
3377 }
3378 
3379 /*@
3380   MatMumpsGetIcntl - Get MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>
3381 
3382   Logically Collective
3383 
3384   Input Parameters:
3385 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3386 - icntl - index of MUMPS parameter array ICNTL()
3387 
3388   Output Parameter:
3389 . ival - value of MUMPS ICNTL(icntl)
3390 
3391   Level: beginner
3392 
3393 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3394 @*/
3395 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival)
3396 {
3397   PetscFunctionBegin;
3398   PetscValidType(F, 1);
3399   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3400   PetscValidLogicalCollectiveInt(F, icntl, 2);
3401   PetscAssertPointer(ival, 3);
3402   PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 49 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
3403   PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
3404   PetscFunctionReturn(PETSC_SUCCESS);
3405 }
3406 
/* Set CNTL(icntl) = val; before MUMPS is initialized the pair is cached in
   mumps->CNTL_pre, whose layout is: CNTL_pre[0] = number of cached pairs, followed
   by (icntl, val) pairs -- all stored as PetscReal, including the integer index */
static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) { /* need to cache since PetscMUMPS_c() has never been called */
    PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
    for (i = 0; i < nCNTL_pre; ++i)
      if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; /* already cached? */
    if (i == nCNTL_pre) { /* not cached: grow by one pair (3 = count slot + one pair when starting empty) */
      if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre));
      else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre));
      mumps->CNTL_pre[0]++;
    }
    mumps->CNTL_pre[1 + 2 * i] = icntl;
    mumps->CNTL_pre[2 + 2 * i] = val;
  } else ID_CNTL_SET(mumps->id, icntl, val); /* MUMPS initialized: write into its control array directly */
  PetscFunctionReturn(PETSC_SUCCESS);
}
3426 
3427 static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val)
3428 {
3429   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3430 
3431   PetscFunctionBegin;
3432   if (mumps->id.job == JOB_NULL) {
3433     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
3434     *val = 0.0;
3435     for (i = 0; i < nCNTL_pre; ++i) {
3436       if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i];
3437     }
3438   } else *val = ID_CNTL_GET(mumps->id, icntl);
3439   PetscFunctionReturn(PETSC_SUCCESS);
3440 }
3441 
3442 /*@
3443   MatMumpsSetCntl - Set MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc>
3444 
3445   Logically Collective
3446 
3447   Input Parameters:
3448 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3449 . icntl - index of MUMPS parameter array `CNTL()`
3450 - val   - value of MUMPS `CNTL(icntl)`
3451 
3452   Options Database Key:
3453 . -mat_mumps_cntl_<icntl> <val> - change the option numbered `icntl` to `val`
3454 
3455   Level: beginner
3456 
3457   Note:
3458   Ignored if MUMPS is not installed or `F` is not a MUMPS matrix
3459 
3460 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3461 @*/
3462 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val)
3463 {
3464   PetscFunctionBegin;
3465   PetscValidType(F, 1);
3466   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3467   PetscValidLogicalCollectiveInt(F, icntl, 2);
3468   PetscValidLogicalCollectiveReal(F, val, 3);
3469   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
3470   PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val));
3471   PetscFunctionReturn(PETSC_SUCCESS);
3472 }
3473 
3474 /*@
3475   MatMumpsGetCntl - Get MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc>
3476 
3477   Logically Collective
3478 
3479   Input Parameters:
3480 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3481 - icntl - index of MUMPS parameter array CNTL()
3482 
3483   Output Parameter:
3484 . val - value of MUMPS CNTL(icntl)
3485 
3486   Level: beginner
3487 
3488 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3489 @*/
3490 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val)
3491 {
3492   PetscFunctionBegin;
3493   PetscValidType(F, 1);
3494   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3495   PetscValidLogicalCollectiveInt(F, icntl, 2);
3496   PetscAssertPointer(val, 3);
3497   PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
3498   PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
3499   PetscFunctionReturn(PETSC_SUCCESS);
3500 }
3501 
3502 static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info)
3503 {
3504   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3505 
3506   PetscFunctionBegin;
3507   *info = mumps->id.INFO(icntl);
3508   PetscFunctionReturn(PETSC_SUCCESS);
3509 }
3510 
3511 static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog)
3512 {
3513   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3514 
3515   PetscFunctionBegin;
3516   *infog = mumps->id.INFOG(icntl);
3517   PetscFunctionReturn(PETSC_SUCCESS);
3518 }
3519 
3520 static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo)
3521 {
3522   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3523 
3524   PetscFunctionBegin;
3525   *rinfo = ID_RINFO_GET(mumps->id, icntl);
3526   PetscFunctionReturn(PETSC_SUCCESS);
3527 }
3528 
3529 static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog)
3530 {
3531   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3532 
3533   PetscFunctionBegin;
3534   *rinfog = ID_RINFOG_GET(mumps->id, icntl);
3535   PetscFunctionReturn(PETSC_SUCCESS);
3536 }
3537 
3538 static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array)
3539 {
3540   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3541 
3542   PetscFunctionBegin;
3543   PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
3544   *size  = 0;
3545   *array = NULL;
3546   if (!mumps->myid) {
3547     *size = mumps->id.INFOG(28);
3548     PetscCall(PetscMalloc1(*size, array));
3549     for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1;
3550   }
3551   PetscFunctionReturn(PETSC_SUCCESS);
3552 }
3553 
/* Compute the entries of inv(A) whose positions are given by the sparsity pattern of
   spRHS (a MATTRANSPOSEVIRTUAL wrapper); uses MUMPS' selected-inversion feature (ICNTL(30)) */
static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS)
{
  Mat          Bt = NULL, Btseq = NULL;
  PetscBool    flg;
  Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
  PetscScalar *aa;
  PetscInt     spnr, *ia, *ja, M, nrhs;

  PetscFunctionBegin;
  PetscAssertPointer(spRHS, 2);
  PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg));
  PetscCheck(flg, PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix");
  /* reject any scaling/shift/submatrix decoration attached to the shell: spRHS must be a plain transpose wrapper */
  PetscCall(MatShellGetScalingShifts(spRHS, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
  PetscCall(MatTransposeGetMat(spRHS, &Bt));

  PetscCall(MatMumpsSetIcntl(F, 30, 1)); /* ICNTL(30) = 1: compute a user-specified set of entries of inv(A) */

  if (mumps->petsc_size > 1) {
    Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data; /* NOTE(review): assumes Bt is MATMPIAIJ in parallel -- confirm */
    Btseq         = b->A;                   /* sequential diagonal block carries the local sparse pattern */
  } else {
    Btseq = Bt;
  }

  PetscCall(MatGetSize(spRHS, &M, &nrhs));
  mumps->id.nrhs = (PetscMUMPSInt)nrhs;
  PetscCall(PetscMUMPSIntCast(M, &mumps->id.lrhs));
  mumps->id.rhs = NULL; /* dense RHS unused: the sparse-RHS fields below are used instead */

  if (!mumps->myid) { /* the sparse RHS is centralized on the host */
    PetscCall(MatSeqAIJGetArray(Btseq, &aa));
    /* shift = 1: MUMPS wants Fortran-style (1-based) row pointers and column indices */
    PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
    PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
    PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)Btseq->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
  } else { /* non-host ranks provide no sparse RHS; release any stale buffer */
    mumps->id.irhs_ptr    = NULL;
    mumps->id.irhs_sparse = NULL;
    mumps->id.nz_rhs      = 0;
    if (mumps->id.rhs_sparse_len) {
      PetscCall(PetscFree(mumps->id.rhs_sparse));
      mumps->id.rhs_sparse_len = 0;
    }
  }
  mumps->id.ICNTL(20) = 1; /* rhs is sparse */
  mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */

  /* solve phase */
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));

  if (!mumps->myid) { /* restore what the host borrowed from Btseq */
    PetscCall(MatSeqAIJRestoreArray(Btseq, &aa));
    PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3613 
3614 /*@
3615   MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` <https://mumps-solver.org/index.php?page=doc>
3616 
3617   Logically Collective
3618 
3619   Input Parameter:
3620 . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3621 
3622   Output Parameter:
3623 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A`
3624 
3625   Level: beginner
3626 
3627 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`
3628 @*/
PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  /* dispatch to the implementation composed on MUMPS factor matrices; errors if F is not one */
  PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3637 
/* Obtain selected entries of inv(A^T): wrap the AIJ matrix spRHST in a temporary
   MATTRANSPOSEVIRTUAL shell so MatMumpsGetInverse_MUMPS() can be reused unchanged,
   then drop the wrapper (spRHST itself keeps the result) */
static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST)
{
  Mat spRHS;

  PetscFunctionBegin;
  PetscCall(MatCreateTranspose(spRHST, &spRHS));
  PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS));
  PetscCall(MatDestroy(&spRHS));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3648 
3649 /*@
  MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix $A^T$ <https://mumps-solver.org/index.php?page=doc>
3651 
3652   Logically Collective
3653 
3654   Input Parameter:
3655 . F - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3656 
3657   Output Parameter:
. spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of $A^T$
3659 
3660   Level: beginner
3661 
3662 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()`
3663 @*/
PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST)
{
  PetscBool flg;

  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  /* unlike MatMumpsGetInverse(), the RHS pattern matrix must be plain (Seq/MPI) AIJ here */
  PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
  PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix");
  PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3676 
3677 static PetscErrorCode MatMumpsSetBlk_MUMPS(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[])
3678 {
3679   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3680 
3681   PetscFunctionBegin;
3682   if (nblk) {
3683     PetscAssertPointer(blkptr, 4);
3684     PetscCall(PetscMUMPSIntCast(nblk, &mumps->id.nblk));
3685     PetscCall(PetscFree(mumps->id.blkptr));
3686     PetscCall(PetscMalloc1(nblk + 1, &mumps->id.blkptr));
3687     for (PetscInt i = 0; i < nblk + 1; ++i) PetscCall(PetscMUMPSIntCast(blkptr[i], mumps->id.blkptr + i));
3688     // mumps->id.icntl[] might have not been allocated, which is done in MatSetFromOptions_MUMPS(). So we don't assign ICNTL(15).
3689     // We use id.nblk and id.blkptr to know what values to set to ICNTL(15) in MatSetFromOptions_MUMPS().
3690     // mumps->id.ICNTL(15) = 1;
3691     if (blkvar) {
3692       PetscCall(PetscFree(mumps->id.blkvar));
3693       PetscCall(PetscMalloc1(F->rmap->N, &mumps->id.blkvar));
3694       for (PetscInt i = 0; i < F->rmap->N; ++i) PetscCall(PetscMUMPSIntCast(blkvar[i], mumps->id.blkvar + i));
3695     }
3696   } else {
3697     PetscCall(PetscFree(mumps->id.blkptr));
3698     PetscCall(PetscFree(mumps->id.blkvar));
3699     // mumps->id.ICNTL(15) = 0;
3700     mumps->id.nblk = 0;
3701   }
3702   PetscFunctionReturn(PETSC_SUCCESS);
3703 }
3704 
3705 /*@
3706   MatMumpsSetBlk - Set user-specified variable block sizes to be used with `-mat_mumps_icntl_15 1`
3707 
3708   Not collective, only relevant on the first process of the MPI communicator
3709 
3710   Input Parameters:
3711 + F      - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3712 . nblk   - the number of blocks
3713 . blkvar - see MUMPS documentation, `blkvar(blkptr(iblk):blkptr(iblk+1)-1)`, (`iblk=1, nblk`) holds the variables associated to block `iblk`
3714 - blkptr - array starting at 1 and of size `nblk + 1` storing the prefix sum of all blocks
3715 
3716   Level: advanced
3717 
3718 .seealso: [](ch_matrices), `MATSOLVERMUMPS`, `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatSetVariableBlockSizes()`
3719 @*/
PetscErrorCode MatMumpsSetBlk(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[])
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  /* dispatch to the implementation composed on MUMPS factor matrices; errors if F is not one */
  PetscUseMethod(F, "MatMumpsSetBlk_C", (Mat, PetscInt, const PetscInt[], const PetscInt[]), (F, nblk, blkvar, blkptr));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3728 
3729 /*@
3730   MatMumpsGetInfo - Get MUMPS parameter INFO() <https://mumps-solver.org/index.php?page=doc>
3731 
3732   Logically Collective
3733 
3734   Input Parameters:
3735 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3736 - icntl - index of MUMPS parameter array INFO()
3737 
3738   Output Parameter:
3739 . ival - value of MUMPS INFO(icntl)
3740 
3741   Level: beginner
3742 
3743 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3744 @*/
PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscAssertPointer(ival, 3);
  /* dispatch to the implementation composed on MUMPS factor matrices; errors if F is not one */
  PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3754 
3755 /*@
3756   MatMumpsGetInfog - Get MUMPS parameter INFOG() <https://mumps-solver.org/index.php?page=doc>
3757 
3758   Logically Collective
3759 
3760   Input Parameters:
3761 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3762 - icntl - index of MUMPS parameter array INFOG()
3763 
3764   Output Parameter:
3765 . ival - value of MUMPS INFOG(icntl)
3766 
3767   Level: beginner
3768 
3769 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3770 @*/
PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscAssertPointer(ival, 3);
  /* dispatch to the implementation composed on MUMPS factor matrices; errors if F is not one */
  PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3780 
3781 /*@
3782   MatMumpsGetRinfo - Get MUMPS parameter RINFO() <https://mumps-solver.org/index.php?page=doc>
3783 
3784   Logically Collective
3785 
3786   Input Parameters:
3787 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3788 - icntl - index of MUMPS parameter array RINFO()
3789 
3790   Output Parameter:
3791 . val - value of MUMPS RINFO(icntl)
3792 
3793   Level: beginner
3794 
3795 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
3796 @*/
PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscAssertPointer(val, 3);
  /* dispatch to the implementation composed on MUMPS factor matrices; errors if F is not one */
  PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3806 
3807 /*@
3808   MatMumpsGetRinfog - Get MUMPS parameter RINFOG() <https://mumps-solver.org/index.php?page=doc>
3809 
3810   Logically Collective
3811 
3812   Input Parameters:
3813 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3814 - icntl - index of MUMPS parameter array RINFOG()
3815 
3816   Output Parameter:
3817 . val - value of MUMPS RINFOG(icntl)
3818 
3819   Level: beginner
3820 
3821 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
3822 @*/
PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscAssertPointer(val, 3);
  /* dispatch to the implementation composed on MUMPS factor matrices; errors if F is not one */
  PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3832 
3833 /*@
3834   MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() <https://mumps-solver.org/index.php?page=doc>
3835 
3836   Logically Collective
3837 
3838   Input Parameter:
3839 . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3840 
3841   Output Parameters:
3842 + size  - local size of the array. The size of the array is non-zero only on MPI rank 0
3843 - array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible
3844           for freeing this array.
3845 
3846   Level: beginner
3847 
3848 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
3849 @*/
PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscAssertPointer(size, 2);
  PetscAssertPointer(array, 3);
  /* dispatch to the implementation composed on MUMPS factor matrices; the callee allocates *array and the caller frees it */
  PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3860 
3861 /*MC
3862   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
3863   MPI distributed and sequential matrices via the external package MUMPS <https://mumps-solver.org/index.php?page=doc>
3864 
3865   Works with `MATAIJ` and `MATSBAIJ` matrices
3866 
3867   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
3868 
3869   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode.
3870   See details below.
3871 
3872   Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver
3873 
3874   Options Database Keys:
3875 +  -mat_mumps_icntl_1  - ICNTL(1): output stream for error messages
3876 .  -mat_mumps_icntl_2  - ICNTL(2): output stream for diagnostic printing, statistics, and warning
3877 .  -mat_mumps_icntl_3  - ICNTL(3): output stream for global information, collected on the host
3878 .  -mat_mumps_icntl_4  - ICNTL(4): level of printing (0 to 4)
3879 .  -mat_mumps_icntl_6  - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)
3880 .  -mat_mumps_icntl_7  - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
3881                           Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
3882 .  -mat_mumps_icntl_8  - ICNTL(8): scaling strategy (-2 to 8 or 77)
3883 .  -mat_mumps_icntl_10 - ICNTL(10): max num of refinements
3884 .  -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view)
3885 .  -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
3886 .  -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
3887 .  -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space
3888 .  -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format
3889 .  -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement
3890 .  -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
3891 .  -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
.  -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor
3893 .  -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
3894 .  -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis
3895 .  -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix
3896 .  -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
3897 .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
3898 .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
3899 .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
3900 .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
3901 .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
3902 .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
3903 .  -mat_mumps_icntl_37 - ICNTL(37): compression of the contribution blocks (CB)
3904 .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
3905 .  -mat_mumps_icntl_48 - ICNTL(48): multithreading with tree parallelism
3906 .  -mat_mumps_icntl_49 - ICNTL(49): compact workarray at the end of factorization phase
3907 .  -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization
3908 .  -mat_mumps_cntl_1   - CNTL(1): relative pivoting threshold
3909 .  -mat_mumps_cntl_2   - CNTL(2): stopping criterion of refinement
3910 .  -mat_mumps_cntl_3   - CNTL(3): absolute pivoting threshold
3911 .  -mat_mumps_cntl_4   - CNTL(4): value for static pivoting
3912 .  -mat_mumps_cntl_5   - CNTL(5): fixation for null pivots
3913 .  -mat_mumps_cntl_7   - CNTL(7): precision of the dropping parameter used during BLR factorization
3914 -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
3915                                     Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.
3916 
3917   Level: beginner
3918 
3919   Notes:
3920   MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at <https://mumps-solver.org/index.php?page=doc>) so using it will
3921   error if the matrix is Hermitian.
3922 
3923   When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
3924   `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.
3925 
3926   When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about
3927   the failure with
3928 .vb
3929           KSPGetPC(ksp,&pc);
3930           PCFactorGetMatrix(pc,&mat);
3931           MatMumpsGetInfo(mat,....);
3932           MatMumpsGetInfog(mat,....); etc.
3933 .ve
3934   Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message.
3935 
3936   MUMPS provides 64-bit integer support in two build modes:
3937   full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
3938   requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
3939 
3940   selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
3941   MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
3942   columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
3943   integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
3944 
  With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.
3946 
3947   Two modes to run MUMPS/PETSc with OpenMP
3948 .vb
3949    Set `OMP_NUM_THREADS` and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
   threads per rank, then you may use "export `OMP_NUM_THREADS`=16 && mpirun -n 4 ./test".
3951 .ve
3952 
3953 .vb
3954    `-mat_mumps_use_omp_threads` [m] and run your code with as many MPI ranks as the number of cores. For example,
3955    if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
3956 .ve
3957 
3958    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
3959    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc`
3960    (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
3961    libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
   (PETSc will automatically try to utilize a threaded BLAS if `--with-openmp` is provided).
3963 
3964    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
3965    processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
3966    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
   are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slave ranks are not. We will free CPUs assigned to slaves (might be set
3968    by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs.
3969    In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets,
3970    if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind
3971    MPI ranks to cores, then with `-mat_mumps_use_omp_threads` 16, a master rank (and threads it spawns) will use half cores in socket 0, and half
3972    cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the
3973    problem will not happen. Therefore, when you use `-mat_mumps_use_omp_threads`, you need to keep an eye on your MPI rank mapping and CPU binding.
3974    For example, with the Slurm job scheduler, one can use srun `--cpu-bind`=verbose -m block:block to map consecutive MPI ranks to sockets and
3975    examine the mapping result.
3976 
3977    PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts,
3978    for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc
3979    calls `omp_set_num_threads`(m) internally before calling MUMPS.
3980 
3981    See {cite}`heroux2011bi` and {cite}`gutierrez2017accommodating`
3982 
3983 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `MatMumpsSetBlk()`, `KSPGetPC()`, `PCFactorGetMatrix()`
3984 M*/
3985 
/* Report MATSOLVERMUMPS as the solver type of a MUMPS factor (composed as "MatFactorGetSolverType_C") */
static PetscErrorCode MatFactorGetSolverType_mumps(PETSC_UNUSED Mat A, MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERMUMPS;
  PetscFunctionReturn(PETSC_SUCCESS);
}
3992 
/* MatGetFactor for Seq and MPI AIJ matrices (also handles MATDIAGONAL and dense input):
   creates the MUMPS factor matrix B, composes the MatMumps* methods on it, and selects the
   triple-conversion routine and MUMPS symmetry mode from the input type and ftype */
static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F)
{
  Mat         B;
  Mat_MUMPS  *mumps;
  PetscBool   isSeqAIJ, isDiag, isDense;
  PetscMPIInt size;

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  /* complex Hermitian (non-symmetric) Cholesky is unsupported: return *F = NULL so a different factorization can be chosen */
  if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
    PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
    *F = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
#endif
  /* Create the factorization matrix */
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
  PetscCall(MatSetUp(B));

  PetscCall(PetscNew(&mumps));

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  /* compose the MUMPS-specific query/option methods so the public MatMumps* wrappers can dispatch here */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    /* pick the input->triples conversion matching the concrete matrix type */
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
    else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
    else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
    PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
    mumps->sym = 0; /* MUMPS SYM=0: unsymmetric factorization */
  } else {
    /* anything other than MAT_FACTOR_LU is treated as Cholesky here */
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
    B->factortype                  = MAT_FACTOR_CHOLESKY;
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
    else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
    else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
    PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
#if defined(PETSC_USE_COMPLEX)
    mumps->sym = 2; /* complex: always general symmetric (SYM=2) */
#else
    if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; /* SYM=1 only for real SPD matrices */
    else mumps->sym = 2;
#endif
  }

  /* set solvertype */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void *)mumps;

  *F               = B;
  mumps->id.job    = JOB_NULL;
  mumps->ICNTL_pre = NULL;
  mumps->CNTL_pre  = NULL;
  mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}
4082 
/* MatGetFactor for Seq and MPI SBAIJ matrices: Cholesky only; mirrors MatGetFactor_aij_mumps()
   but converts directly from the symmetric (SBAIJ) storage.
   Note: ftype is marked PETSC_UNUSED (warning suppression only) yet is read in the complex build below. */
static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, PETSC_UNUSED MatFactorType ftype, Mat *F)
{
  Mat         B;
  Mat_MUMPS  *mumps;
  PetscBool   isSeqSBAIJ;
  PetscMPIInt size;

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  /* complex Hermitian (non-symmetric) Cholesky is unsupported: return *F = NULL so a different factorization can be chosen */
  if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
    PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
    *F = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
#endif
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
  PetscCall(MatSetUp(B));

  PetscCall(PetscNew(&mumps));
  /* pick the input->triples conversion matching the concrete matrix type */
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ));
  if (isSeqSBAIJ) {
    mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
  } else {
    mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
  }

  B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
  B->ops->view                   = MatView_MUMPS;
  B->ops->getinfo                = MatGetInfo_MUMPS;

  /* compose the MUMPS-specific query/option methods so the public MatMumps* wrappers can dispatch here */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));

  B->factortype = MAT_FACTOR_CHOLESKY;
#if defined(PETSC_USE_COMPLEX)
  mumps->sym = 2; /* complex: always general symmetric (SYM=2) */
#else
  if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; /* SYM=1 only for real SPD matrices */
  else mumps->sym = 2;
#endif

  /* set solvertype */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }
  PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void *)mumps;

  *F               = B;
  mumps->id.job    = JOB_NULL;
  mumps->ICNTL_pre = NULL;
  mumps->CNTL_pre  = NULL;
  mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}
4159 
4160 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F)
4161 {
4162   Mat         B;
4163   Mat_MUMPS  *mumps;
4164   PetscBool   isSeqBAIJ;
4165   PetscMPIInt size;
4166 
4167   PetscFunctionBegin;
4168   /* Create the factorization matrix */
4169   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ));
4170   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4171   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4172   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4173   PetscCall(MatSetUp(B));
4174 
4175   PetscCall(PetscNew(&mumps));
4176   PetscCheck(ftype == MAT_FACTOR_LU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
4177   B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
4178   B->factortype            = MAT_FACTOR_LU;
4179   if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
4180   else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
4181   mumps->sym = 0;
4182   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
4183 
4184   B->ops->view    = MatView_MUMPS;
4185   B->ops->getinfo = MatGetInfo_MUMPS;
4186 
4187   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4188   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4189   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4190   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4191   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4192   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4193   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4194   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4195   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4196   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4197   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4198   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4199   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
4200   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
4201   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));
4202 
4203   /* set solvertype */
4204   PetscCall(PetscFree(B->solvertype));
4205   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4206   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4207   if (size == 1) {
4208     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4209     B->canuseordering = PETSC_TRUE;
4210   }
4211   B->ops->destroy = MatDestroy_MUMPS;
4212   B->data         = (void *)mumps;
4213 
4214   *F               = B;
4215   mumps->id.job    = JOB_NULL;
4216   mumps->ICNTL_pre = NULL;
4217   mumps->CNTL_pre  = NULL;
4218   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
4219   PetscFunctionReturn(PETSC_SUCCESS);
4220 }
4221 
4222 /* MatGetFactor for Seq and MPI SELL matrices */
4223 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F)
4224 {
4225   Mat         B;
4226   Mat_MUMPS  *mumps;
4227   PetscBool   isSeqSELL;
4228   PetscMPIInt size;
4229 
4230   PetscFunctionBegin;
4231   /* Create the factorization matrix */
4232   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL));
4233   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4234   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4235   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4236   PetscCall(MatSetUp(B));
4237 
4238   PetscCall(PetscNew(&mumps));
4239 
4240   B->ops->view    = MatView_MUMPS;
4241   B->ops->getinfo = MatGetInfo_MUMPS;
4242 
4243   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4244   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4245   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4246   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4247   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4248   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4249   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4250   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4251   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4252   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4253   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4254   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4255 
4256   PetscCheck(ftype == MAT_FACTOR_LU, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
4257   B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
4258   B->factortype            = MAT_FACTOR_LU;
4259   PetscCheck(isSeqSELL, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
4260   mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
4261   mumps->sym              = 0;
4262   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
4263 
4264   /* set solvertype */
4265   PetscCall(PetscFree(B->solvertype));
4266   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4267   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4268   if (size == 1) {
4269     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization  */
4270     B->canuseordering = PETSC_TRUE;
4271   }
4272   B->ops->destroy = MatDestroy_MUMPS;
4273   B->data         = (void *)mumps;
4274 
4275   *F               = B;
4276   mumps->id.job    = JOB_NULL;
4277   mumps->ICNTL_pre = NULL;
4278   mumps->CNTL_pre  = NULL;
4279   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
4280   PetscFunctionReturn(PETSC_SUCCESS);
4281 }
4282 
4283 /* MatGetFactor for MATNEST matrices */
static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F)
{
  Mat         B, **mats;
  Mat_MUMPS  *mumps;
  PetscInt    nr, nc;
  PetscMPIInt size;
  PetscBool   flg = PETSC_TRUE; /* stays PETSC_TRUE only while every support condition below holds */

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  /* Complex Hermitian-but-not-symmetric Cholesky is not supported; return *F = NULL so the caller can fall back */
  if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
    PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
    *F = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
#endif

  /* Return if some condition is not satisfied */
  *F = NULL;
  PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
  if (ftype == MAT_FACTOR_CHOLESKY) {
    IS       *rows, *cols;
    PetscInt *m, *M;

    /* Cholesky needs a square nest layout whose row and column index sets coincide block by block */
    PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc);
    PetscCall(PetscMalloc2(nr, &rows, nc, &cols));
    PetscCall(MatNestGetISs(A, rows, cols));
    /* Each diagonal block must have identical (including ordering) row and column index sets */
    for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg));
    if (!flg) {
      PetscCall(PetscFree2(rows, cols));
      PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n"));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    /* Record the [min,max] index span of each block's row IS ... */
    PetscCall(PetscMalloc2(nr, &m, nr, &M));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r]));
    /* ... and reject the factorization when an endpoint of rows[r]'s span falls inside the span of a
       later rows[k], i.e. the blocks' row maps (partially) overlap instead of being disjoint ranges */
    for (PetscInt r = 0; flg && r < nr; r++)
      for (PetscInt k = r + 1; flg && k < nr; k++)
        if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE;
    PetscCall(PetscFree2(m, M));
    PetscCall(PetscFree2(rows, cols));
    if (!flg) {
      PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n"));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  /* Verify that every (relevant) submatrix has a type this interface knows how to convert to triples */
  for (PetscInt r = 0; r < nr; r++) {
    for (PetscInt c = 0; c < nc; c++) {
      Mat       sub = mats[r][c];
      PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense;

      /* For Cholesky only the upper-triangular part of the nest is inspected */
      if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue;
      /* NOTE(review): presumably peels off a transpose-virtual wrapper so the type checks below
         see the underlying matrix — confirm against MatGetTranspose_TransposeVirtual() */
      PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
      PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
      PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
      if (ftype == MAT_FACTOR_CHOLESKY) {
        if (r == c) {
          /* SBAIJ is admissible on the diagonal only */
          if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag && !isDense) {
            PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
            flg = PETSC_FALSE;
          }
        } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
          PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
          flg = PETSC_FALSE;
        }
      } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
        PetscCall(PetscInfo(sub, "MAT_FACTOR_LU not supported for block of type %s.\n", ((PetscObject)sub)->type_name));
        flg = PETSC_FALSE;
      }
    }
  }
  /* Some block had an unsupported type; *F is still NULL */
  if (!flg) PetscFunctionReturn(PETSC_SUCCESS);

  /* Create the factorization matrix */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
  PetscCall(MatSetUp(B));

  PetscCall(PetscNew(&mumps));

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    mumps->sym               = 0; /* MUMPS SYM=0: unsymmetric */
  } else {
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
    B->factortype                  = MAT_FACTOR_CHOLESKY;
#if defined(PETSC_USE_COMPLEX)
    mumps->sym = 2; /* complex symmetric: MUMPS SYM=2 (general symmetric) */
#else
    /* MUMPS SYM=1 (symmetric positive definite) only when A is marked SPD, otherwise SYM=2 */
    if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
    else mumps->sym = 2;
#endif
  }
  mumps->ConvertToTriples = MatConvertToTriples_nest_xaij;
  PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype]));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }

  /* set solvertype */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void *)mumps;

  *F               = B;
  mumps->id.job    = JOB_NULL;
  mumps->ICNTL_pre = NULL;
  mumps->CNTL_pre  = NULL;
  mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}
4426 
4427 PETSC_INTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
4428 {
4429   PetscFunctionBegin;
4430   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4431   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4432   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
4433   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
4434   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
4435   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4436   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4437   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
4438   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
4439   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
4440   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps));
4441   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4442   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4443   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4444   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4445   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
4446   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
4447   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps));
4448   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps));
4449   PetscFunctionReturn(PETSC_SUCCESS);
4450 }
4451