xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision ae1ee55146a7ad071171b861759b85940c7e5c67)
1 /*
2     Provides an interface to the MUMPS sparse solver
3 */
4 #include <petscpkg_version.h>
5 #include <petscsf.h>
6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
8 #include <../src/mat/impls/sell/mpi/mpisell.h>
9 #include <petsc/private/vecimpl.h>
10 
11 #define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")"
12 
13 EXTERN_C_BEGIN
14 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
15   #include <cmumps_c.h>
16   #include <zmumps_c.h>
17   #include <smumps_c.h>
18   #include <dmumps_c.h>
19 #else
20   #if defined(PETSC_USE_COMPLEX)
21     #if defined(PETSC_USE_REAL_SINGLE)
22       #include <cmumps_c.h>
23       #define MUMPS_c     cmumps_c
24       #define MumpsScalar CMUMPS_COMPLEX
25     #else
26       #include <zmumps_c.h>
27       #define MUMPS_c     zmumps_c
28       #define MumpsScalar ZMUMPS_COMPLEX
29     #endif
30   #else
31     #if defined(PETSC_USE_REAL_SINGLE)
32       #include <smumps_c.h>
33       #define MUMPS_c     smumps_c
34       #define MumpsScalar SMUMPS_REAL
35     #else
36       #include <dmumps_c.h>
37       #define MUMPS_c     dmumps_c
38       #define MumpsScalar DMUMPS_REAL
39     #endif
40   #endif
41 #endif
42 #if defined(PETSC_USE_COMPLEX)
43   #if defined(PETSC_USE_REAL_SINGLE)
44     #define MUMPS_STRUC_C CMUMPS_STRUC_C
45   #else
46     #define MUMPS_STRUC_C ZMUMPS_STRUC_C
47   #endif
48 #else
49   #if defined(PETSC_USE_REAL_SINGLE)
50     #define MUMPS_STRUC_C SMUMPS_STRUC_C
51   #else
52     #define MUMPS_STRUC_C DMUMPS_STRUC_C
53   #endif
54 #endif
55 EXTERN_C_END
56 
57 #define JOB_INIT         -1
58 #define JOB_NULL         0
59 #define JOB_FACTSYMBOLIC 1
60 #define JOB_FACTNUMERIC  2
61 #define JOB_SOLVE        3
62 #define JOB_END          -2
63 
64 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
65    number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
66    naming convention in PetscMPIInt, PetscBLASInt etc.
67 */
68 typedef MUMPS_INT PetscMUMPSInt;
69 
70 #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
71   #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
72     #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
73   #endif
74 #else
75   #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */
76     #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
77   #endif
78 #endif
79 
80 #define MPIU_MUMPSINT       MPI_INT
81 #define PETSC_MUMPS_INT_MAX 2147483647
82 #define PETSC_MUMPS_INT_MIN -2147483648
83 
/* Cast a PetscCount to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers */
static inline PetscErrorCode PetscMUMPSIntCast(PetscCount a, PetscMUMPSInt *b)
{
  PetscFunctionBegin;
#if PetscDefined(USE_64BIT_INDICES)
  /* only with 64-bit indices can <a> exceed the 32-bit MUMPS_INT range, hence the check is compiled out otherwise */
  PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
#endif
  *b = (PetscMUMPSInt)a;
  PetscFunctionReturn(PETSC_SUCCESS);
}
94 
95 /* Put these utility routines here since they are only used in this file */
PetscOptionsMUMPSInt_Private(PetscOptionItems PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt * value,PetscBool * set,PetscMUMPSInt lb,PetscMUMPSInt ub)96 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
97 {
98   PetscInt  myval;
99   PetscBool myset;
100 
101   PetscFunctionBegin;
102   /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
103   PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
104   if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
105   if (set) *set = myset;
106   PetscFunctionReturn(PETSC_SUCCESS);
107 }
108 #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)
109 
// An abstract type for specific MUMPS types {S,D,C,Z}MUMPS_STRUC_C.
//
// With the abstract (outer) type, we can write shared code. We call MUMPS through a type-to-be-determined inner field within the abstract type.
// Before/after calling MUMPS, we need to copy in/out fields between the outer and the inner, which seems expensive. But note that the large fixed size
// arrays within the types are directly linked. At the end, we only need to copy ~20 integers/pointers, which is doable. See PreMumpsCall()/PostMumpsCall().
//
// Not all fields in the specific types are exposed in the abstract type. We only need those used by the PETSc/MUMPS interface.
// Notably, DMUMPS_COMPLEX* and DMUMPS_REAL* fields are now declared as void *. Their type will be determined by the actual precision to be used.
// Also note that we added some *_len fields not in specific types to track sizes of those MumpsScalar buffers.
typedef struct {
  PetscPrecision precision;   // precision used by MUMPS
  void          *internal_id; // the data structure passed to MUMPS, whose actual type {S,D,C,Z}MUMPS_STRUC_C is to be decided by precision and PETSc's use of complex

  // aliased fields from internal_id, so that we can use XMUMPS_STRUC_C to write shared code across different precisions.
  MUMPS_INT  sym, par, job;
  MUMPS_INT  comm_fortran; /* Fortran communicator */
  MUMPS_INT *icntl;
  void      *cntl; // MumpsReal, fixed size array
  MUMPS_INT  n;
  MUMPS_INT  nblk;

  /* Assembled entry */
  MUMPS_INT8 nnz;
  MUMPS_INT *irn;
  MUMPS_INT *jcn;
  void      *a; // MumpsScalar, centralized input
  PetscCount a_len;

  /* Distributed entry */
  MUMPS_INT8 nnz_loc;
  MUMPS_INT *irn_loc;
  MUMPS_INT *jcn_loc;
  void      *a_loc; // MumpsScalar, distributed input
  PetscCount a_loc_len;

  /* Matrix by blocks */
  MUMPS_INT *blkptr;
  MUMPS_INT *blkvar;

  /* Ordering, if given by user */
  MUMPS_INT *perm_in;

  /* RHS, solution, output data and statistics */
  void      *rhs, *redrhs, *rhs_sparse, *sol_loc, *rhs_loc;                 // MumpsScalar buffers
  PetscCount rhs_len, redrhs_len, rhs_sparse_len, sol_loc_len, rhs_loc_len; // length of buffers (in MumpsScalar) IF allocated in a different precision than PetscScalar

  MUMPS_INT *irhs_sparse, *irhs_ptr, *isol_loc, *irhs_loc;
  MUMPS_INT  nrhs, lrhs, lredrhs, nz_rhs, lsol_loc, nloc_rhs, lrhs_loc;
  // MUMPS_INT  nsol_loc; // introduced in MUMPS-5.7, but PETSc doesn't use it; would cause compile errors with the widely used 5.6. If you add it, must also update PreMumpsCall() and guard this with #if PETSC_PKG_MUMPS_VERSION_GE(5, 7, 0)
  MUMPS_INT  schur_lld;
  MUMPS_INT *info, *infog;   // fixed size array
  void      *rinfo, *rinfog; // MumpsReal, fixed size array

  /* Null space */
  MUMPS_INT *pivnul_list; // allocated by MUMPS!
  MUMPS_INT *mapping;     // allocated by MUMPS!

  /* Schur */
  MUMPS_INT  size_schur;
  MUMPS_INT *listvar_schur;
  void      *schur; // MumpsScalar
  PetscCount schur_len;

  /* For out-of-core */
  char *ooc_tmpdir; // fixed size array
  char *ooc_prefix; // fixed size array
} XMUMPS_STRUC_C;
177 
// Allocate a concrete [SDCZ]MUMPS_STRUC_C and link its fields with the outer XMUMPS_STRUC_C.
// Note: fixed-size arrays are allocated by MUMPS; redirect them to the outer struct
#define AllocateInternalID(MUMPS_STRUC_T, outer) \
  do { \
    MUMPS_STRUC_T *inner; \
    PetscCall(PetscNew(&inner)); \
    outer->icntl      = inner->icntl; \
    outer->cntl       = inner->cntl; \
    outer->info       = inner->info; \
    outer->infog      = inner->infog; \
    outer->rinfo      = inner->rinfo; \
    outer->rinfog     = inner->rinfog; \
    outer->ooc_tmpdir = inner->ooc_tmpdir; \
    outer->ooc_prefix = inner->ooc_prefix; \
    /* these three fields should never change after init */ \
    inner->comm_fortran = outer->comm_fortran; \
    inner->par          = outer->par; \
    inner->sym          = outer->sym; \
    outer->internal_id  = inner; \
  } while (0)
197 
// Allocate the internal [SDCZ]MUMPS_STRUC_C ID data structure in the given <precision>, and link fields of the outer and the inner
static inline PetscErrorCode MatMumpsAllocateInternalID(XMUMPS_STRUC_C *outer, PetscPrecision precision)
{
  PetscFunctionBegin;
  outer->precision = precision;
#if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
  /* with mixed-precision support, the concrete type is chosen at runtime from <precision> */
  #if defined(PETSC_USE_COMPLEX)
  if (precision == PETSC_PRECISION_SINGLE) AllocateInternalID(CMUMPS_STRUC_C, outer);
  else AllocateInternalID(ZMUMPS_STRUC_C, outer);
  #else
  if (precision == PETSC_PRECISION_SINGLE) AllocateInternalID(SMUMPS_STRUC_C, outer);
  else AllocateInternalID(DMUMPS_STRUC_C, outer);
  #endif
#else
  /* otherwise MUMPS_STRUC_C is fixed at compile time to match PetscScalar */
  AllocateInternalID(MUMPS_STRUC_C, outer);
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
216 
// Free the MumpsScalar buffers owned by the inner id; used only when they were allocated
// separately from the PetscScalar arrays (i.e., in mixed precision)
#define FreeInternalIDFields(MUMPS_STRUC_T, outer) \
  do { \
    MUMPS_STRUC_T *inner = (MUMPS_STRUC_T *)(outer)->internal_id; \
    PetscCall(PetscFree(inner->a)); \
    PetscCall(PetscFree(inner->a_loc)); \
    PetscCall(PetscFree(inner->redrhs)); \
    PetscCall(PetscFree(inner->rhs)); \
    PetscCall(PetscFree(inner->rhs_sparse)); \
    PetscCall(PetscFree(inner->rhs_loc)); \
    PetscCall(PetscFree(inner->sol_loc)); \
    PetscCall(PetscFree(inner->schur)); \
  } while (0)
229 
// Free the inner [SDCZ]MUMPS_STRUC_C id (and, in mixed precision, the MumpsScalar buffers it owns)
static inline PetscErrorCode MatMumpsFreeInternalID(XMUMPS_STRUC_C *outer)
{
  PetscFunctionBegin;
  if (outer->internal_id) { // sometimes, the inner is never created before we destroy the outer
#if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
    const PetscPrecision mumps_precision = outer->precision;
    if (mumps_precision != PETSC_SCALAR_PRECISION) { // Free internal buffers if we used mixed precision
  #if defined(PETSC_USE_COMPLEX)
      if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(CMUMPS_STRUC_C, outer);
      else FreeInternalIDFields(ZMUMPS_STRUC_C, outer);
  #else
      if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(SMUMPS_STRUC_C, outer);
      else FreeInternalIDFields(DMUMPS_STRUC_C, outer);
  #endif
    }
#endif
    /* when precisions match, the scalar pointers inside internal_id alias PETSc-owned arrays and are not freed here */
    PetscCall(PetscFree(outer->internal_id));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
250 
251 // Make a companion MumpsScalar array (with a given PetscScalar array), to hold at least <n> MumpsScalars in the given <precision> and return the address at <ma>.
252 // <convert> indicates if we need to convert PetscScalars to MumpsScalars after allocating the MumpsScalar array.
253 // (For bravity, we use <ma> for array address and <m> for its length in MumpsScalar, though in code they should be <*ma> and <*m>)
254 // If <ma> already points to a buffer/array, on input <m> should be its length. Note the buffer might be freed if it is not big enough for this request.
255 //
256 // The returned array is a companion, so how it is created depends on if PetscScalar and MumpsScalar are the same.
257 // 1) If they are different, a separate array will be made and its length and address will be provided at <m> and <ma> on output.
258 // 2) Otherwise, <pa> will be returned in <ma>, and <m> will be zero on output.
259 //
260 //
261 //   Input parameters:
262 // + convert   - whether to do PetscScalar to MumpsScalar conversion
263 // . n         - length of the PetscScalar array
264 // . pa        - [n]], points to the PetscScalar array
265 // . precision - precision of MumpsScalar
266 // . m         - on input, length of an existing MumpsScalar array <ma> if any, otherwise *m is just zero.
267 // - ma        - on input, an existing MumpsScalar array if any.
268 //
269 //   Output parameters:
270 // + m  - length of the MumpsScalar buffer at <ma> if MumpsScalar is different from PetscScalar, otherwise 0
271 // . ma - the MumpsScalar array, which could be an alias of <pa> when the two types are the same.
272 //
273 //   Note:
274 //    New memory, if allocated, is done via PetscMalloc1(), and is owned by caller.
MatMumpsMakeMumpsScalarArray(PetscBool convert,PetscCount n,const PetscScalar * pa,PetscPrecision precision,PetscCount * m,void ** ma)275 static PetscErrorCode MatMumpsMakeMumpsScalarArray(PetscBool convert, PetscCount n, const PetscScalar *pa, PetscPrecision precision, PetscCount *m, void **ma)
276 {
277   PetscFunctionBegin;
278 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
279   const PetscPrecision mumps_precision = precision;
280   PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unsupported precicison (%d). Must be single or double", (int)precision);
281   #if defined(PETSC_USE_COMPLEX)
282   if (mumps_precision != PETSC_SCALAR_PRECISION) {
283     if (mumps_precision == PETSC_PRECISION_SINGLE) {
284       if (*m < n) {
285         PetscCall(PetscFree(*ma));
286         PetscCall(PetscMalloc1(n, (CMUMPS_COMPLEX **)ma));
287         *m = n;
288       }
289       if (convert) {
290         CMUMPS_COMPLEX *b = *(CMUMPS_COMPLEX **)ma;
291         for (PetscCount i = 0; i < n; i++) {
292           b[i].r = PetscRealPart(pa[i]);
293           b[i].i = PetscImaginaryPart(pa[i]);
294         }
295       }
296     } else {
297       if (*m < n) {
298         PetscCall(PetscFree(*ma));
299         PetscCall(PetscMalloc1(n, (ZMUMPS_COMPLEX **)ma));
300         *m = n;
301       }
302       if (convert) {
303         ZMUMPS_COMPLEX *b = *(ZMUMPS_COMPLEX **)ma;
304         for (PetscCount i = 0; i < n; i++) {
305           b[i].r = PetscRealPart(pa[i]);
306           b[i].i = PetscImaginaryPart(pa[i]);
307         }
308       }
309     }
310   }
311   #else
312   if (mumps_precision != PETSC_SCALAR_PRECISION) {
313     if (mumps_precision == PETSC_PRECISION_SINGLE) {
314       if (*m < n) {
315         PetscCall(PetscFree(*ma));
316         PetscCall(PetscMalloc1(n, (SMUMPS_REAL **)ma));
317         *m = n;
318       }
319       if (convert) {
320         SMUMPS_REAL *b = *(SMUMPS_REAL **)ma;
321         for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
322       }
323     } else {
324       if (*m < n) {
325         PetscCall(PetscFree(*ma));
326         PetscCall(PetscMalloc1(n, (DMUMPS_REAL **)ma));
327         *m = n;
328       }
329       if (convert) {
330         DMUMPS_REAL *b = *(DMUMPS_REAL **)ma;
331         for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
332       }
333     }
334   }
335   #endif
336   else
337 #endif
338   {
339     if (*m != 0) PetscCall(PetscFree(*ma)); // free existing buffer if any
340     *ma = (void *)pa;                       // same precision, make them alias
341     *m  = 0;
342   }
343   PetscFunctionReturn(PETSC_SUCCESS);
344 }
345 
// Cast a MumpsScalar array <ma[n]> in <mumps_precision> to a PetscScalar array at address <pa>.
//
// 1) If the two types are different, cast array elements.
// 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op.
static PetscErrorCode MatMumpsCastMumpsScalarArray(PetscCount n, PetscPrecision mumps_precision, const void *ma, PetscScalar *pa)
{
  PetscFunctionBegin;
#if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
  if (mumps_precision != PETSC_SCALAR_PRECISION) {
  #if defined(PETSC_USE_COMPLEX)
    /* a complex value is a (r, i) pair of reals in both PETSc and MUMPS layouts, so we can cast
       2*n reals instead of accessing the .r/.i members of the MUMPS complex structs */
    if (mumps_precision == PETSC_PRECISION_SINGLE) {
      PetscReal         *a = (PetscReal *)pa;
      const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma;
      for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i];
    } else {
      PetscReal         *a = (PetscReal *)pa;
      const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma;
      for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i];
    }
  #else
    if (mumps_precision == PETSC_PRECISION_SINGLE) {
      const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma;
      for (PetscCount i = 0; i < n; i++) pa[i] = b[i];
    } else {
      const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma;
      for (PetscCount i = 0; i < n; i++) pa[i] = b[i];
    }
  #endif
  } else
#endif
    /* same precision: plain copy; PetscArraycpy is a no-op when the addresses coincide */
    PetscCall(PetscArraycpy((PetscScalar *)pa, (PetscScalar *)ma, n));
  PetscFunctionReturn(PETSC_SUCCESS);
}
379 
// Cast a PetscScalar array <pa[n]> to a MumpsScalar array in the given <mumps_precision> at address <ma>.
//
// 1) If the two types are different, cast array elements.
// 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op.
//
// NOTE(review): <ma> is declared const but is written through after a cast — presumably to accept
// both const and non-const inputs at call sites; confirm callers never pass truly read-only memory.
static PetscErrorCode MatMumpsCastPetscScalarArray(PetscCount n, const PetscScalar *pa, PetscPrecision mumps_precision, const void *ma)
{
  PetscFunctionBegin;
#if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
  if (mumps_precision != PETSC_SCALAR_PRECISION) {
  #if defined(PETSC_USE_COMPLEX)
    if (mumps_precision == PETSC_PRECISION_SINGLE) {
      CMUMPS_COMPLEX *b = (CMUMPS_COMPLEX *)ma;
      for (PetscCount i = 0; i < n; i++) {
        b[i].r = PetscRealPart(pa[i]);
        b[i].i = PetscImaginaryPart(pa[i]);
      }
    } else {
      ZMUMPS_COMPLEX *b = (ZMUMPS_COMPLEX *)ma;
      for (PetscCount i = 0; i < n; i++) {
        b[i].r = PetscRealPart(pa[i]);
        b[i].i = PetscImaginaryPart(pa[i]);
      }
    }
  #else
    if (mumps_precision == PETSC_PRECISION_SINGLE) {
      SMUMPS_REAL *b = (SMUMPS_REAL *)ma;
      for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
    } else {
      DMUMPS_REAL *b = (DMUMPS_REAL *)ma;
      for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
    }
  #endif
  } else
#endif
    /* same precision: plain copy; no-op when the addresses coincide */
    PetscCall(PetscArraycpy((PetscScalar *)ma, (PetscScalar *)pa, n));
  PetscFunctionReturn(PETSC_SUCCESS);
}
417 
MPIU_MUMPSREAL(const XMUMPS_STRUC_C * id)418 static inline MPI_Datatype MPIU_MUMPSREAL(const XMUMPS_STRUC_C *id)
419 {
420   return id->precision == PETSC_PRECISION_DOUBLE ? MPI_DOUBLE : MPI_FLOAT;
421 }
422 
// Copy the non-aliased outer fields into the inner [SDCZ]MUMPS_STRUC_C right before calling MUMPS.
// The void* MumpsScalar pointers of the outer are cast to the concrete <mumpsscalar> type of the inner.
#define PreMumpsCall(inner, outer, mumpsscalar) \
  do { \
    inner->job           = outer->job; \
    inner->n             = outer->n; \
    inner->nblk          = outer->nblk; \
    inner->nnz           = outer->nnz; \
    inner->irn           = outer->irn; \
    inner->jcn           = outer->jcn; \
    inner->a             = (mumpsscalar *)outer->a; \
    inner->nnz_loc       = outer->nnz_loc; \
    inner->irn_loc       = outer->irn_loc; \
    inner->jcn_loc       = outer->jcn_loc; \
    inner->a_loc         = (mumpsscalar *)outer->a_loc; \
    inner->blkptr        = outer->blkptr; \
    inner->blkvar        = outer->blkvar; \
    inner->perm_in       = outer->perm_in; \
    inner->rhs           = (mumpsscalar *)outer->rhs; \
    inner->redrhs        = (mumpsscalar *)outer->redrhs; \
    inner->rhs_sparse    = (mumpsscalar *)outer->rhs_sparse; \
    inner->sol_loc       = (mumpsscalar *)outer->sol_loc; \
    inner->rhs_loc       = (mumpsscalar *)outer->rhs_loc; \
    inner->irhs_sparse   = outer->irhs_sparse; \
    inner->irhs_ptr      = outer->irhs_ptr; \
    inner->isol_loc      = outer->isol_loc; \
    inner->irhs_loc      = outer->irhs_loc; \
    inner->nrhs          = outer->nrhs; \
    inner->lrhs          = outer->lrhs; \
    inner->lredrhs       = outer->lredrhs; \
    inner->nz_rhs        = outer->nz_rhs; \
    inner->lsol_loc      = outer->lsol_loc; \
    inner->nloc_rhs      = outer->nloc_rhs; \
    inner->lrhs_loc      = outer->lrhs_loc; \
    inner->schur_lld     = outer->schur_lld; \
    inner->size_schur    = outer->size_schur; \
    inner->listvar_schur = outer->listvar_schur; \
    inner->schur         = (mumpsscalar *)outer->schur; \
  } while (0)
460 
// Copy back, after a MUMPS call, the fields that MUMPS itself may have (re)allocated
#define PostMumpsCall(inner, outer) \
  do { \
    outer->pivnul_list = inner->pivnul_list; \
    outer->mapping     = inner->mapping; \
  } while (0)
466 
// Entry for PETSc to call MUMPS: select the concrete [sdcz]mumps_c() routine from the runtime
// precision (and compile-time complex setting), sync outer<->inner fields around the call
static inline PetscErrorCode PetscCallMumps_Private(XMUMPS_STRUC_C *outer)
{
  PetscFunctionBegin;
#if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
  #if defined(PETSC_USE_COMPLEX)
  if (outer->precision == PETSC_PRECISION_SINGLE) {
    CMUMPS_STRUC_C *inner = (CMUMPS_STRUC_C *)outer->internal_id;
    PreMumpsCall(inner, outer, CMUMPS_COMPLEX);
    PetscStackCallExternalVoid("cmumps_c", cmumps_c(inner));
    PostMumpsCall(inner, outer);
  } else {
    ZMUMPS_STRUC_C *inner = (ZMUMPS_STRUC_C *)outer->internal_id;
    PreMumpsCall(inner, outer, ZMUMPS_COMPLEX);
    PetscStackCallExternalVoid("zmumps_c", zmumps_c(inner));
    PostMumpsCall(inner, outer);
  }
  #else
  if (outer->precision == PETSC_PRECISION_SINGLE) {
    SMUMPS_STRUC_C *inner = (SMUMPS_STRUC_C *)outer->internal_id;
    PreMumpsCall(inner, outer, SMUMPS_REAL);
    PetscStackCallExternalVoid("smumps_c", smumps_c(inner));
    PostMumpsCall(inner, outer);
  } else {
    DMUMPS_STRUC_C *inner = (DMUMPS_STRUC_C *)outer->internal_id;
    PreMumpsCall(inner, outer, DMUMPS_REAL);
    PetscStackCallExternalVoid("dmumps_c", dmumps_c(inner));
    PostMumpsCall(inner, outer);
  }
  #endif
#else
  /* single-precision-configuration build: MUMPS_c/MumpsScalar fixed at compile time */
  MUMPS_STRUC_C *inner = (MUMPS_STRUC_C *)outer->internal_id;
  PreMumpsCall(inner, outer, MumpsScalar);
  PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(inner));
  PostMumpsCall(inner, outer);
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
505 
/* macros s.t. indices match MUMPS documentation (MUMPS arrays are documented 1-based) */
#define ICNTL(I) icntl[(I) - 1]
#define INFOG(I) infog[(I) - 1]
#define INFO(I)  info[(I) - 1]

// Get a value from a MumpsScalar array, which is the <F> field in the struct of MUMPS_STRUC_C. The value is convertible to PetscScalar. Note no minus 1 on I!
#if defined(PETSC_USE_COMPLEX)
  #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((CMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((CMUMPS_COMPLEX *)(ID).F)[I].i : ((ZMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((ZMUMPS_COMPLEX *)(ID).F)[I].i)
#else
  #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).F)[I] : ((double *)(ID).F)[I])
#endif

// Get a value from MumpsReal arrays (1-based <I>, as in the MUMPS manual). The value is convertible to PetscReal.
#define ID_CNTL_GET(ID, I)   ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).cntl)[(I) - 1] : ((double *)(ID).cntl)[(I) - 1])
#define ID_RINFOG_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfog)[(I) - 1] : ((double *)(ID).rinfog)[(I) - 1])
#define ID_RINFO_GET(ID, I)  ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfo)[(I) - 1] : ((double *)(ID).rinfo)[(I) - 1])

// Set the I-th entry of the MumpsReal array id.cntl[] with a PetscReal <VAL>
#define ID_CNTL_SET(ID, I, VAL) \
  do { \
    if ((ID).precision == PETSC_PRECISION_SINGLE) ((float *)(ID).cntl)[(I) - 1] = (VAL); \
    else ((double *)(ID).cntl)[(I) - 1] = (VAL); \
  } while (0)
529 
/* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  #define PetscMUMPS_c(mumps) \
    do { \
      if (mumps->use_petsc_omp_support) { \
        if (mumps->is_omp_master) { \
          PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
          PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
          PetscCall(PetscCallMumps_Private(&mumps->id)); \
          PetscCall(PetscFPTrapPop()); \
          PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
        } \
        PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
        /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific      \
         to processes, so we only Bcast info[1], an error code and leave others (since they do not have   \
         an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82.                   \
         omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \
      */ \
        MUMPS_STRUC_C tmp; /* All MUMPS_STRUC_C types have same lengths on these info arrays */ \
        PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(tmp.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(tmp.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfog), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfo), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \
      } else { \
        PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
        PetscCall(PetscCallMumps_Private(&mumps->id)); \
        PetscCall(PetscFPTrapPop()); \
      } \
    } while (0)
#else
  /* without OpenMP support every rank calls MUMPS directly, with FP traps disabled around the call */
  #define PetscMUMPS_c(mumps) \
    do { \
      PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
      PetscCall(PetscCallMumps_Private(&mumps->id)); \
      PetscCall(PetscFPTrapPop()); \
    } while (0)
#endif
567 
typedef struct Mat_MUMPS Mat_MUMPS;
/* Per-factorization context for the PETSc/MUMPS interface, stored in Mat->data of a MATSOLVERMUMPS factor */
struct Mat_MUMPS {
  XMUMPS_STRUC_C id; // the (precision-abstracted) MUMPS id structure

  MatStructure   matstruc;
  PetscMPIInt    myid, petsc_size;
  PetscMUMPSInt *irn, *jcn;       /* the (i,j,v) triplets passed to mumps. */
  PetscScalar   *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
  PetscCount     nnz;             /* number of nonzeros. The type is called selective 64-bit in mumps */
  PetscMUMPSInt  sym;
  MPI_Comm       mumps_comm;
  PetscMUMPSInt *ICNTL_pre;
  PetscReal     *CNTL_pre;
  PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
  PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
  PetscMUMPSInt  ICNTL26;
  PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt    *rhs_nrow, max_nrhs;
  PetscMPIInt *rhs_recvcounts, *rhs_disps;
  PetscScalar *rhs_loc, *rhs_recvbuf;
#endif
  Vec            b_seq, x_seq;
  PetscInt       ninfo, *info; /* which INFO to display */
  PetscInt       sizeredrhs;
  PetscScalar   *schur_sol;
  PetscInt       schur_sizesol;
  PetscScalar   *redrhs;              // buffer in PetscScalar in case MumpsScalar is in a different precision
  PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
  PetscCount     cur_ilen, cur_jlen;  /* current len of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); // fills irn/jcn/val from the PETSc matrix

  /* Support for MATNEST */
  PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
  PetscCount  *nest_vals_start;
  PetscScalar *nest_vals;

  /* stuff used by petsc/mumps OpenMP support*/
  PetscBool    use_petsc_omp_support;
  PetscOmpCtrl omp_ctrl;             /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */
  MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is PETSc matrix's comm */
  PetscCount  *recvcount;            /* a collection of nnz on omp_master */
  PetscMPIInt  tag, omp_comm_size;
  PetscBool    is_omp_master; /* is this rank the master of omp_comm */
  MPI_Request *reqs;
};
615 
/* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
   Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
 */
static PetscErrorCode PetscMUMPSIntCSRCast(PETSC_UNUSED Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
{
  PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscCount since mumps only uses PetscMUMPSInt for rhs */

  PetscFunctionBegin;
#if defined(PETSC_USE_64BIT_INDICES)
  {
    /* element-wise narrowing copy into the (reusable) ia_alloc/ja_alloc work arrays */
    PetscInt i;
    if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
      PetscCall(PetscFree(mumps->ia_alloc));
      PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
      mumps->cur_ilen = nrow + 1;
    }
    if (nnz > mumps->cur_jlen) {
      PetscCall(PetscFree(mumps->ja_alloc));
      PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
      mumps->cur_jlen = nnz;
    }
    for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &mumps->ia_alloc[i]));
    for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &mumps->ja_alloc[i]));
    *ia_mumps = mumps->ia_alloc;
    *ja_mumps = mumps->ja_alloc;
  }
#else
  /* 32-bit PetscInt has the same size as PetscMUMPSInt, so the input arrays can be aliased directly */
  *ia_mumps = ia;
  *ja_mumps = ja;
#endif
  PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
  PetscFunctionReturn(PETSC_SUCCESS);
}
649 
MatMumpsResetSchur_Private(Mat_MUMPS * mumps)650 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
651 {
652   PetscFunctionBegin;
653   PetscCall(PetscFree(mumps->id.listvar_schur));
654   PetscCall(PetscFree(mumps->redrhs)); // if needed, id.redrhs will be freed in MatMumpsFreeInternalID()
655   PetscCall(PetscFree(mumps->schur_sol));
656   mumps->id.size_schur = 0;
657   mumps->id.schur_lld  = 0;
658   if (mumps->id.internal_id) mumps->id.ICNTL(19) = 0; // sometimes, the inner id is yet built
659   PetscFunctionReturn(PETSC_SUCCESS);
660 }
661 
/* solve with rhs in mumps->id.redrhs and return in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS           *mumps = (Mat_MUMPS *)F->data;
  Mat                  S, B, X; // solve S*X = B; all three matrices are dense
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;
  const PetscScalar   *xarray;

  PetscFunctionBegin;
  PetscCall(MatFactorFactorizeSchurComplement(F));
  PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
  // convert the (possibly reduced-precision) MUMPS array id.redrhs into the PetscScalar work array mumps->redrhs
  PetscCall(MatMumpsCastMumpsScalarArray(mumps->sizeredrhs, mumps->id.precision, mumps->id.redrhs, mumps->redrhs));

  // wrap mumps->redrhs as the dense right-hand side B, using the same type (and CPU/GPU binding) as S
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &B));
  PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  PetscCall(MatBindToCPU(B, S->boundtocpu));
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    // S is factored: triangular solves, with X sharing the same host array as B (in-place solve)
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatMatSolveTranspose(S, B, X));
    } else {
      PetscCall(MatMatSolve(S, B, X));
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    // S holds the inverse: the solve is a matrix-matrix product into a separate (cached, grow-only) buffer
    sizesol = mumps->id.nrhs * mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      PetscCall(PetscFree(mumps->schur_sol));
      PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
      mumps->schur_sizesol = sizesol;
    }
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    PetscCall(MatProductCreateWithMat(S, B, NULL, X));
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
    } else {
      PetscCall(MatProductSetType(X, MATPRODUCT_AB));
    }
    PetscCall(MatProductSetFromOptions(X));
    PetscCall(MatProductSymbolic(X));
    PetscCall(MatProductNumeric(X));

    PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
  }
  // MUST get the array from X (not B), though they share the same host array. We can only guarantee X has the correct data on device.
  PetscCall(MatDenseGetArrayRead(X, &xarray)); // xarray should be mumps->redrhs, but using MatDenseGetArrayRead is safer with GPUs.
  PetscCall(MatMumpsCastPetscScalarArray(mumps->sizeredrhs, xarray, mumps->id.precision, mumps->id.redrhs));
  PetscCall(MatDenseRestoreArrayRead(X, &xarray));
  PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
  PetscCall(MatDestroy(&B));
  PetscCall(MatDestroy(&X));
  PetscFunctionReturn(PETSC_SUCCESS);
}
730 
/* Drive the two halves of a MUMPS Schur-complement solve:
   expansion == PETSC_FALSE: condensation step - make sure id.redrhs is allocated and large enough
   expansion == PETSC_TRUE:  expansion step    - solve with the Schur complement, then run the MUMPS expansion phase (ICNTL(26) = 2) */
static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      mumps->id.lredrhs = mumps->id.size_schur;
      mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
      if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs)); // only free if this wrapper allocated it
      PetscCall(PetscFree(mumps->redrhs));
      PetscCall(PetscMalloc1(mumps->sizeredrhs, &mumps->redrhs));
      PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, mumps->sizeredrhs, mumps->redrhs, mumps->id.precision, &mumps->id.redrhs_len, &mumps->id.redrhs));
    }
  } else {                                    /* prepare for the expansion step */
    PetscCall(MatMumpsSolveSchur_Private(F)); /* solve Schur complement, put solution in id.redrhs (this has to be done by the MUMPS user, so basically us) */
    mumps->id.ICNTL(26) = 2;                  /* expansion phase */
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
    if (mumps->id.nrhs > 1) {
      if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs));
      PetscCall(PetscFree(mumps->redrhs));
      mumps->id.redrhs_len = 0;
      mumps->id.lredrhs    = 0;
      mumps->sizeredrhs    = 0;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
768 
769 /*
770   MatConvertToTriples_A_B - convert PETSc matrix to triples: row[nz], col[nz], val[nz]
771 
772   input:
773     A       - matrix in aij,baij or sbaij format
774     shift   - 0: C style output triple; 1: Fortran style output triple.
775     reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
776               MAT_REUSE_MATRIX:   only the values in v array are updated
777   output:
778     nnz     - dim of r, c, and v (number of local nonzero entries of A)
779     r, c, v - row and col index, matrix values (matrix triples)
780 
781   The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
782   freed with PetscFree(mumps->irn);  This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means
783   that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
784 
785  */
786 
MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS * mumps)787 static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
788 {
789   const PetscScalar *av;
790   const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
791   PetscCount         nz, rnz, k;
792   PetscMUMPSInt     *row, *col;
793   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
794 
795   PetscFunctionBegin;
796   PetscCall(MatSeqAIJGetArrayRead(A, &av));
797   if (reuse == MAT_INITIAL_MATRIX) {
798     nz = aa->nz;
799     ai = aa->i;
800     aj = aa->j;
801     PetscCall(PetscMalloc2(nz, &row, nz, &col));
802     for (PetscCount i = k = 0; i < M; i++) {
803       rnz = ai[i + 1] - ai[i];
804       ajj = aj + ai[i];
805       for (PetscCount j = 0; j < rnz; j++) {
806         PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
807         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
808         k++;
809       }
810     }
811     mumps->val = (PetscScalar *)av;
812     mumps->irn = row;
813     mumps->jcn = col;
814     mumps->nnz = nz;
815   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqaij_seqaij(), so one needs to copy the memory */
816   else mumps->val = (PetscScalar *)av;                                           /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
817   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
818   PetscFunctionReturn(PETSC_SUCCESS);
819 }
820 
/* Convert a SeqSELL matrix to COO triples for MUMPS; values alias a->val (no copy) unless nested */
static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  PetscCount     nz, i, j, k, r;
  Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
  PetscMUMPSInt *row, *col;

  PetscFunctionBegin;
  nz = a->sliidx[a->totalslices]; /* total number of stored slots; NOTE(review): appears to include slice padding entries - confirm */
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    /* entries within a slice of 8 rows are stored interleaved: the row index cycles through the
       8 rows of slice i, hence the (r + 1) & 0x07 wrap (hard-coded slice height of 8) */
    for (i = k = 0; i < a->totalslices; i++) {
      for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
    }
    for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
    mumps->val = a->val;
  } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, a->val, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsell_seqaij(), so one needs to copy the memory */
  else mumps->val = a->val;                                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  PetscFunctionReturn(PETSC_SUCCESS);
}
843 
/* Convert a SeqBAIJ matrix to COO triples for MUMPS by expanding each bs x bs block into scalar entries */
static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
  const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
  PetscCount      M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m;
  PetscInt        bs;
  PetscMUMPSInt  *row, *col;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatGetBlockSize(A, &bs));
    M  = A->rmap->N / bs; /* number of block rows */
    ai = aa->i;
    aj = aa->j;
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (i = 0; i < M; i++) {
      ajj = aj + ai[i];
      rnz = ai[i + 1] - ai[i];
      for (k = 0; k < rnz; k++) {
        /* expand block (i, ajj[k]); the (row, col) emission order matches the linear layout of aa->a,
           with the row-within-block index m varying fastest */
        for (j = 0; j < bs; j++) {
          for (m = 0; m < bs; m++) {
            PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
            PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
            idx++;
          }
        }
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
    mumps->val = aa->a;
  } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqbaij_seqaij(), so one needs to copy the memory */
  else mumps->val = aa->a;                                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  PetscFunctionReturn(PETSC_SUCCESS);
}
880 
MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS * mumps)881 static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
882 {
883   const PetscInt *ai, *aj, *ajj;
884   PetscInt        bs;
885   PetscCount      nz, rnz, i, j, k, m;
886   PetscMUMPSInt  *row, *col;
887   PetscScalar    *val;
888   Mat_SeqSBAIJ   *aa  = (Mat_SeqSBAIJ *)A->data;
889   const PetscInt  bs2 = aa->bs2, mbs = aa->mbs;
890 #if defined(PETSC_USE_COMPLEX)
891   PetscBool isset, hermitian;
892 #endif
893 
894   PetscFunctionBegin;
895 #if defined(PETSC_USE_COMPLEX)
896   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
897   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
898 #endif
899   ai = aa->i;
900   aj = aa->j;
901   PetscCall(MatGetBlockSize(A, &bs));
902   if (reuse == MAT_INITIAL_MATRIX) {
903     const PetscCount alloc_size = aa->nz * bs2;
904 
905     PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col));
906     if (bs > 1) {
907       PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc));
908       mumps->val = mumps->val_alloc;
909     } else {
910       mumps->val = aa->a;
911     }
912     mumps->irn = row;
913     mumps->jcn = col;
914   } else {
915     row = mumps->irn;
916     col = mumps->jcn;
917   }
918   val = mumps->val;
919 
920   nz = 0;
921   if (bs > 1) {
922     for (i = 0; i < mbs; i++) {
923       rnz = ai[i + 1] - ai[i];
924       ajj = aj + ai[i];
925       for (j = 0; j < rnz; j++) {
926         for (k = 0; k < bs; k++) {
927           for (m = 0; m < bs; m++) {
928             if (ajj[j] > i || k >= m) {
929               if (reuse == MAT_INITIAL_MATRIX) {
930                 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
931                 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
932               }
933               val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
934             }
935           }
936         }
937       }
938     }
939   } else if (reuse == MAT_INITIAL_MATRIX) {
940     for (i = 0; i < mbs; i++) {
941       rnz = ai[i + 1] - ai[i];
942       ajj = aj + ai[i];
943       for (j = 0; j < rnz; j++) {
944         PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
945         PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
946         nz++;
947       }
948     }
949     PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscCount_FMT " != %" PetscInt_FMT, nz, aa->nz);
950   } else if (mumps->nest_vals)
951     PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX, MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsbaij_seqsbaij(), so one needs to copy the memory */
952   else mumps->val = aa->a;                               /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
953   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
954   PetscFunctionReturn(PETSC_SUCCESS);
955 }
956 
MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS * mumps)957 static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
958 {
959   const PetscInt    *ai, *aj, *ajj, *adiag, M = A->rmap->n;
960   PetscCount         nz, rnz, i, j;
961   const PetscScalar *av, *v1;
962   PetscScalar       *val;
963   PetscMUMPSInt     *row, *col;
964   Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
965   PetscBool          diagDense;
966 #if defined(PETSC_USE_COMPLEX)
967   PetscBool hermitian, isset;
968 #endif
969 
970   PetscFunctionBegin;
971 #if defined(PETSC_USE_COMPLEX)
972   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
973   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
974 #endif
975   PetscCall(MatSeqAIJGetArrayRead(A, &av));
976   ai = aa->i;
977   aj = aa->j;
978   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, &diagDense));
979   if (reuse == MAT_INITIAL_MATRIX) {
980     /* count nz in the upper triangular part of A */
981     nz = 0;
982     if (!diagDense) {
983       for (i = 0; i < M; i++) {
984         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
985           for (j = ai[i]; j < ai[i + 1]; j++) {
986             if (aj[j] < i) continue;
987             nz++;
988           }
989         } else {
990           nz += ai[i + 1] - adiag[i];
991         }
992       }
993     } else {
994       for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
995     }
996     PetscCall(PetscMalloc2(nz, &row, nz, &col));
997     PetscCall(PetscMalloc1(nz, &val));
998     mumps->nnz = nz;
999     mumps->irn = row;
1000     mumps->jcn = col;
1001     mumps->val = mumps->val_alloc = val;
1002 
1003     nz = 0;
1004     if (!diagDense) {
1005       for (i = 0; i < M; i++) {
1006         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
1007           for (j = ai[i]; j < ai[i + 1]; j++) {
1008             if (aj[j] < i) continue;
1009             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
1010             PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
1011             val[nz] = av[j];
1012             nz++;
1013           }
1014         } else {
1015           rnz = ai[i + 1] - adiag[i];
1016           ajj = aj + adiag[i];
1017           v1  = av + adiag[i];
1018           for (j = 0; j < rnz; j++) {
1019             PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
1020             PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
1021             val[nz++] = v1[j];
1022           }
1023         }
1024       }
1025     } else {
1026       for (i = 0; i < M; i++) {
1027         rnz = ai[i + 1] - adiag[i];
1028         ajj = aj + adiag[i];
1029         v1  = av + adiag[i];
1030         for (j = 0; j < rnz; j++) {
1031           PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
1032           PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
1033           val[nz++] = v1[j];
1034         }
1035       }
1036     }
1037   } else {
1038     nz  = 0;
1039     val = mumps->val;
1040     if (!diagDense) {
1041       for (i = 0; i < M; i++) {
1042         if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
1043           for (j = ai[i]; j < ai[i + 1]; j++) {
1044             if (aj[j] < i) continue;
1045             val[nz++] = av[j];
1046           }
1047         } else {
1048           rnz = ai[i + 1] - adiag[i];
1049           v1  = av + adiag[i];
1050           for (j = 0; j < rnz; j++) val[nz++] = v1[j];
1051         }
1052       }
1053     } else {
1054       for (i = 0; i < M; i++) {
1055         rnz = ai[i + 1] - adiag[i];
1056         v1  = av + adiag[i];
1057         for (j = 0; j < rnz; j++) val[nz++] = v1[j];
1058       }
1059     }
1060   }
1061   PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
1062   PetscFunctionReturn(PETSC_SUCCESS);
1063 }
1064 
MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS * mumps)1065 static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1066 {
1067   const PetscInt    *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
1068   PetscInt           bs;
1069   PetscCount         rstart, nz, i, j, k, m, jj, irow, countA, countB;
1070   PetscMUMPSInt     *row, *col;
1071   const PetscScalar *av, *bv, *v1, *v2;
1072   PetscScalar       *val;
1073   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)A->data;
1074   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ *)mat->A->data;
1075   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)mat->B->data;
1076   const PetscInt     bs2 = aa->bs2, mbs = aa->mbs;
1077 #if defined(PETSC_USE_COMPLEX)
1078   PetscBool hermitian, isset;
1079 #endif
1080 
1081   PetscFunctionBegin;
1082 #if defined(PETSC_USE_COMPLEX)
1083   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
1084   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
1085 #endif
1086   PetscCall(MatGetBlockSize(A, &bs));
1087   rstart = A->rmap->rstart;
1088   ai     = aa->i;
1089   aj     = aa->j;
1090   bi     = bb->i;
1091   bj     = bb->j;
1092   av     = aa->a;
1093   bv     = bb->a;
1094 
1095   garray = mat->garray;
1096 
1097   if (reuse == MAT_INITIAL_MATRIX) {
1098     nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
1099     PetscCall(PetscMalloc2(nz, &row, nz, &col));
1100     PetscCall(PetscMalloc1(nz, &val));
1101     /* can not decide the exact mumps->nnz now because of the SBAIJ */
1102     mumps->irn = row;
1103     mumps->jcn = col;
1104     mumps->val = mumps->val_alloc = val;
1105   } else {
1106     val = mumps->val;
1107   }
1108 
1109   jj   = 0;
1110   irow = rstart;
1111   for (i = 0; i < mbs; i++) {
1112     ajj    = aj + ai[i]; /* ptr to the beginning of this row */
1113     countA = ai[i + 1] - ai[i];
1114     countB = bi[i + 1] - bi[i];
1115     bjj    = bj + bi[i];
1116     v1     = av + ai[i] * bs2;
1117     v2     = bv + bi[i] * bs2;
1118 
1119     if (bs > 1) {
1120       /* A-part */
1121       for (j = 0; j < countA; j++) {
1122         for (k = 0; k < bs; k++) {
1123           for (m = 0; m < bs; m++) {
1124             if (rstart + ajj[j] * bs > irow || k >= m) {
1125               if (reuse == MAT_INITIAL_MATRIX) {
1126                 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
1127                 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
1128               }
1129               val[jj++] = v1[j * bs2 + m + k * bs];
1130             }
1131           }
1132         }
1133       }
1134 
1135       /* B-part */
1136       for (j = 0; j < countB; j++) {
1137         for (k = 0; k < bs; k++) {
1138           for (m = 0; m < bs; m++) {
1139             if (reuse == MAT_INITIAL_MATRIX) {
1140               PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
1141               PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
1142             }
1143             val[jj++] = v2[j * bs2 + m + k * bs];
1144           }
1145         }
1146       }
1147     } else {
1148       /* A-part */
1149       for (j = 0; j < countA; j++) {
1150         if (reuse == MAT_INITIAL_MATRIX) {
1151           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1152           PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
1153         }
1154         val[jj++] = v1[j];
1155       }
1156 
1157       /* B-part */
1158       for (j = 0; j < countB; j++) {
1159         if (reuse == MAT_INITIAL_MATRIX) {
1160           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1161           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
1162         }
1163         val[jj++] = v2[j];
1164       }
1165     }
1166     irow += bs;
1167   }
1168   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj;
1169   PetscFunctionReturn(PETSC_SUCCESS);
1170 }
1171 
/* Convert an MPIAIJ matrix to COO triples with global indices for MUMPS; the diagonal (Ad) and
   off-diagonal (Ao) sequential parts are interleaved row by row, Ao columns mapped through garray */
static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
  PetscCount         rstart, cstart, nz, i, j, jj, irow, countA, countB;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av, *bv, *v1, *v2;
  PetscScalar       *val;
  Mat                Ad, Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
  PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
  PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));

  aa = (Mat_SeqAIJ *)Ad->data;
  bb = (Mat_SeqAIJ *)Ao->data;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;

  rstart = A->rmap->rstart;
  cstart = A->cmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz = (PetscCount)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val; /* MAT_REUSE_MATRIX: indices unchanged, only refresh values */
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < m; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part: local columns offset by cstart to get global indices */
    for (j = 0; j < countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj]));
      }
      val[jj++] = v1[j];
    }

    /* B-part: columns mapped to global indices through garray */
    for (j = 0; j < countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
  PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1243 
/* Convert an MPIBAIJ matrix to COO triples with global indices for MUMPS, expanding each bs x bs
   block of the diagonal (A) and off-diagonal (B) parts into scalar entries */
static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ *)mat->A->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)mat->B->data;
  const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
  const PetscInt    *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart;
  const PetscInt     bs2 = mat->bs2;
  PetscInt           bs;
  PetscCount         nz, i, j, k, n, jj, irow, countA, countB, idx;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
  PetscScalar       *val;

  PetscFunctionBegin;
  PetscCall(MatGetBlockSize(A, &bs));
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = bs2 * (aa->nz + bb->nz); /* every block expands to exactly bs2 scalar entries */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val; /* MAT_REUSE_MATRIX: indices unchanged, only refresh values */
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < mbs; i++) {
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    ajj    = aj + ai[i];
    bjj    = bj + bi[i];
    v1     = av + bs2 * ai[i];
    v2     = bv + bs2 * bi[i];

    idx = 0;
    /* A-part: emission order (n fastest) follows the linear layout of each block in av */
    for (k = 0; k < countA; k++) {
      for (j = 0; j < bs; j++) {
        for (n = 0; n < bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
            PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj]));
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part: same expansion, block columns mapped to global indices through garray */
    for (k = 0; k < countB; k++) {
      for (j = 0; j < bs; j++) {
        for (n = 0; n < bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
            PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1313 
MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS * mumps)1314 static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1315 {
1316   const PetscInt    *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
1317   PetscCount         rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
1318   PetscMUMPSInt     *row, *col;
1319   const PetscScalar *av, *bv, *v1, *v2;
1320   PetscScalar       *val;
1321   Mat                Ad, Ao;
1322   Mat_SeqAIJ        *aa;
1323   Mat_SeqAIJ        *bb;
1324 #if defined(PETSC_USE_COMPLEX)
1325   PetscBool hermitian, isset;
1326 #endif
1327 
1328   PetscFunctionBegin;
1329 #if defined(PETSC_USE_COMPLEX)
1330   PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
1331   PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy");
1332 #endif
1333   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
1334   PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
1335   PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));
1336 
1337   aa = (Mat_SeqAIJ *)Ad->data;
1338   bb = (Mat_SeqAIJ *)Ao->data;
1339   ai = aa->i;
1340   aj = aa->j;
1341   bi = bb->i;
1342   bj = bb->j;
1343   PetscCall(MatGetDiagonalMarkers_SeqAIJ(Ad, &adiag, NULL));
1344   rstart = A->rmap->rstart;
1345 
1346   if (reuse == MAT_INITIAL_MATRIX) {
1347     nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
1348     nzb = 0; /* num of upper triangular entries in mat->B */
1349     for (i = 0; i < m; i++) {
1350       nza += (ai[i + 1] - adiag[i]);
1351       countB = bi[i + 1] - bi[i];
1352       bjj    = bj + bi[i];
1353       for (j = 0; j < countB; j++) {
1354         if (garray[bjj[j]] > rstart) nzb++;
1355       }
1356     }
1357 
1358     nz = nza + nzb; /* total nz of upper triangular part of mat */
1359     PetscCall(PetscMalloc2(nz, &row, nz, &col));
1360     PetscCall(PetscMalloc1(nz, &val));
1361     mumps->nnz = nz;
1362     mumps->irn = row;
1363     mumps->jcn = col;
1364     mumps->val = mumps->val_alloc = val;
1365   } else {
1366     val = mumps->val;
1367   }
1368 
1369   jj   = 0;
1370   irow = rstart;
1371   for (i = 0; i < m; i++) {
1372     ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
1373     v1     = av + adiag[i];
1374     countA = ai[i + 1] - adiag[i];
1375     countB = bi[i + 1] - bi[i];
1376     bjj    = bj + bi[i];
1377     v2     = bv + bi[i];
1378 
1379     /* A-part */
1380     for (j = 0; j < countA; j++) {
1381       if (reuse == MAT_INITIAL_MATRIX) {
1382         PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1383         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
1384       }
1385       val[jj++] = v1[j];
1386     }
1387 
1388     /* B-part */
1389     for (j = 0; j < countB; j++) {
1390       if (garray[bjj[j]] > rstart) {
1391         if (reuse == MAT_INITIAL_MATRIX) {
1392           PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
1393           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
1394         }
1395         val[jj++] = v2[j];
1396       }
1397     }
1398     irow++;
1399   }
1400   PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
1401   PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
1402   PetscFunctionReturn(PETSC_SUCCESS);
1403 }
1404 
MatConvertToTriples_diagonal_xaij(Mat A,PETSC_UNUSED PetscInt shift,MatReuse reuse,Mat_MUMPS * mumps)1405 static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
1406 {
1407   const PetscScalar *av;
1408   const PetscInt     M = A->rmap->n;
1409   PetscCount         i;
1410   PetscMUMPSInt     *row, *col;
1411   Vec                v;
1412 
1413   PetscFunctionBegin;
1414   PetscCall(MatDiagonalGetDiagonal(A, &v));
1415   PetscCall(VecGetArrayRead(v, &av));
1416   if (reuse == MAT_INITIAL_MATRIX) {
1417     PetscCall(PetscMalloc2(M, &row, M, &col));
1418     for (i = 0; i < M; i++) {
1419       PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
1420       col[i] = row[i];
1421     }
1422     mumps->val = (PetscScalar *)av;
1423     mumps->irn = row;
1424     mumps->jcn = col;
1425     mumps->nnz = M;
1426   } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, M)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_diagonal_xaij(), so one needs to copy the memory */
1427   else mumps->val = (PetscScalar *)av;                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
1428   PetscCall(VecRestoreArrayRead(v, &av));
1429   PetscFunctionReturn(PETSC_SUCCESS);
1430 }
1431 
/* Convert a dense matrix to COO triples for MUMPS: every local (row, col) pair is emitted column by
   column. Values alias the dense array when it is contiguous (lda == m); otherwise they are compacted
   into a separate allocation. Note that `shift` is deliberately unused here. */
static PetscErrorCode MatConvertToTriples_dense_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  PetscScalar   *v;
  const PetscInt m = A->rmap->n, N = A->cmap->N;
  PetscInt       lda;
  PetscCount     i, j;
  PetscMUMPSInt *row, *col;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatDenseGetLDA(A, &lda));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc2(m * N, &row, m * N, &col));
    /* fill column 0, then replicate the row pattern for the remaining columns */
    for (i = 0; i < m; i++) {
      col[i] = 0;
      PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
    }
    for (j = 1; j < N; j++) {
      for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(j, col + i + m * j));
      PetscCall(PetscArraycpy(row + m * j, row + m * (j - 1), m));
    }
    if (lda == m) mumps->val = v; /* contiguous storage: alias directly */
    else {
      /* strided storage: compact the m x N entries into a contiguous buffer */
      PetscCall(PetscMalloc1(m * N, &mumps->val));
      mumps->val_alloc = mumps->val;
      for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = m * N;
  } else {
    if (lda == m && !mumps->nest_vals) mumps->val = v;
    else {
      /* either strided or nested: copy values into the existing buffer */
      for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
    }
  }
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1471 
1472 // If the input Mat (sub) is either MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL, this function gets the parent Mat until it is not a
1473 // MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL itself and returns the appropriate shift, scaling, and whether the parent Mat should be conjugated
1474 // and its rows and columns permuted
1475 // TODO FIXME: this should not be in this file and should instead be refactored where the same logic applies, e.g., MatAXPY_Dense_Nest()
MatGetTranspose_TransposeVirtual(Mat * sub,PetscBool * conjugate,PetscScalar * vshift,PetscScalar * vscale,PetscBool * swap)1476 static PetscErrorCode MatGetTranspose_TransposeVirtual(Mat *sub, PetscBool *conjugate, PetscScalar *vshift, PetscScalar *vscale, PetscBool *swap)
1477 {
1478   Mat         A;
1479   PetscScalar s[2];
1480   PetscBool   isTrans, isHTrans, compare;
1481 
1482   PetscFunctionBegin;
1483   do {
1484     PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATTRANSPOSEVIRTUAL, &isTrans));
1485     if (isTrans) {
1486       PetscCall(MatTransposeGetMat(*sub, &A));
1487       isHTrans = PETSC_FALSE;
1488     } else {
1489       PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
1490       if (isHTrans) PetscCall(MatHermitianTransposeGetMat(*sub, &A));
1491     }
1492     compare = (PetscBool)(isTrans || isHTrans);
1493     if (compare) {
1494       if (vshift && vscale) {
1495         PetscCall(MatShellGetScalingShifts(*sub, s, s + 1, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
1496         if (!*conjugate) {
1497           *vshift += s[0] * *vscale;
1498           *vscale *= s[1];
1499         } else {
1500           *vshift += PetscConj(s[0]) * *vscale;
1501           *vscale *= PetscConj(s[1]);
1502         }
1503       }
1504       if (swap) *swap = (PetscBool)!*swap;
1505       if (isHTrans && conjugate) *conjugate = (PetscBool)!*conjugate;
1506       *sub = A;
1507     }
1508   } while (compare);
1509   PetscFunctionReturn(PETSC_SUCCESS);
1510 }
1511 
/*
  Convert a MATNEST matrix to MUMPS COO (triples) format.

  Each nested block is converted with its own MatConvertToTriples_* routine and its
  local row/column indices are remapped to the global ordering of the nest via the
  MatNest index sets (columns via a PetscSF since they may be off-process).
  For Cholesky (mumps->sym != 0) only blocks with c >= r are converted.

  MAT_INITIAL_MATRIX: allocates the concatenated irn/jcn/vals arrays, records per-block
  offsets in mumps->nest_vals_start[], and (if ICNTL(15) == 1) builds id.blkptr on rank 0.
  MAT_REUSE_MATRIX: only refreshes the numerical values of each block in place.
*/
static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat     **mats;
  PetscInt  nr, nc;
  PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE; /* symmetric factorization: keep upper-triangular blocks only */

  PetscFunctionBegin;
  PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscMUMPSInt *irns, *jcns;
    PetscScalar   *vals;
    PetscCount     totnnz, cumnnz, maxnnz;
    PetscInt      *pjcns_w, Mbs = 0;
    IS            *rows, *cols;
    PetscInt     **rows_idx, **cols_idx;

    /* first pass: select a conversion routine per block and count nonzeros */
    cumnnz = 0;
    maxnnz = 0;
    PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples));
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        Mat sub = mats[r][c];

        mumps->nest_convert_to_triples[r * nc + c] = NULL;
        if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */
        if (sub) {
          PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL;
          PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense;
          MatInfo   info;

          /* unwrap (Hermitian) transpose wrappers before dispatching on the concrete type */
          PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
          PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
          PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));

          if (chol) {
            if (r == c) {
              /* diagonal block of a symmetric factorization: convert to SBAIJ-style (upper) triples */
              if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij;
              else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij;
              else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij;
              else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij;
              else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
              else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
            } else {
              /* off-diagonal block: full (unsymmetric) triples */
              if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
              else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
              else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
              else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
              else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
              else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
            }
          } else {
            if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
            else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
            else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
            else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
            else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
            else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
          }
          PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name);
          mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples;
          PetscCall(MatGetInfo(sub, MAT_LOCAL, &info));
          cumnnz += (PetscCount)info.nz_used; /* can be overestimated for Cholesky */
          maxnnz = PetscMax(maxnnz, info.nz_used);
        }
      }
    }

    /* Allocate total COO */
    totnnz = cumnnz;
    PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns));
    PetscCall(PetscMalloc1(totnnz, &vals));

    /* Handle rows and column maps
       We directly map rows and use an SF for the columns */
    PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx));
    PetscCall(MatNestGetISs(A, rows, cols));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r]));
    for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c]));
    if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w)); /* widening buffer: MUMPS indices are 32-bit, SF roots are PetscInt */
    else (void)maxnnz;

    /* second pass: convert each block and remap its indices into the global COO */
    cumnnz = 0;
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        Mat             sub    = mats[r][c];
        const PetscInt *ridx   = rows_idx[r];
        const PetscInt *cidx   = cols_idx[c];
        PetscScalar     vscale = 1.0, vshift = 0.0;
        PetscInt        rst, size, bs;
        PetscSF         csf;
        PetscBool       conjugate = PETSC_FALSE, swap = PETSC_FALSE;
        PetscLayout     cmap;
        PetscInt        innz;

        mumps->nest_vals_start[r * nc + c] = cumnnz;
        if (c == r) {
          /* accumulate the total number of blocks (Mbs) for ICNTL(15) block format */
          PetscCall(ISGetSize(rows[r], &size));
          if (!mumps->nest_convert_to_triples[r * nc + c]) {
            for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row
          }
          PetscCall(MatGetBlockSize(sub, &bs));
          Mbs += size / bs;
        }
        if (!mumps->nest_convert_to_triples[r * nc + c]) continue;

        /* Extract inner blocks if needed */
        PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, &swap));
        PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell");

        /* Get column layout to map off-process columns */
        PetscCall(MatGetLayouts(sub, NULL, &cmap));

        /* Get row start to map on-process rows */
        PetscCall(MatGetOwnershipRange(sub, &rst, NULL));

        /* Directly use the mumps datastructure and use C ordering for now */
        PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps));

        /* Swap the role of rows and columns indices for transposed blocks
           since we need values with global final ordering */
        if (swap) {
          cidx = rows_idx[r];
          ridx = cols_idx[c];
        }

        /* Communicate column indices
           This could have been done with a single SF but it would have complicated the code a lot.
           But since we do it only once, we pay the price of setting up an SF for each block */
        if (PetscDefined(USE_64BIT_INDICES)) {
          for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k];
        } else pjcns_w = (PetscInt *)mumps->jcn; /* This cast is needed only to silence warnings for 64bit integers builds */
        PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf));
        PetscCall(PetscIntCast(mumps->nnz, &innz));
        PetscCall(PetscSFSetGraphLayout(csf, cmap, innz, NULL, PETSC_OWN_POINTER, pjcns_w));
        PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
        PetscCall(PetscSFDestroy(&csf));

        /* Import indices: use direct map for rows and mapped indices for columns */
        if (swap) {
          for (PetscInt k = 0; k < mumps->nnz; k++) {
            PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k]));
            PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k]));
          }
        } else {
          for (PetscInt k = 0; k < mumps->nnz; k++) {
            PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k]));
            PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k]));
          }
        }

        /* Import values to full COO */
        if (conjugate) { /* conjugate the entries */
          PetscScalar *v = vals + cumnnz;
          for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * PetscConj(mumps->val[k]);
        } else if (vscale != 1.0) {
          PetscScalar *v = vals + cumnnz;
          for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * mumps->val[k];
        } else PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz));

        /* Shift new starting point and sanity check */
        cumnnz += mumps->nnz;
        PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);

        /* Free scratch memory */
        PetscCall(PetscFree2(mumps->irn, mumps->jcn));
        PetscCall(PetscFree(mumps->val_alloc));
        mumps->val = NULL;
        mumps->nnz = 0;
      }
    }
    if (mumps->id.ICNTL(15) == 1) { /* compressed (block) input format: build blkptr on rank 0 */
      if (Mbs != A->rmap->N) {
        PetscMPIInt rank, size;

        PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
        PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
        if (rank == 0) {
          PetscInt shift = 0; /* NOTE(review): intentionally shadows the parameter; counts blocks emitted so far */

          PetscCall(PetscMUMPSIntCast(Mbs, &mumps->id.nblk));
          PetscCall(PetscFree(mumps->id.blkptr));
          PetscCall(PetscMalloc1(Mbs + 1, &mumps->id.blkptr));
          mumps->id.blkptr[0] = 1; /* 1-based (Fortran) pointer array */
          for (PetscInt i = 0; i < size; ++i) {
            for (PetscInt r = 0; r < nr; r++) {
              Mat             sub = mats[r][r];
              const PetscInt *ranges;
              PetscInt        bs;

              for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row
              PetscCall(MatGetOwnershipRanges(sub, &ranges));
              PetscCall(MatGetBlockSize(sub, &bs));
              for (PetscInt j = 0, start = mumps->id.blkptr[shift] + bs; j < ranges[i + 1] - ranges[i]; j += bs) PetscCall(PetscMUMPSIntCast(start + j, mumps->id.blkptr + shift + j / bs + 1));
              shift += (ranges[i + 1] - ranges[i]) / bs;
            }
          }
        }
      } else mumps->id.ICNTL(15) = 0; /* every block has size 1: the block format brings nothing, disable it */
    }
    if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r]));
    for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c]));
    PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx));
    if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);
    mumps->nest_vals_start[nr * nc] = cumnnz;

    /* Set pointers for final MUMPS data structure */
    mumps->nest_vals = vals;
    mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */
    mumps->val       = vals;
    mumps->irn       = irns;
    mumps->jcn       = jcns;
    mumps->nnz       = cumnnz;
  } else {
    /* reuse: refresh numerical values block by block, indices are unchanged */
    PetscScalar *oval = mumps->nest_vals;
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        PetscBool   conjugate = PETSC_FALSE;
        Mat         sub       = mats[r][c];
        PetscScalar vscale = 1.0, vshift = 0.0;
        PetscInt    midx = r * nc + c;

        if (!mumps->nest_convert_to_triples[midx]) continue;
        PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, NULL));
        PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell");
        /* point mumps->val at this block's slice of the concatenated value array */
        mumps->val = oval + mumps->nest_vals_start[midx];
        PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps));
        if (conjugate) {
          PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
          for (PetscCount k = 0; k < nnz; k++) mumps->val[k] = vscale * PetscConj(mumps->val[k]);
        } else if (vscale != 1.0) {
          PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
          for (PetscCount k = 0; k < nnz; k++) mumps->val[k] *= vscale;
        }
      }
    }
    mumps->val = oval; /* restore the pointer to the start of the concatenated values */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1759 
/*
  Destroy a MUMPS factor matrix: release all PETSc-side scratch arrays, terminate the
  MUMPS instance (JOB_END) if it was ever initialized, release the communicators, and
  clear the composed query/option functions.

  The order matters: JOB_END must run before the internal id is freed and before the
  MUMPS communicator is released.
*/
static PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;

  PetscFunctionBegin;
  /* solution/RHS scatters and sequential work vectors */
  PetscCall(PetscFree(mumps->id.isol_loc));
  PetscCall(VecScatterDestroy(&mumps->scat_rhs));
  PetscCall(VecScatterDestroy(&mumps->scat_sol));
  PetscCall(VecDestroy(&mumps->b_seq));
  PetscCall(VecDestroy(&mumps->x_seq));
  /* ordering/block-format arrays handed to MUMPS */
  PetscCall(PetscFree(mumps->id.perm_in));
  PetscCall(PetscFree(mumps->id.blkvar));
  PetscCall(PetscFree(mumps->id.blkptr));
  /* COO triples and cached control values */
  PetscCall(PetscFree2(mumps->irn, mumps->jcn));
  PetscCall(PetscFree(mumps->val_alloc));
  PetscCall(PetscFree(mumps->info));
  PetscCall(PetscFree(mumps->ICNTL_pre));
  PetscCall(PetscFree(mumps->CNTL_pre));
  PetscCall(MatMumpsResetSchur_Private(mumps));
  if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
    mumps->id.job = JOB_END;
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
    if (mumps->mumps_comm != MPI_COMM_NULL) {
      /* the communicator was either duplicated (OpenMP path) or borrowed from PETSc */
      if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
      else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
    }
  }
  PetscCall(MatMumpsFreeInternalID(&mumps->id));
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  if (mumps->use_petsc_omp_support) {
    /* buffers used to gather distributed RHS onto OMP master ranks */
    PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
    PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
    PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
  }
#endif
  PetscCall(PetscFree(mumps->ia_alloc));
  PetscCall(PetscFree(mumps->ja_alloc));
  PetscCall(PetscFree(mumps->recvcount));
  PetscCall(PetscFree(mumps->reqs));
  PetscCall(PetscFree(mumps->irhs_loc));
  PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples));
  PetscCall(PetscFree(mumps->nest_vals));
  PetscCall(PetscFree(A->data));

  /* clear composed functions */
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetBlk_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1823 
1824 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */
/*
  Set up the distributed dense RHS (ICNTL(20)=10) fields of the MUMPS id structure.

  Input:
    A     - the factored matrix
    nrhs  - number of right-hand sides
    array - local chunk of the RHS values on this process (column-major for nrhs > 1)

  Fills id.irhs_loc (1-based global row indices of the local RHS rows), id.rhs_loc,
  id.nloc_rhs, id.lrhs_loc and id.nrhs. With PETSc's OpenMP support enabled
  (omp_comm_size > 1), the RHS chunks of all ranks in the OMP sub-communicator are
  gathered onto the OMP master rank first, since only master ranks talk to MUMPS.
*/
static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
{
  Mat_MUMPS        *mumps   = (Mat_MUMPS *)A->data;
  const PetscMPIInt ompsize = mumps->omp_comm_size;
  PetscInt          i, m, M, rstart;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &M, NULL));
  PetscCall(MatGetLocalSize(A, &m, NULL));
  PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
  if (ompsize == 1) {
    /* no OpenMP aggregation: each rank passes its own rows directly to MUMPS */
    if (!mumps->irhs_loc) { /* lazily build the (constant) index array on first use */
      mumps->nloc_rhs = (PetscMUMPSInt)m;
      PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
      PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
      for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(rstart + i + 1, &mumps->irhs_loc[i])); /* use 1-based indices */
    }
    PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, m * nrhs, array, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc));
  } else {
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
    const PetscInt *ranges;
    PetscMPIInt     j, k, sendcount, *petsc_ranks, *omp_ranks;
    MPI_Group       petsc_group, omp_group;
    PetscScalar    *recvbuf = NULL;

    if (mumps->is_omp_master) {
      /* Lazily initialize the omp stuff for distributed rhs */
      if (!mumps->irhs_loc) {
        PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
        PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
        /* translate OMP sub-communicator ranks to PETSc communicator ranks to find each member's row range */
        PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
        PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
        for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
        PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));

        /* Populate mumps->irhs_loc[], rhs_nrow[] */
        mumps->nloc_rhs = 0;
        PetscCall(MatGetOwnershipRanges(A, &ranges));
        for (j = 0; j < ompsize; j++) {
          mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
          mumps->nloc_rhs += mumps->rhs_nrow[j];
        }
        PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
        for (j = k = 0; j < ompsize; j++) {
          for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) PetscCall(PetscMUMPSIntCast(i + 1, &mumps->irhs_loc[k])); /* uses 1-based indices */
        }

        PetscCall(PetscFree2(omp_ranks, petsc_ranks));
        PetscCallMPI(MPI_Group_free(&petsc_group));
        PetscCallMPI(MPI_Group_free(&omp_group));
      }

      /* Realloc buffers when current nrhs is bigger than what we have met */
      if (nrhs > mumps->max_nrhs) {
        PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
        PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
        mumps->max_nrhs = nrhs;
      }

      /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
      for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
      mumps->rhs_disps[0] = 0;
      for (j = 1; j < ompsize; j++) {
        mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
        PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
      }
      recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
    }

    PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
    PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));

    if (mumps->is_omp_master) {
      if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
        /* gathered data is grouped by sender; interleave it so each RHS column is contiguous */
        PetscScalar *dst, *dstbase = mumps->rhs_loc;
        for (j = 0; j < ompsize; j++) {
          const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
          dst                    = dstbase;
          for (i = 0; i < nrhs; i++) {
            PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
            src += mumps->rhs_nrow[j];
            dst += mumps->nloc_rhs;
          }
          dstbase += mumps->rhs_nrow[j];
        }
      }
      PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nloc_rhs * nrhs, mumps->rhs_loc, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc));
    }
#endif /* PETSC_HAVE_OPENMP_SUPPORT */
  }
  mumps->id.nrhs     = (PetscMUMPSInt)nrhs;
  mumps->id.nloc_rhs = (PetscMUMPSInt)mumps->nloc_rhs;
  mumps->id.lrhs_loc = mumps->nloc_rhs;
  mumps->id.irhs_loc = mumps->irhs_loc;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1921 
/*
  Solve A x = b with the MUMPS factors (JOB_SOLVE).

  RHS handling:
    - petsc_size > 1 and ICNTL20 == 10: dense distributed RHS (rhs_loc/irhs_loc)
    - petsc_size > 1 otherwise: scatter b into the sequential b_seq on rank 0 (centralized RHS)
    - petsc_size == 1: copy b into x and let MUMPS overwrite it in place
  Also handles the Schur-complement condensation/expansion steps when Schur indices
  were provided, and scatters the distributed solution back into x in parallel.
*/
static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
{
  Mat_MUMPS         *mumps  = (Mat_MUMPS *)A->data;
  const PetscScalar *barray = NULL;
  PetscScalar       *array;
  IS                 is_iden, is_petsc;
  PetscInt           i;
  PetscBool          second_solve = PETSC_FALSE;
  static PetscBool   cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM "
                                   "Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",
                                   &cite1));
  PetscCall(PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel "
                                   "Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",
                                   &cite2));

  /* short-circuit when the factorization failed: x is already flagged by VecFlag() */
  PetscCall(VecFlag(x, A->factorerrortype));
  if (A->factorerrortype) {
    PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  mumps->id.nrhs = 1;
  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS, need to set rhs_loc[], irhs_loc[] */
      PetscCall(VecGetArrayRead(b, &barray));
      PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, barray));
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential b_seq vector*/
      PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
      if (!mumps->myid) { /* only rank 0 hands the centralized RHS to MUMPS */
        PetscCall(VecGetArray(mumps->b_seq, &array));
        PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->b_seq->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
      }
    }
  } else { /* petsc_size == 1, use MUMPS's dense centralized RHS feature, so that we don't need to bother with isol_loc[] to get the solution */
    mumps->id.ICNTL(20) = 0;
    PetscCall(VecCopy(b, x));
    PetscCall(VecGetArray(x, &array));
    PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, x->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
  }

  /*
     handle condensation step of Schur complement (if any)
     We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
     According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase
     Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S
  */
  if (mumps->id.size_schur > 0) {
    PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
    if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
      second_solve = PETSC_TRUE;
      PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs
      mumps->id.ICNTL(26) = 1;                                /* condensation phase */
    } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
  }

  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps); // reduced solve, put solution in id.redrhs
  PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));

  /* handle expansion step of Schur complement (if any) */
  if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
  else if (mumps->id.ICNTL(26) == 1) { // condense the right hand side
    PetscCall(MatMumpsSolveSchur_Private(A));
    /* scatter the Schur solution back into the full solution array (1-based listvar_schur) */
    for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1] = ID_FIELD_GET(mumps->id, redrhs, i);
  }

  if (mumps->petsc_size > 1) { /* convert mumps distributed solution to PETSc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of ilsol_loc may change (not its size, lsol_loc), recreates scat_sol */
      PetscCall(VecScatterDestroy(&mumps->scat_sol));
    }
    if (!mumps->scat_sol) { /* create scatter scat_sol */
      PetscInt *isol2_loc = NULL;
      PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */
      PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc));
      for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1;                        /* change Fortran style to C style */
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */
      PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol));
      PetscCall(ISDestroy(&is_iden));
      PetscCall(ISDestroy(&is_petsc));
      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
    }

    /* copy the (possibly mixed-precision) MUMPS solution into x_seq, then scatter to x */
    PetscScalar *xarray;
    PetscCall(VecGetArray(mumps->x_seq, &xarray));
    PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.lsol_loc, mumps->id.precision, mumps->id.sol_loc, xarray));
    PetscCall(VecRestoreArray(mumps->x_seq, &xarray));
    PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));

    if (mumps->ICNTL20 == 10) { // distributed RHS
      PetscCall(VecRestoreArrayRead(b, &barray));
    } else if (!mumps->myid) { // centralized RHS
      PetscCall(VecRestoreArray(mumps->b_seq, &array));
    }
  } else {
    // id.rhs has the solution in mumps precision
    PetscCall(MatMumpsCastMumpsScalarArray(x->map->n, mumps->id.precision, mumps->id.rhs, array));
    PetscCall(VecRestoreArray(x, &array));
  }

  /* INFO(28) is the number of entries in the factors; negative means the count is in millions */
  PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2033 
MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)2034 static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x)
2035 {
2036   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
2037   const PetscMUMPSInt value = mumps->id.ICNTL(9);
2038 
2039   PetscFunctionBegin;
2040   mumps->id.ICNTL(9) = 0;
2041   PetscCall(MatSolve_MUMPS(A, b, x));
2042   mumps->id.ICNTL(9) = value;
2043   PetscFunctionReturn(PETSC_SUCCESS);
2044 }
2045 
/*
  Solves A X = B for multiple right-hand sides with MUMPS (JOB=3).

  X must be MATDENSE. B is either MATDENSE (dense RHS), or a MATTRANSPOSEVIRTUAL wrapper around
  an AIJ matrix that holds the TRANSPOSE of the actual RHS, because MUMPS wants sparse RHS in
  compressed COLUMN storage (see MatMatTransposeSolve_MUMPS()).

  In parallel, the MUMPS distributed solution (sol_loc/isol_loc) is scattered back into X.
*/
static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X)
{
  Mat                Bt = NULL; /* underlying AIJ matrix when B is a transpose-virtual sparse RHS */
  PetscBool          denseX, denseB, flg, flgT;
  Mat_MUMPS         *mumps = (Mat_MUMPS *)A->data;
  PetscInt           i, nrhs, M, nrhsM;
  PetscScalar       *array;
  const PetscScalar *barray;
  PetscInt           lsol_loc, nlsol_loc, *idxx, iidx = 0;
  PetscMUMPSInt     *isol_loc, *isol_loc_save; /* isol_loc_save: MatSolve()'s own buffer, restored on exit */
  PetscScalar       *sol_loc;
  void              *sol_loc_save;
  PetscCount         sol_loc_len_save;
  IS                 is_to, is_from;
  PetscInt           k, proc, j, m, myrstart;
  const PetscInt    *rstart;
  Vec                v_mpi, msol_loc; /* v_mpi wraps X's array; msol_loc wraps the local MUMPS solution */
  VecScatter         scat_sol;
  Vec                b_seq;
  VecScatter         scat_rhs;
  PetscScalar       *aa;
  PetscInt           spnr, *ia, *ja;
  Mat_MPIAIJ        *b = NULL;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL));
  PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");

  PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL));

  if (denseB) {
    PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution");
    mumps->id.ICNTL(20) = 0; /* dense RHS */
  } else {                   /* sparse B */
    PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices");
    PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT));
    PetscCheck(flgT, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix");
    /* reject any scaling/shift attached to the shell wrapper: MUMPS sees only the raw Bt data */
    PetscCall(MatShellGetScalingShifts(B, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
    /* input B is transpose of actual RHS matrix,
     because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
    PetscCall(MatTransposeGetMat(B, &Bt));
    mumps->id.ICNTL(20) = 1; /* sparse RHS */
  }

  PetscCall(MatGetSize(B, &M, &nrhs));
  PetscCall(PetscIntMultError(nrhs, M, &nrhsM)); /* total number of RHS entries, with overflow check */
  mumps->id.nrhs = (PetscMUMPSInt)nrhs;
  mumps->id.lrhs = (PetscMUMPSInt)M;

  if (mumps->petsc_size == 1) { // handle this easy case specially and return early
    PetscScalar *aa;
    PetscInt     spnr, *ia, *ja;
    PetscBool    second_solve = PETSC_FALSE;

    PetscCall(MatDenseGetArray(X, &array));
    if (denseB) {
      /* copy B to X: MUMPS solves in place, overwriting the RHS with the solution */
      PetscCall(MatDenseGetArrayRead(B, &barray));
      PetscCall(PetscArraycpy(array, barray, nrhsM));
      PetscCall(MatDenseRestoreArrayRead(B, &barray));
    } else { /* sparse B: hand Bt's CSR arrays to MUMPS as compressed-column RHS */
      PetscCall(MatSeqAIJGetArray(Bt, &aa));
      PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
      PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
      PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->id.nz_rhs, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
    }
    /* id.rhs always points at X's storage (possibly converted to MUMPS' precision) */
    PetscCall(MatMumpsMakeMumpsScalarArray(denseB, nrhsM, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));

    /* handle condensation step of Schur complement (if any) */
    if (mumps->id.size_schur > 0) {
      if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
        second_solve = PETSC_TRUE;
        PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs
        mumps->id.ICNTL(26) = 1;                                /* condensation phase, i.e, to solve id.redrhs */
      } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
    }

    mumps->id.job = JOB_SOLVE;
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));

    /* handle expansion step of Schur complement (if any) */
    if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
    else if (mumps->id.ICNTL(26) == 1) { // condense the right hand side
      PetscCall(MatMumpsSolveSchur_Private(A));
      /* copy the Schur-complement solution back into X at the Schur variables (1-based listvar_schur) */
      for (j = 0; j < nrhs; ++j)
        for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1 + j * M] = ID_FIELD_GET(mumps->id, redrhs, i + j * mumps->id.lredrhs);
    }

    if (!denseB) { /* sparse B, restore ia, ja */
      PetscCall(MatSeqAIJRestoreArray(Bt, &aa));
      PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
    }

    // no matter dense B or sparse B, solution is in id.rhs; convert it to array of X.
    PetscCall(MatMumpsCastMumpsScalarArray(nrhsM, mumps->id.precision, mumps->id.rhs, array));
    PetscCall(MatDenseRestoreArray(X, &array));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* parallel case: MUMPS requires rhs B to be centralized on the host! */
  PetscCheck(!mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");

  /* create msol_loc to hold mumps local solution */
  isol_loc_save         = mumps->id.isol_loc; /* save these, as we want to reuse them in MatSolve() */
  sol_loc_save          = mumps->id.sol_loc;
  sol_loc_len_save      = mumps->id.sol_loc_len;
  mumps->id.isol_loc    = NULL; // an init state
  mumps->id.sol_loc     = NULL;
  mumps->id.sol_loc_len = 0;

  lsol_loc = mumps->id.lsol_loc;
  PetscCall(PetscIntMultError(nrhs, lsol_loc, &nlsol_loc)); /* length of sol_loc */
  PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc));
  PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, nlsol_loc, sol_loc, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc));
  mumps->id.isol_loc = isol_loc;

  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc));

  if (denseB) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      PetscCall(MatDenseGetArrayRead(B, &barray));
      PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, barray)); // put barray to rhs_loc
      PetscCall(MatDenseRestoreArrayRead(B, &barray));
      PetscCall(MatGetLocalSize(B, &m, NULL));
      PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, NULL, &v_mpi)); // will scatter the solution to v_mpi, which wraps X
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
      /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in
        very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank
        0, re-arrange B into desired order, which is a local operation.
      */

      /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
      /* wrap dense rhs matrix B into a vector v_mpi */
      PetscCall(MatGetLocalSize(B, &m, NULL));
      PetscCall(MatDenseGetArrayRead(B, &barray));
      PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, barray, &v_mpi));
      PetscCall(MatDenseRestoreArrayRead(B, &barray));

      /* scatter v_mpi to b_seq in proc[0]. With ICNTL(20) = 0, MUMPS requires rhs to be centralized on the host! */
      if (!mumps->myid) {
        PetscInt *idx;
        /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
        PetscCall(PetscMalloc1(nrhsM, &idx));
        PetscCall(MatGetOwnershipRanges(B, &rstart));
        for (proc = 0, k = 0; proc < mumps->petsc_size; proc++) {
          for (j = 0; j < nrhs; j++) {
            for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i;
          }
        }

        PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhsM, &b_seq));
        PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhsM, idx, PETSC_OWN_POINTER, &is_to));
        PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhsM, 0, 1, &is_from));
      } else {
        /* non-host ranks contribute nothing to the centralized RHS */
        PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq));
        PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to));
        PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from));
      }

      PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs));
      PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(ISDestroy(&is_to));
      PetscCall(ISDestroy(&is_from));
      PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));

      if (!mumps->myid) { /* define rhs on the host */
        PetscCall(VecGetArrayRead(b_seq, &barray));
        PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, nrhsM, barray, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
        PetscCall(VecRestoreArrayRead(b_seq, &barray));
      }
    }
  } else { /* sparse B */
    b = (Mat_MPIAIJ *)Bt->data;

    /* wrap dense X into a vector v_mpi */
    PetscCall(MatGetLocalSize(X, &m, NULL));
    PetscCall(MatDenseGetArrayRead(X, &barray));
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhsM, barray, &v_mpi));
    PetscCall(MatDenseRestoreArrayRead(X, &barray));

    if (!mumps->myid) {
      /* host hands the diagonal block's CSR of Bt to MUMPS as the (centralized) sparse RHS */
      PetscCall(MatSeqAIJGetArray(b->A, &aa));
      PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
      PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
      PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)b->A->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
    } else {
      /* non-host ranks must present an empty sparse RHS and drop any stale conversion buffer */
      mumps->id.irhs_ptr    = NULL;
      mumps->id.irhs_sparse = NULL;
      mumps->id.nz_rhs      = 0;
      if (mumps->id.rhs_sparse_len) {
        PetscCall(PetscFree(mumps->id.rhs_sparse));
        mumps->id.rhs_sparse_len = 0;
      }
    }
  }

  /* solve phase */
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));

  /* scatter mumps distributed solution to PETSc vector v_mpi, which shares local arrays with solution matrix X */
  PetscCall(MatDenseGetArray(X, &array));
  PetscCall(VecPlaceArray(v_mpi, array));

  /* create scatter scat_sol */
  PetscCall(MatGetOwnershipRanges(X, &rstart));
  /* iidx: index for scatter mumps solution to PETSc X */

  PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from));
  PetscCall(PetscMalloc1(nlsol_loc, &idxx));
  for (i = 0; i < lsol_loc; i++) {
    isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */

    /* find the rank owning global row isol_loc[i] of X, to map into that rank's column-major slab */
    for (proc = 0; proc < mumps->petsc_size; proc++) {
      if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) {
        myrstart = rstart[proc];
        k        = isol_loc[i] - myrstart;          /* local index on 1st column of PETSc vector X */
        iidx     = k + myrstart * nrhs;             /* maps mumps isol_loc[i] to PETSc index in X */
        m        = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */
        break;
      }
    }

    for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m;
  }
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to));
  PetscCall(MatMumpsCastMumpsScalarArray(nlsol_loc, mumps->id.precision, mumps->id.sol_loc, sol_loc)); // Vec msol_loc is created with sol_loc[]
  PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol));
  PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(ISDestroy(&is_from));
  PetscCall(ISDestroy(&is_to));
  PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(MatDenseRestoreArray(X, &array));

  if (mumps->id.sol_loc_len) { // in case we allocated intermediate buffers
    mumps->id.sol_loc_len = 0;
    PetscCall(PetscFree(mumps->id.sol_loc));
  }

  // restore old values
  mumps->id.sol_loc     = sol_loc_save;
  mumps->id.sol_loc_len = sol_loc_len_save;
  mumps->id.isol_loc    = isol_loc_save;

  PetscCall(PetscFree2(sol_loc, isol_loc));
  PetscCall(PetscFree(idxx));
  PetscCall(VecDestroy(&msol_loc));
  PetscCall(VecDestroy(&v_mpi));
  if (!denseB) {
    if (!mumps->myid) {
      b = (Mat_MPIAIJ *)Bt->data;
      PetscCall(MatSeqAIJRestoreArray(b->A, &aa));
      PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
    }
  } else {
    /* b_seq/scat_rhs exist only in the centralized dense-RHS path (ICNTL20 == 0) */
    if (mumps->ICNTL20 == 0) {
      PetscCall(VecDestroy(&b_seq));
      PetscCall(VecScatterDestroy(&scat_rhs));
    }
  }
  PetscCall(VecScatterDestroy(&scat_sol));
  PetscCall(PetscLogFlops(nrhs * PetscMax(0, 2.0 * (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2318 
MatMatSolveTranspose_MUMPS(Mat A,Mat B,Mat X)2319 static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X)
2320 {
2321   Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
2322   const PetscMUMPSInt value = mumps->id.ICNTL(9);
2323 
2324   PetscFunctionBegin;
2325   mumps->id.ICNTL(9) = 0;
2326   PetscCall(MatMatSolve_MUMPS(A, B, X));
2327   mumps->id.ICNTL(9) = value;
2328   PetscFunctionReturn(PETSC_SUCCESS);
2329 }
2330 
MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X)2331 static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X)
2332 {
2333   PetscBool flg;
2334   Mat       B;
2335 
2336   PetscFunctionBegin;
2337   PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
2338   PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix");
2339 
2340   /* Create B=Bt^T that uses Bt's data structure */
2341   PetscCall(MatCreateTranspose(Bt, &B));
2342 
2343   PetscCall(MatMatSolve_MUMPS(A, B, X));
2344   PetscCall(MatDestroy(&B));
2345   PetscFunctionReturn(PETSC_SUCCESS);
2346 }
2347 
2348 #if !defined(PETSC_USE_COMPLEX)
2349 /*
2350   input:
2351    F:        numeric factor
2352   output:
2353    nneg:     total number of negative pivots
2354    nzero:    total number of zero pivots
2355    npos:     (global dimension of F) - nneg - nzero
2356 */
MatGetInertia_SBAIJMUMPS(Mat F,PetscInt * nneg,PetscInt * nzero,PetscInt * npos)2357 static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
2358 {
2359   Mat_MUMPS  *mumps = (Mat_MUMPS *)F->data;
2360   PetscMPIInt size;
2361 
2362   PetscFunctionBegin;
2363   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size));
2364   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
2365   PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13));
2366 
2367   if (nneg) *nneg = mumps->id.INFOG(12);
2368   if (nzero || npos) {
2369     PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
2370     if (nzero) *nzero = mumps->id.INFOG(28);
2371     if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
2372   }
2373   PetscFunctionReturn(PETSC_SUCCESS);
2374 }
2375 #endif
2376 
/*
  Gathers the COO triples (irn, jcn, val) from all ranks of the OpenMP sub-communicator onto its
  master rank, which is the only rank that talks to MUMPS when -mat_mumps_use_omp_threads is used.

  reuse == MAT_INITIAL_MATRIX: gather counts, reallocate master's arrays, transfer irn/jcn/val.
  reuse == MAT_REUSE_MATRIX:   the structure is unchanged, so only val is re-transferred.

  Messages are sliced into chunks of at most PETSC_MPI_INT_MAX entries because mumps->nnz is
  64-bit while MPI counts are 32-bit (see comment below).
*/
static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps)
{
  PetscMPIInt    nreqs;
  PetscMUMPSInt *irn, *jcn;
  PetscMPIInt    count;
  PetscCount     totnnz, remain;
  const PetscInt osize = mumps->omp_comm_size;
  PetscScalar   *val;

  PetscFunctionBegin;
  if (osize > 1) { /* nothing to do unless the omp sub-communicator has more than one rank */
    if (reuse == MAT_INITIAL_MATRIX) {
      /* master first gathers counts of nonzeros to receive */
      if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount));
      PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm));

      /* Then each computes number of send/recvs */
      if (mumps->is_omp_master) {
        /* Start from 1 since self communication is not done in MPI */
        nreqs = 0;
        for (PetscMPIInt i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; /* ceil-divide: sliced messages per sender */
      } else {
        nreqs = (PetscMPIInt)(((mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX));
      }
      PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */

      /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others.
         MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz
         might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size
         is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
       */
      nreqs = 0; /* counter for actual send/recvs */
      if (mumps->is_omp_master) {
        totnnz = 0;

        for (PetscMPIInt i = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
        PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn));
        PetscCall(PetscMalloc1(totnnz, &val));

        /* Self communication */
        PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz));
        PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz));
        PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz));

        /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
        PetscCall(PetscFree2(mumps->irn, mumps->jcn));
        PetscCall(PetscFree(mumps->val_alloc));
        mumps->nnz = totnnz;
        mumps->irn = irn;
        mumps->jcn = jcn;
        mumps->val = mumps->val_alloc = val;

        /* advance the cursors past master's own data before receiving from the others */
        irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
        jcn += mumps->recvcount[0];
        val += mumps->recvcount[0];

        /* Remote communication */
        for (PetscMPIInt i = 1; i < osize; i++) {
          count  = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
          remain = mumps->recvcount[i] - count;
          while (count > 0) { /* receive rank i's triples in <= 2^31-1 sized slices */
            PetscCallMPI(MPIU_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
            PetscCallMPI(MPIU_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
            PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
            irn += count;
            jcn += count;
            val += count;
            count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
            remain -= count;
          }
        }
      } else {
        /* non-master: send my triples to the master, sliced the same way */
        irn    = mumps->irn;
        jcn    = mumps->jcn;
        val    = mumps->val;
        count  = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
        remain = mumps->nnz - count;
        while (count > 0) {
          PetscCallMPI(MPIU_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
          PetscCallMPI(MPIU_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
          PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
          irn += count;
          jcn += count;
          val += count;
          count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
          remain -= count;
        }
      }
    } else { /* MAT_REUSE_MATRIX: same sparsity pattern, transfer numerical values only */
      nreqs = 0;
      if (mumps->is_omp_master) {
        val = mumps->val + mumps->recvcount[0];
        for (PetscMPIInt i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */
          count  = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
          remain = mumps->recvcount[i] - count;
          while (count > 0) {
            PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
            val += count;
            count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
            remain -= count;
          }
        }
      } else {
        val    = mumps->val;
        count  = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
        remain = mumps->nnz - count;
        while (count > 0) {
          PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
          val += count;
          count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
          remain -= count;
        }
      }
    }
    PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE));
    mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2496 
MatFactorNumeric_MUMPS(Mat F,Mat A,PETSC_UNUSED const MatFactorInfo * info)2497 static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info)
2498 {
2499   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
2500 
2501   PetscFunctionBegin;
2502   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) {
2503     if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2504     PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2505     PetscFunctionReturn(PETSC_SUCCESS);
2506   }
2507 
2508   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
2509   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));
2510 
2511   /* numerical factorization phase */
2512   mumps->id.job = JOB_FACTNUMERIC;
2513   if (!mumps->id.ICNTL(18)) { /* A is centralized */
2514     if (!mumps->myid) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
2515   } else {
2516     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
2517   }
2518 
2519   if (F->schur) {
2520     const PetscScalar *array;
2521     MUMPS_INT          size = mumps->id.size_schur;
2522     PetscCall(MatDenseGetArrayRead(F->schur, &array));
2523     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, size * size, array, mumps->id.precision, &mumps->id.schur_len, &mumps->id.schur));
2524     PetscCall(MatDenseRestoreArrayRead(F->schur, &array));
2525   }
2526 
2527   if (mumps->id.ICNTL(22)) PetscCall(PetscStrncpy(mumps->id.ooc_prefix, ((PetscObject)F)->prefix, sizeof(((MUMPS_STRUC_C *)NULL)->ooc_prefix)));
2528 
2529   PetscMUMPS_c(mumps);
2530   if (mumps->id.INFOG(1) < 0) {
2531     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
2532     if (mumps->id.INFOG(1) == -10) {
2533       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2534       F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
2535     } else if (mumps->id.INFOG(1) == -13) {
2536       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2537       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2538     } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
2539       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2540       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2541     } else {
2542       PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2543       F->factorerrortype = MAT_FACTOR_OTHER;
2544     }
2545   }
2546   PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.INFOG(16));
2547 
2548   F->assembled = PETSC_TRUE;
2549 
2550   if (F->schur) { /* reset Schur status to unfactored */
2551 #if defined(PETSC_HAVE_CUDA)
2552     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
2553 #endif
2554     PetscScalar *array;
2555     PetscCall(MatDenseGetArray(F->schur, &array));
2556     PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.size_schur * mumps->id.size_schur, mumps->id.precision, mumps->id.schur, array));
2557     PetscCall(MatDenseRestoreArray(F->schur, &array));
2558     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2559       mumps->id.ICNTL(19) = 2;
2560       PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
2561     }
2562     PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
2563   }
2564 
2565   /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
2566   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
2567 
2568   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
2569   // MUMPS userguide: ISOL_loc should be allocated by the user between the factorization and the
2570   // solve phases. On exit from the solve phase, ISOL_loc(i) contains the index of the variables for
2571   // which the solution (in SOL_loc) is available on the local processor.
2572   // If successive calls to the solve phase (JOB= 3) are performed for a given matrix, ISOL_loc will
2573   // normally have the same contents for each of these calls. The only exception is the case of
2574   // unsymmetric matrices (SYM=1) when the transpose option is changed (see ICNTL(9)) and non
2575   // symmetric row/column exchanges (see ICNTL(6)) have occurred before the solve phase.
2576   if (mumps->petsc_size > 1) {
2577     PetscInt     lsol_loc;
2578     PetscScalar *array;
2579 
2580     /* distributed solution; Create x_seq=sol_loc for repeated use */
2581     if (mumps->x_seq) {
2582       PetscCall(VecScatterDestroy(&mumps->scat_sol));
2583       PetscCall(PetscFree(mumps->id.isol_loc));
2584       PetscCall(VecDestroy(&mumps->x_seq));
2585     }
2586     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
2587     PetscCall(PetscMalloc1(lsol_loc, &mumps->id.isol_loc));
2588     PetscCall(VecCreateSeq(PETSC_COMM_SELF, lsol_loc, &mumps->x_seq));
2589     PetscCall(VecGetArray(mumps->x_seq, &array));
2590     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, lsol_loc, array, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc));
2591     PetscCall(VecRestoreArray(mumps->x_seq, &array));
2592     mumps->id.lsol_loc = (PetscMUMPSInt)lsol_loc;
2593   }
2594   PetscCall(PetscLogFlops((double)ID_RINFO_GET(mumps->id, 2)));
2595   PetscFunctionReturn(PETSC_SUCCESS);
2596 }
2597 
2598 /* Sets MUMPS options from the options database */
MatSetFromOptions_MUMPS(Mat F,Mat A)2599 static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A)
2600 {
2601   Mat_MUMPS    *mumps = (Mat_MUMPS *)F->data;
2602   PetscReal     cntl;
2603   PetscMUMPSInt icntl = 0, size, *listvar_schur;
2604   PetscInt      info[80], i, ninfo = 80, rbs, cbs;
2605   PetscBool     flg   = PETSC_FALSE;
2606   PetscBool     schur = mumps->id.icntl ? (PetscBool)(mumps->id.ICNTL(26) == -1) : (PetscBool)(mumps->ICNTL26 == -1);
2607   void         *arr;
2608 
2609   PetscFunctionBegin;
2610   PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat");
2611   if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */
2612     PetscPrecision precision  = PetscDefined(USE_REAL_SINGLE) ? PETSC_PRECISION_SINGLE : PETSC_PRECISION_DOUBLE;
2613     PetscInt       nthreads   = 0;
2614     PetscInt       nCNTL_pre  = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2615     PetscInt       nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
2616     PetscMUMPSInt  nblk, *blkvar, *blkptr;
2617 
2618     mumps->petsc_comm = PetscObjectComm((PetscObject)A);
2619     PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size));
2620     PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */
2621 
2622     PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support));
2623     if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
2624     /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */
2625     PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL));
2626     if (mumps->use_petsc_omp_support) {
2627       PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2628 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
2629       PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl));
2630       PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master));
2631 #else
2632       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",
2633               ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : "");
2634 #endif
2635     } else {
2636       mumps->omp_comm      = PETSC_COMM_SELF;
2637       mumps->mumps_comm    = mumps->petsc_comm;
2638       mumps->is_omp_master = PETSC_TRUE;
2639     }
2640     PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size));
2641     mumps->reqs = NULL;
2642     mumps->tag  = 0;
2643 
2644     if (mumps->mumps_comm != MPI_COMM_NULL) {
2645       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) {
2646         /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
2647         MPI_Comm comm;
2648         PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm));
2649         mumps->mumps_comm = comm;
2650       } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm));
2651     }
2652 
2653     mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
2654     mumps->id.job          = JOB_INIT;
2655     mumps->id.par          = 1; /* host participates factorizaton and solve */
2656     mumps->id.sym          = mumps->sym;
2657 
2658     size          = mumps->id.size_schur;
2659     arr           = mumps->id.schur;
2660     listvar_schur = mumps->id.listvar_schur;
2661     nblk          = mumps->id.nblk;
2662     blkvar        = mumps->id.blkvar;
2663     blkptr        = mumps->id.blkptr;
2664     if (PetscDefined(USE_DEBUG)) {
2665       for (PetscInt i = 0; i < size; i++)
2666         PetscCheck(listvar_schur[i] - 1 >= 0 && listvar_schur[i] - 1 < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid Schur index at position %" PetscInt_FMT "! %" PetscInt_FMT " must be in [0, %" PetscInt_FMT ")", i, (PetscInt)listvar_schur[i] - 1,
2667                    A->rmap->N);
2668     }
2669 
2670     PetscCall(PetscOptionsEnum("-pc_precision", "Precision used by MUMPS", "MATSOLVERMUMPS", PetscPrecisionTypes, (PetscEnum)precision, (PetscEnum *)&precision, NULL));
2671     PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "MUMPS does not support %s precision", PetscPrecisionTypes[precision]);
2672     PetscCheck(precision == PETSC_SCALAR_PRECISION || PetscDefined(HAVE_MUMPS_MIXED_PRECISION), PetscObjectComm((PetscObject)F), PETSC_ERR_USER, "Your MUMPS library does not support mixed precision, but which is needed with your specified PetscScalar");
2673     PetscCall(MatMumpsAllocateInternalID(&mumps->id, precision));
2674 
2675     PetscMUMPS_c(mumps);
2676     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2677 
2678     /* set PETSc-MUMPS default options - override MUMPS default */
2679     mumps->id.ICNTL(3) = 0;
2680     mumps->id.ICNTL(4) = 0;
2681     if (mumps->petsc_size == 1) {
2682       mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
2683       mumps->id.ICNTL(7)  = 7; /* automatic choice of ordering done by the package */
2684     } else {
2685       mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
2686       mumps->id.ICNTL(21) = 1; /* distributed solution */
2687     }
2688     if (nblk && blkptr) {
2689       mumps->id.ICNTL(15) = 1;
2690       mumps->id.nblk      = nblk;
2691       mumps->id.blkvar    = blkvar;
2692       mumps->id.blkptr    = blkptr;
2693     } else mumps->id.ICNTL(15) = 0;
2694 
2695     /* restore cached ICNTL and CNTL values */
2696     for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl];
2697     for (icntl = 0; icntl < nCNTL_pre; ++icntl) ID_CNTL_SET(mumps->id, (PetscInt)mumps->CNTL_pre[1 + 2 * icntl], mumps->CNTL_pre[2 + 2 * icntl]);
2698 
2699     PetscCall(PetscFree(mumps->ICNTL_pre));
2700     PetscCall(PetscFree(mumps->CNTL_pre));
2701 
2702     if (schur) {
2703       mumps->id.size_schur    = size;
2704       mumps->id.schur_lld     = size;
2705       mumps->id.schur         = arr;
2706       mumps->id.listvar_schur = listvar_schur;
2707       if (mumps->petsc_size > 1) {
2708         PetscBool gs; /* gs is false if any rank other than root has non-empty IS */
2709 
2710         mumps->id.ICNTL(19) = 1;                                                                            /* MUMPS returns Schur centralized on the host */
2711         gs                  = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
2712         PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPI_C_BOOL, MPI_LAND, mumps->petsc_comm));
2713         PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc");
2714       } else {
2715         if (F->factortype == MAT_FACTOR_LU) {
2716           mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
2717         } else {
2718           mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
2719         }
2720       }
2721       mumps->id.ICNTL(26) = -1;
2722     }
2723 
2724     /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
2725        For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
2726      */
2727     PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm));
2728     PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm));
2729 
2730     mumps->scat_rhs = NULL;
2731     mumps->scat_sol = NULL;
2732   }
2733   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg));
2734   if (flg) mumps->id.ICNTL(1) = icntl;
2735   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg));
2736   if (flg) mumps->id.ICNTL(2) = icntl;
2737   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg));
2738   if (flg) mumps->id.ICNTL(3) = icntl;
2739 
2740   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg));
2741   if (flg) mumps->id.ICNTL(4) = icntl;
2742   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */
2743 
2744   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg));
2745   if (flg) mumps->id.ICNTL(6) = icntl;
2746 
2747   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg));
2748   if (flg) {
2749     PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
2750     mumps->id.ICNTL(7) = icntl;
2751   }
2752 
2753   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL));
2754   /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */
2755   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL));
2756   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL));
2757   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL));
2758   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL));
2759   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL));
2760   PetscCall(MatGetBlockSizes(A, &rbs, &cbs));
2761   if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = (PetscMUMPSInt)-rbs;
2762   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg));
2763   if (flg) {
2764     if (mumps->id.ICNTL(15) < 0) PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes");
2765     else if (mumps->id.ICNTL(15) > 0) {
2766       const PetscInt *bsizes;
2767       PetscInt        nblocks, p, *blkptr = NULL;
2768       PetscMPIInt    *recvcounts, *displs, n;
2769       PetscMPIInt     rank, size = 0;
2770 
2771       PetscCall(MatGetVariableBlockSizes(A, &nblocks, &bsizes));
2772       flg = PETSC_TRUE;
2773       for (p = 0; p < nblocks; ++p) {
2774         if (bsizes[p] > 1) break;
2775       }
2776       if (p == nblocks) flg = PETSC_FALSE;
2777       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &flg, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
2778       if (flg) { // if at least one process supplies variable block sizes and they are not all set to 1
2779         PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2780         if (rank == 0) PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2781         PetscCall(PetscCalloc2(size, &recvcounts, size + 1, &displs));
2782         PetscCall(PetscMPIIntCast(nblocks, &n));
2783         PetscCallMPI(MPI_Gather(&n, 1, MPI_INT, recvcounts, 1, MPI_INT, 0, PetscObjectComm((PetscObject)A)));
2784         for (PetscInt p = 0; p < size; ++p) displs[p + 1] = displs[p] + recvcounts[p];
2785         PetscCall(PetscMalloc1(displs[size] + 1, &blkptr));
2786         PetscCallMPI(MPI_Bcast(displs + size, 1, MPIU_INT, 0, PetscObjectComm((PetscObject)A)));
2787         PetscCallMPI(MPI_Gatherv(bsizes, n, MPIU_INT, blkptr + 1, recvcounts, displs, MPIU_INT, 0, PetscObjectComm((PetscObject)A)));
2788         if (rank == 0) {
2789           blkptr[0] = 1;
2790           for (PetscInt p = 0; p < n; ++p) blkptr[p + 1] += blkptr[p];
2791           PetscCall(MatMumpsSetBlk(F, displs[size], NULL, blkptr));
2792         }
2793         PetscCall(PetscFree2(recvcounts, displs));
2794         PetscCall(PetscFree(blkptr));
2795       }
2796     }
2797   }
2798   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL));
2799   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any Schur data (if any) */
2800     PetscCall(MatDestroy(&F->schur));
2801     PetscCall(MatMumpsResetSchur_Private(mumps));
2802   }
2803 
2804   /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
2805      and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
2806      and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
2807      This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
2808      see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
2809      In short, we could not use distributed RHS until with MPICH v4.0b1 or we enabled a workaround in mumps-5.6.2+
2810    */
2811   mumps->ICNTL20 = 10; /* Distributed dense RHS, by default */
2812 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (PetscDefined(HAVE_MPICH) && MPICH_NUMVERSION < 40000101) || PetscDefined(HAVE_MSMPI)
2813   mumps->ICNTL20 = 0; /* Centralized dense RHS, if need be */
2814 #endif
2815   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg));
2816   PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20);
2817 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
2818   PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0");
2819 #endif
2820   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */
2821 
2822   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), &flg));
2823   if (flg && mumps->id.ICNTL(22) != 1) mumps->id.ICNTL(22) = 0; // MUMPS treats values other than 1 as 0. Normalize it so we can safely use 'if (mumps->id.ICNTL(22))'
2824   if (mumps->id.ICNTL(22)) {
2825     // MUMPS will use the /tmp directory if -mat_mumps_ooc_tmpdir is not set by user.
2826     // We don't provide option -mat_mumps_ooc_prefix, as we use F's prefix as OOC_PREFIX, which is set later during MatFactorNumeric_MUMPS() to also handle cases where users enable OOC via MatMumpsSetICNTL().
2827     PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "Out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(((MUMPS_STRUC_C *)NULL)->ooc_tmpdir), NULL));
2828   }
2829   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL));
2830   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL));
2831   if (mumps->id.ICNTL(24)) mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */
2832 
2833   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL));
2834   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL));
2835   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL));
2836   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL));
2837   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL));
2838   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
2839   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL));
2840   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL));  -- not supported by PETSc API */
2841   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
2842   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
2843   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
2844   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_37", "ICNTL(37): compression of the contribution blocks (CB)", "None", mumps->id.ICNTL(37), &mumps->id.ICNTL(37), NULL));
2845   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));
2846   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_48", "ICNTL(48): multithreading with tree parallelism", "None", mumps->id.ICNTL(48), &mumps->id.ICNTL(48), NULL));
2847   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_49", "ICNTL(49): compact workarray at the end of factorization phase", "None", mumps->id.ICNTL(49), &mumps->id.ICNTL(49), NULL));
2848   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_56", "ICNTL(56): postponing and rank-revealing factorization", "None", mumps->id.ICNTL(56), &mumps->id.ICNTL(56), NULL));
2849   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL));
2850 
2851   PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 1), &cntl, &flg));
2852   if (flg) ID_CNTL_SET(mumps->id, 1, cntl);
2853   PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", (PetscReal)ID_CNTL_GET(mumps->id, 2), &cntl, &flg));
2854   if (flg) ID_CNTL_SET(mumps->id, 2, cntl);
2855   PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 3), &cntl, &flg));
2856   if (flg) ID_CNTL_SET(mumps->id, 3, cntl);
2857   PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", (PetscReal)ID_CNTL_GET(mumps->id, 4), &cntl, &flg));
2858   if (flg) ID_CNTL_SET(mumps->id, 4, cntl);
2859   PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", (PetscReal)ID_CNTL_GET(mumps->id, 5), &cntl, &flg));
2860   if (flg) ID_CNTL_SET(mumps->id, 5, cntl);
2861   PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", (PetscReal)ID_CNTL_GET(mumps->id, 7), &cntl, &flg));
2862   if (flg) ID_CNTL_SET(mumps->id, 7, cntl);
2863 
2864   PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL));
2865   if (ninfo) {
2866     PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must <= 80", ninfo);
2867     PetscCall(PetscMalloc1(ninfo, &mumps->info));
2868     mumps->ninfo = ninfo;
2869     for (i = 0; i < ninfo; i++) {
2870       PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must between 1 and 80", ninfo);
2871       mumps->info[i] = info[i];
2872     }
2873   }
2874   PetscOptionsEnd();
2875   PetscFunctionReturn(PETSC_SUCCESS);
2876 }
2877 
MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,PETSC_UNUSED const MatFactorInfo * info,Mat_MUMPS * mumps)2878 static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info, Mat_MUMPS *mumps)
2879 {
2880   PetscFunctionBegin;
2881   if (mumps->id.INFOG(1) < 0) {
2882     PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
2883     if (mumps->id.INFOG(1) == -6) {
2884       PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2885       F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
2886     } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
2887       PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2888       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
2889     } else {
2890       PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
2891       F->factorerrortype = MAT_FACTOR_OTHER;
2892     }
2893   }
2894   if (!mumps->id.n) F->factorerrortype = MAT_FACTOR_NOERROR;
2895   PetscFunctionReturn(PETSC_SUCCESS);
2896 }
2897 
MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,PETSC_UNUSED IS c,const MatFactorInfo * info)2898 static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, PETSC_UNUSED IS c, const MatFactorInfo *info)
2899 {
2900   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2901   Vec            b;
2902   const PetscInt M = A->rmap->N;
2903 
2904   PetscFunctionBegin;
2905   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2906     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
2907     PetscFunctionReturn(PETSC_SUCCESS);
2908   }
2909 
2910   /* Set MUMPS options from the options database */
2911   PetscCall(MatSetFromOptions_MUMPS(F, A));
2912 
2913   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2914   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2915 
2916   /* analysis phase */
2917   mumps->id.job = JOB_FACTSYMBOLIC;
2918   PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
2919   switch (mumps->id.ICNTL(18)) {
2920   case 0: /* centralized assembled matrix input */
2921     if (!mumps->myid) {
2922       mumps->id.nnz = mumps->nnz;
2923       mumps->id.irn = mumps->irn;
2924       mumps->id.jcn = mumps->jcn;
2925       if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
2926       if (r && mumps->id.ICNTL(7) == 7) {
2927         mumps->id.ICNTL(7) = 1;
2928         if (!mumps->myid) {
2929           const PetscInt *idx;
2930           PetscInt        i;
2931 
2932           PetscCall(PetscMalloc1(M, &mumps->id.perm_in));
2933           PetscCall(ISGetIndices(r, &idx));
2934           for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &mumps->id.perm_in[i])); /* perm_in[]: start from 1, not 0! */
2935           PetscCall(ISRestoreIndices(r, &idx));
2936         }
2937       }
2938     }
2939     break;
2940   case 3: /* distributed assembled matrix input (size>1) */
2941     mumps->id.nnz_loc = mumps->nnz;
2942     mumps->id.irn_loc = mumps->irn;
2943     mumps->id.jcn_loc = mumps->jcn;
2944     if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
2945     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2946       PetscCall(MatCreateVecs(A, NULL, &b));
2947       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
2948       PetscCall(VecDestroy(&b));
2949     }
2950     break;
2951   }
2952   PetscMUMPS_c(mumps);
2953   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
2954 
2955   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
2956   F->ops->solve             = MatSolve_MUMPS;
2957   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
2958   F->ops->matsolve          = MatMatSolve_MUMPS;
2959   F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2960   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
2961 
2962   mumps->matstruc = SAME_NONZERO_PATTERN;
2963   PetscFunctionReturn(PETSC_SUCCESS);
2964 }
2965 
2966 /* Note the PETSc r and c permutations are ignored */
MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,PETSC_UNUSED IS r,PETSC_UNUSED IS c,const MatFactorInfo * info)2967 static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, PETSC_UNUSED IS r, PETSC_UNUSED IS c, const MatFactorInfo *info)
2968 {
2969   Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
2970   Vec            b;
2971   const PetscInt M = A->rmap->N;
2972 
2973   PetscFunctionBegin;
2974   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2975     /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
2976     PetscFunctionReturn(PETSC_SUCCESS);
2977   }
2978 
2979   /* Set MUMPS options from the options database */
2980   PetscCall(MatSetFromOptions_MUMPS(F, A));
2981 
2982   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2983   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));
2984 
2985   /* analysis phase */
2986   mumps->id.job = JOB_FACTSYMBOLIC;
2987   PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
2988   switch (mumps->id.ICNTL(18)) {
2989   case 0: /* centralized assembled matrix input */
2990     if (!mumps->myid) {
2991       mumps->id.nnz = mumps->nnz;
2992       mumps->id.irn = mumps->irn;
2993       mumps->id.jcn = mumps->jcn;
2994       if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
2995     }
2996     break;
2997   case 3: /* distributed assembled matrix input (size>1) */
2998     mumps->id.nnz_loc = mumps->nnz;
2999     mumps->id.irn_loc = mumps->irn;
3000     mumps->id.jcn_loc = mumps->jcn;
3001     if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
3002     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
3003       PetscCall(MatCreateVecs(A, NULL, &b));
3004       PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
3005       PetscCall(VecDestroy(&b));
3006     }
3007     break;
3008   }
3009   PetscMUMPS_c(mumps);
3010   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));
3011 
3012   F->ops->lufactornumeric   = MatFactorNumeric_MUMPS;
3013   F->ops->solve             = MatSolve_MUMPS;
3014   F->ops->solvetranspose    = MatSolveTranspose_MUMPS;
3015   F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS;
3016 
3017   mumps->matstruc = SAME_NONZERO_PATTERN;
3018   PetscFunctionReturn(PETSC_SUCCESS);
3019 }
3020 
3021 /* Note the PETSc r permutation and factor info are ignored */
static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, PETSC_UNUSED IS r, const MatFactorInfo *info)
{
  Mat_MUMPS     *mumps = (Mat_MUMPS *)F->data;
  Vec            b;
  const PetscInt M = A->rmap->N; /* global number of rows */

  PetscFunctionBegin;
  if (mumps->matstruc == SAME_NONZERO_PATTERN) {
    /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* Set MUMPS options from the options database */
  PetscCall(MatSetFromOptions_MUMPS(F, A));

  /* convert A into the triples (irn,jcn,val) MUMPS consumes, then gather on rank 0 when needed */
  PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
  PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps));

  /* analysis phase */
  mumps->id.job = JOB_FACTSYMBOLIC;
  PetscCall(PetscMUMPSIntCast(M, &mumps->id.n));
  switch (mumps->id.ICNTL(18)) {
  case 0: /* centralized assembled matrix input: only rank 0 passes the triples to MUMPS */
    if (!mumps->myid) {
      mumps->id.nnz = mumps->nnz;
      mumps->id.irn = mumps->irn;
      mumps->id.jcn = mumps->jcn;
      /* ICNTL(6) in 2..6 requires numerical values already during analysis */
      if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
    }
    break;
  case 3: /* distributed assembled matrix input (size>1): every rank passes its local triples */
    mumps->id.nnz_loc = mumps->nnz;
    mumps->id.irn_loc = mumps->irn;
    mumps->id.jcn_loc = mumps->jcn;
    /* ICNTL(6) in 2..6 requires numerical values already during analysis */
    if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
    if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
      PetscCall(MatCreateVecs(A, NULL, &b));
      PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq));
      PetscCall(VecDestroy(&b));
    }
    break;
  }
  PetscMUMPS_c(mumps); /* run the MUMPS analysis; status is checked below */
  PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps));

  F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
  F->ops->solve                 = MatSolve_MUMPS;
  F->ops->solvetranspose        = MatSolve_MUMPS; /* symmetric factorization: transpose solve is the same as solve */
  F->ops->matsolve              = MatMatSolve_MUMPS;
  F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
  F->ops->matsolvetranspose     = MatMatSolveTranspose_MUMPS;
#if defined(PETSC_USE_COMPLEX)
  F->ops->getinertia = NULL; /* inertia is not provided for complex matrices */
#else
  F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
#endif

  mumps->matstruc = SAME_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}
3082 
/* Print the MUMPS control parameters (ICNTL/CNTL) and statistics (INFO/INFOG/RINFO/RINFOG)
   to an ASCII viewer; per-process values are printed with synchronized output, host-only
   (global) values are printed by rank 0 */
static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer)
{
  PetscBool         isascii;
  PetscViewerFormat format;
  Mat_MUMPS        *mumps = (Mat_MUMPS *)A->data;

  PetscFunctionBegin;
  /* check if matrix is mumps type */
  if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  if (isascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n"));
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "  SYM (matrix type):                   %d\n", mumps->id.sym));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  PAR (host participation):            %d\n", mumps->id.par));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(1) (output for error):         %d\n", mumps->id.ICNTL(1)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(3) (output for global info):   %d\n", mumps->id.ICNTL(3)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(4) (level of printing):        %d\n", mumps->id.ICNTL(4)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(5) (input mat struct):         %d\n", mumps->id.ICNTL(5)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(6) (matrix prescaling):        %d\n", mumps->id.ICNTL(6)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(8) (scaling strategy):         %d\n", mumps->id.ICNTL(8)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(10) (max num of refinements):  %d\n", mumps->id.ICNTL(10)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(11) (error analysis):          %d\n", mumps->id.ICNTL(11)));
        /* the error-analysis statistics below are only computed when ICNTL(11) was enabled */
        if (mumps->id.ICNTL(11) > 0) {
          PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(4) (inf norm of input mat):        %g\n", (double)ID_RINFOG_GET(mumps->id, 4)));
          PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(5) (inf norm of solution):         %g\n", (double)ID_RINFOG_GET(mumps->id, 5)));
          PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(6) (inf norm of residual):         %g\n", (double)ID_RINFOG_GET(mumps->id, 6)));
          PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 7), (double)ID_RINFOG_GET(mumps->id, 8)));
          PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(9) (error estimate):               %g\n", (double)ID_RINFOG_GET(mumps->id, 9)));
          PetscCall(PetscViewerASCIIPrintf(viewer, "    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 10), (double)ID_RINFOG_GET(mumps->id, 11)));
        }
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(12) (efficiency control):                         %d\n", mumps->id.ICNTL(12)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(13) (sequential factorization of the root node):  %d\n", mumps->id.ICNTL(13)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(15) (compression of the input matrix):            %d\n", mumps->id.ICNTL(15)));
        /* ICNTL(16-17) not used */
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(18) (input mat struct):                           %d\n", mumps->id.ICNTL(18)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(19) (Schur complement info):                      %d\n", mumps->id.ICNTL(19)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(20) (RHS sparse pattern):                         %d\n", mumps->id.ICNTL(20)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(21) (solution struct):                            %d\n", mumps->id.ICNTL(21)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(22) (in-core/out-of-core facility):               %d\n", mumps->id.ICNTL(22)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23)));

        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(24) (detection of null pivot rows):               %d\n", mumps->id.ICNTL(24)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(25) (computation of a null space basis):          %d\n", mumps->id.ICNTL(25)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(26) (Schur options for RHS or solution):          %d\n", mumps->id.ICNTL(26)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(27) (blocking size for multiple RHS):             %d\n", mumps->id.ICNTL(27)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(28) (use parallel or sequential ordering):        %d\n", mumps->id.ICNTL(28)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(29) (parallel ordering):                          %d\n", mumps->id.ICNTL(29)));

        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(30) (user-specified set of entries in inv(A)):    %d\n", mumps->id.ICNTL(30)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(31) (factors is discarded in the solve phase):    %d\n", mumps->id.ICNTL(31)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(33) (compute determinant):                        %d\n", mumps->id.ICNTL(33)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(35) (activate BLR based factorization):           %d\n", mumps->id.ICNTL(35)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(36) (choice of BLR factorization variant):        %d\n", mumps->id.ICNTL(36)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(37) (compression of the contribution blocks):     %d\n", mumps->id.ICNTL(37)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(38) (estimated compression rate of LU factors):   %d\n", mumps->id.ICNTL(38)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(48) (multithreading with tree parallelism):       %d\n", mumps->id.ICNTL(48)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(49) (compact workarray at the end of factorization phase):%d\n", mumps->id.ICNTL(49)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(56) (postponing and rank-revealing factorization):%d\n", mumps->id.ICNTL(56)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(58) (options for symbolic factorization):         %d\n", mumps->id.ICNTL(58)));

        PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(1) (relative pivoting threshold):      %g\n", (double)ID_CNTL_GET(mumps->id, 1)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(2) (stopping criterion of refinement): %g\n", (double)ID_CNTL_GET(mumps->id, 2)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(3) (absolute pivoting threshold):      %g\n", (double)ID_CNTL_GET(mumps->id, 3)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(4) (value of static pivoting):         %g\n", (double)ID_CNTL_GET(mumps->id, 4)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(5) (fixation for null pivots):         %g\n", (double)ID_CNTL_GET(mumps->id, 5)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(7) (dropping parameter for BLR):       %g\n", (double)ID_CNTL_GET(mumps->id, 7)));

        /* information local to each processor */
        PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis):\n"));
        PetscCall(PetscViewerASCIIPushSynchronized(viewer));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 1)));
        PetscCall(PetscViewerFlush(viewer));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 2)));
        PetscCall(PetscViewerFlush(viewer));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 3)));
        PetscCall(PetscViewerFlush(viewer));

        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(15)));
        PetscCall(PetscViewerFlush(viewer));

        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(16)));
        PetscCall(PetscViewerFlush(viewer));

        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(23)));
        PetscCall(PetscViewerFlush(viewer));

        /* print any additional user-requested INFO entries (indices stored in mumps->info[]) */
        if (mumps->ninfo && mumps->ninfo <= 80) {
          PetscInt i;
          for (i = 0; i < mumps->ninfo; i++) {
            PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(%" PetscInt_FMT "):\n", mumps->info[i]));
            PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "    [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i])));
            PetscCall(PetscViewerFlush(viewer));
          }
        }
        PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      } else PetscCall(PetscViewerASCIIPrintf(viewer, "  Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : ""));

      if (mumps->myid == 0) { /* information from the host */
        PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", (double)ID_RINFOG_GET(mumps->id, 1)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 2)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 3)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", (double)ID_RINFOG_GET(mumps->id, 12), (double)ID_RINFOG_GET(mumps->id, 13), mumps->id.INFOG(34)));

        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39)));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3237 
MatGetInfo_MUMPS(Mat A,PETSC_UNUSED MatInfoType flag,MatInfo * info)3238 static PetscErrorCode MatGetInfo_MUMPS(Mat A, PETSC_UNUSED MatInfoType flag, MatInfo *info)
3239 {
3240   Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;
3241 
3242   PetscFunctionBegin;
3243   info->block_size        = 1.0;
3244   info->nz_allocated      = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
3245   info->nz_used           = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
3246   info->nz_unneeded       = 0.0;
3247   info->assemblies        = 0.0;
3248   info->mallocs           = 0.0;
3249   info->memory            = 0.0;
3250   info->fill_ratio_given  = 0;
3251   info->fill_ratio_needed = 0;
3252   info->factor_mallocs    = 0;
3253   PetscFunctionReturn(PETSC_SUCCESS);
3254 }
3255 
/* Register the index set of the Schur-complement variables with MUMPS and allocate the
   PETSc-side dense matrix that will hold the Schur complement */
static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
{
  Mat_MUMPS      *mumps = (Mat_MUMPS *)F->data;
  const PetscInt *idxs;
  PetscInt        size, i;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(is, &size));
  /* Schur complement matrix */
  PetscCall(MatDestroy(&F->schur));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
  // don't allocate mumps->id.schur[] now as its precision is yet to know
  PetscCall(PetscMUMPSIntCast(size, &mumps->id.size_schur));
  PetscCall(PetscMUMPSIntCast(size, &mumps->id.schur_lld));
  /* sym == 1 is MUMPS's SPD mode, so mark the Schur complement SPD too */
  if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE));

  /* MUMPS expects Fortran style indices */
  PetscCall(PetscFree(mumps->id.listvar_schur));
  PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
  PetscCall(ISGetIndices(is, &idxs));
  for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &mumps->id.listvar_schur[i]));
  PetscCall(ISRestoreIndices(is, &idxs));
  /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
  if (mumps->id.icntl) mumps->id.ICNTL(26) = -1; /* MUMPS already initialized: set directly */
  else mumps->ICNTL26 = -1;                      /* otherwise cache the value until initialization */
  PetscFunctionReturn(PETSC_SUCCESS);
}
3283 
/* Create a new sequential dense matrix *S holding a copy of the MUMPS Schur complement,
   converting from whichever storage layout MUMPS used (selected by ICNTL(19)) into
   PETSc's column-major full storage */
static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
{
  Mat          St;
  Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
  PetscScalar *array;
  PetscInt     i, j, N = mumps->id.size_schur;

  PetscFunctionBegin;
  PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! Call MatFactorSetSchurIS() to enable it");
  PetscCall(MatCreate(PETSC_COMM_SELF, &St));
  PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
  PetscCall(MatSetType(St, MATDENSE));
  PetscCall(MatSetUp(St));
  PetscCall(MatDenseGetArray(St, &array));
  if (!mumps->sym) {                /* MUMPS always return a full matrix */
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
      /* transpose from MUMPS row-major storage into PETSc's column-major array */
      for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
      }
    } else { /* stored by columns */
      /* layouts match; the helper copies, handling any precision difference */
      PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array));
    }
  } else {                          /* either full or lower-triangular (not packed) */
    if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
      /* mirror the stored triangle so array holds the full symmetric matrix */
      for (i = 0; i < N; i++) {
        for (j = i; j < N; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
      }
    } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
      PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array));
    } else { /* ICNTL(19) == 1 lower triangular stored by rows */
      for (i = 0; i < N; i++) {
        for (j = 0; j < i + 1; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
      }
    }
  }
  PetscCall(MatDenseRestoreArray(St, &array));
  *S = St;
  PetscFunctionReturn(PETSC_SUCCESS);
}
3323 
/* Set ICNTL(icntl) = ival. Before MUMPS is initialized (id.job == JOB_NULL) the value is
   cached in mumps->ICNTL_pre and applied later; afterwards it is written directly into
   the MUMPS structure.
   ICNTL_pre layout: [0] = number of cached pairs, then (icntl, ival) pairs stored at
   indices [1 + 2*i] and [2 + 2*i]. */
static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) {                                            /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
    PetscMUMPSInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
    for (i = 0; i < nICNTL_pre; ++i)
      if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
    if (i == nICNTL_pre) {                             /* not already cached */
      /* grow the cache by one pair (or create it with room for the first pair) */
      if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
      else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
      mumps->ICNTL_pre[0]++;
    }
    /* store (or overwrite) the pair at slot i */
    mumps->ICNTL_pre[1 + 2 * i] = (PetscMUMPSInt)icntl;
    PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i));
  } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl)));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3343 
MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt * ival)3344 static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
3345 {
3346   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3347 
3348   PetscFunctionBegin;
3349   if (mumps->id.job == JOB_NULL) {
3350     PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
3351     *ival = 0;
3352     for (i = 0; i < nICNTL_pre; ++i) {
3353       if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i];
3354     }
3355   } else *ival = mumps->id.ICNTL(icntl);
3356   PetscFunctionReturn(PETSC_SUCCESS);
3357 }
3358 
3359 /*@
3360   MatMumpsSetIcntl - Set MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>
3361 
3362   Logically Collective
3363 
3364   Input Parameters:
3365 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3366 . icntl - index of MUMPS parameter array `ICNTL()`
3367 - ival  - value of MUMPS `ICNTL(icntl)`
3368 
3369   Options Database Key:
3370 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered `icntl` to `ival`
3371 
3372   Level: beginner
3373 
3374   Note:
3375   Ignored if MUMPS is not installed or `F` is not a MUMPS matrix
3376 
3377 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3378 @*/
MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival)3379 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival)
3380 {
3381   PetscFunctionBegin;
3382   PetscValidType(F, 1);
3383   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3384   PetscValidLogicalCollectiveInt(F, icntl, 2);
3385   PetscValidLogicalCollectiveInt(F, ival, 3);
3386   PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 49 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
3387   PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
3388   PetscFunctionReturn(PETSC_SUCCESS);
3389 }
3390 
3391 /*@
3392   MatMumpsGetIcntl - Get MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>
3393 
3394   Logically Collective
3395 
3396   Input Parameters:
3397 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3398 - icntl - index of MUMPS parameter array ICNTL()
3399 
3400   Output Parameter:
3401 . ival - value of MUMPS ICNTL(icntl)
3402 
3403   Level: beginner
3404 
3405 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3406 @*/
MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt * ival)3407 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival)
3408 {
3409   PetscFunctionBegin;
3410   PetscValidType(F, 1);
3411   PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
3412   PetscValidLogicalCollectiveInt(F, icntl, 2);
3413   PetscAssertPointer(ival, 3);
3414   PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 49 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
3415   PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
3416   PetscFunctionReturn(PETSC_SUCCESS);
3417 }
3418 
/* Set CNTL(icntl) = val. Before MUMPS is initialized the value is cached in
   mumps->CNTL_pre and applied later; afterwards it is written directly.
   CNTL_pre layout (all entries stored as PetscReal, including the index and the count):
   [0] = number of cached pairs, then (icntl, val) pairs at [1 + 2*i] and [2 + 2*i]. */
static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) { /* need to cache: PetscMUMPS_c() has never been called */
    PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
    for (i = 0; i < nCNTL_pre; ++i)
      if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; /* already cached? then overwrite below */
    if (i == nCNTL_pre) { /* not cached: grow the cache by one pair (or create it) */
      if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre));
      else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre));
      mumps->CNTL_pre[0]++;
    }
    mumps->CNTL_pre[1 + 2 * i] = icntl;
    mumps->CNTL_pre[2 + 2 * i] = val;
  } else ID_CNTL_SET(mumps->id, icntl, val);
  PetscFunctionReturn(PETSC_SUCCESS);
}
3438 
MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal * val)3439 static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val)
3440 {
3441   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3442 
3443   PetscFunctionBegin;
3444   if (mumps->id.job == JOB_NULL) {
3445     PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
3446     *val = 0.0;
3447     for (i = 0; i < nCNTL_pre; ++i) {
3448       if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i];
3449     }
3450   } else *val = ID_CNTL_GET(mumps->id, icntl);
3451   PetscFunctionReturn(PETSC_SUCCESS);
3452 }
3453 
3454 /*@C
3455   MatMumpsSetOocTmpDir - Set MUMPS out-of-core `OOC_TMPDIR` <https://mumps-solver.org/index.php?page=doc>
3456 
3457   Logically Collective
3458 
3459   Input Parameters:
3460 + F      - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`.
3461 - tmpdir - temporary directory for out-of-core facility.
3462 
3463   Level: beginner
3464 
3465   Note:
3466   To make it effective, this routine must be called before the numeric factorization, i.e., `PCSetUp()`.
3467   If `ooc_tmpdir` is not set, MUMPS will also check the environment variable `MUMPS_OOC_TMPDIR`. But if neither was defined, it will use /tmp by default.
3468 
3469 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetOocTmpDir`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3470 @*/
MatMumpsSetOocTmpDir(Mat F,const char * tmpdir)3471 PetscErrorCode MatMumpsSetOocTmpDir(Mat F, const char *tmpdir)
3472 {
3473   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3474 
3475   PetscFunctionBegin;
3476   PetscValidHeaderSpecific(F, MAT_CLASSID, 1);
3477   PetscValidType(F, 1);
3478   PetscCall(PetscStrncpy(mumps->id.ooc_tmpdir, tmpdir, sizeof(((MUMPS_STRUC_C *)NULL)->ooc_tmpdir)));
3479   PetscFunctionReturn(PETSC_SUCCESS);
3480 }
3481 
3482 /*@C
3483   MatMumpsGetOocTmpDir - Get MUMPS out-of-core `OOC_TMPDIR` <https://mumps-solver.org/index.php?page=doc>
3484 
3485   Logically Collective
3486 
3487   Input Parameter:
3488 . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`.
3489 
3490   Output Parameter:
3491 . tmpdir - temporary directory for out-of-core facility.
3492 
3493   Level: beginner
3494 
3495   Note:
3496   The returned string is read-only and user should not try to change it.
3497 
3498 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetOocTmpDir()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3499 @*/
MatMumpsGetOocTmpDir(Mat F,const char ** tmpdir)3500 PetscErrorCode MatMumpsGetOocTmpDir(Mat F, const char **tmpdir)
3501 {
3502   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3503 
3504   PetscFunctionBegin;
3505   PetscValidHeaderSpecific(F, MAT_CLASSID, 1);
3506   PetscValidType(F, 1);
3507   if (tmpdir) *tmpdir = mumps->id.ooc_tmpdir;
3508   PetscFunctionReturn(PETSC_SUCCESS);
3509 }
3510 
3511 /*@
3512   MatMumpsSetCntl - Set MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc>
3513 
3514   Logically Collective
3515 
3516   Input Parameters:
3517 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3518 . icntl - index of MUMPS parameter array `CNTL()`
3519 - val   - value of MUMPS `CNTL(icntl)`
3520 
3521   Options Database Key:
3522 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to val
3523 
3524   Level: beginner
3525 
3526   Note:
3527   Ignored if MUMPS is not installed or `F` is not a MUMPS matrix
3528 
3529 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3530 @*/
/* Public wrapper: validate arguments, then dispatch to the MUMPS-specific implementation. */
PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidLogicalCollectiveInt(F, icntl, 2);
  PetscValidLogicalCollectiveReal(F, val, 3);
  /* only CNTL(1)..CNTL(7) are accepted through this interface */
  PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
  /* PetscTryMethod(): silently a no-op when F is not a MUMPS factor (see the manpage Note) */
  PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3542 
3543 /*@
3544   MatMumpsGetCntl - Get MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc>
3545 
3546   Logically Collective
3547 
3548   Input Parameters:
3549 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3550 - icntl - index of MUMPS parameter array CNTL()
3551 
3552   Output Parameter:
3553 . val - value of MUMPS CNTL(icntl)
3554 
3555   Level: beginner
3556 
3557 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3558 @*/
/* Public wrapper: validate arguments, then query CNTL(icntl) from the MUMPS factor. */
PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidLogicalCollectiveInt(F, icntl, 2);
  PetscAssertPointer(val, 3);
  /* only CNTL(1)..CNTL(7) are accepted through this interface */
  PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
  /* PetscUseMethod(): errors if F is not a MUMPS factor, unlike the Set counterpart */
  PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3570 
MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt * info)3571 static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info)
3572 {
3573   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3574 
3575   PetscFunctionBegin;
3576   *info = mumps->id.INFO(icntl);
3577   PetscFunctionReturn(PETSC_SUCCESS);
3578 }
3579 
MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt * infog)3580 static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog)
3581 {
3582   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3583 
3584   PetscFunctionBegin;
3585   *infog = mumps->id.INFOG(icntl);
3586   PetscFunctionReturn(PETSC_SUCCESS);
3587 }
3588 
MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal * rinfo)3589 static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo)
3590 {
3591   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3592 
3593   PetscFunctionBegin;
3594   *rinfo = ID_RINFO_GET(mumps->id, icntl);
3595   PetscFunctionReturn(PETSC_SUCCESS);
3596 }
3597 
MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal * rinfog)3598 static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog)
3599 {
3600   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3601 
3602   PetscFunctionBegin;
3603   *rinfog = ID_RINFOG_GET(mumps->id, icntl);
3604   PetscFunctionReturn(PETSC_SUCCESS);
3605 }
3606 
MatMumpsGetNullPivots_MUMPS(Mat F,PetscInt * size,PetscInt ** array)3607 static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array)
3608 {
3609   Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
3610 
3611   PetscFunctionBegin;
3612   PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
3613   *size  = 0;
3614   *array = NULL;
3615   if (!mumps->myid) {
3616     *size = mumps->id.INFOG(28);
3617     PetscCall(PetscMalloc1(*size, array));
3618     for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1;
3619   }
3620   PetscFunctionReturn(PETSC_SUCCESS);
3621 }
3622 
/* Implementation behind MatMumpsGetInverse(): compute user-requested entries of inv(A) by
   running a MUMPS solve with a sparse right-hand side (ICNTL(30) functionality). spRHS must be
   a MATTRANSPOSEVIRTUAL wrapper; its underlying AIJ matrix supplies the sparsity pattern of the
   requested entries and receives the computed values in place. */
static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS)
{
  Mat          Bt = NULL, Btseq = NULL;
  PetscBool    flg;
  Mat_MUMPS   *mumps = (Mat_MUMPS *)F->data;
  PetscScalar *aa;
  PetscInt     spnr, *ia, *ja, M, nrhs;

  PetscFunctionBegin;
  PetscAssertPointer(spRHS, 2);
  PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg));
  PetscCheck(flg, PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix");
  /* reject shell wrappers carrying scaling/shift state, which this routine cannot honor */
  PetscCall(MatShellGetScalingShifts(spRHS, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
  PetscCall(MatTransposeGetMat(spRHS, &Bt));

  /* ICNTL(30)=1: ask MUMPS to compute selected entries of the inverse */
  PetscCall(MatMumpsSetIcntl(F, 30, 1));

  if (mumps->petsc_size > 1) {
    /* in parallel only the diagonal (sequential) block of rank 0 feeds MUMPS below */
    Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data;
    Btseq         = b->A;
  } else {
    Btseq = Bt;
  }

  PetscCall(MatGetSize(spRHS, &M, &nrhs));
  mumps->id.nrhs = (PetscMUMPSInt)nrhs;
  PetscCall(PetscMUMPSIntCast(M, &mumps->id.lrhs));
  mumps->id.rhs = NULL; /* dense RHS unused; the sparse-RHS fields below are used instead */

  if (!mumps->myid) {
    /* host rank: hand MUMPS the CSR structure (1-based, hence shift=1) and values of Bt */
    PetscCall(MatSeqAIJGetArray(Btseq, &aa));
    PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
    PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
    PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)Btseq->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
  } else {
    /* non-host ranks provide no sparse RHS; release any buffer left from a previous call */
    mumps->id.irhs_ptr    = NULL;
    mumps->id.irhs_sparse = NULL;
    mumps->id.nz_rhs      = 0;
    if (mumps->id.rhs_sparse_len) {
      PetscCall(PetscFree(mumps->id.rhs_sparse));
      mumps->id.rhs_sparse_len = 0;
    }
  }
  mumps->id.ICNTL(20) = 1; /* rhs is sparse */
  mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */

  /* solve phase */
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));

  if (!mumps->myid) {
    /* restore what was borrowed from Btseq; the requested inverse entries now sit in aa */
    PetscCall(MatSeqAIJRestoreArray(Btseq, &aa));
    PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3682 
3683 /*@
3684   MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` <https://mumps-solver.org/index.php?page=doc>
3685 
3686   Logically Collective
3687 
3688   Input Parameter:
3689 . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3690 
3691   Output Parameter:
3692 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A`
3693 
3694   Level: beginner
3695 
3696 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`
3697 @*/
/* Public wrapper: validate F, then dispatch to MatMumpsGetInverse_MUMPS(). */
PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  /* PetscUseMethod(): errors if F is not a MUMPS factor */
  PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3706 
/* Implementation behind MatMumpsGetInverseTranspose(): wrap spRHST in a virtual transpose so
   the sparse-RHS driver MatMumpsGetInverse_MUMPS() (which requires MATTRANSPOSEVIRTUAL input)
   can be reused; the wrapper is destroyed afterwards, leaving results in spRHST. */
static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST)
{
  Mat spRHS;

  PetscFunctionBegin;
  PetscCall(MatCreateTranspose(spRHST, &spRHS));
  PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS));
  PetscCall(MatDestroy(&spRHS));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3717 
3718 /*@
3719   MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix $A^T $ <https://mumps-solver.org/index.php?page=doc>
3720 
3721   Logically Collective
3722 
3723   Input Parameter:
3724 . F - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3725 
3726   Output Parameter:
3727 . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T
3728 
3729   Level: beginner
3730 
3731 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()`
3732 @*/
/* Public wrapper: validate F and spRHST (must be AIJ), then dispatch to the MUMPS implementation. */
PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST)
{
  PetscBool flg;

  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  /* unlike MatMumpsGetInverse(), the pattern matrix here is a plain (seq or MPI) AIJ matrix */
  PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
  PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix");
  PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3745 
/* Implementation behind MatMumpsSetBlk(): copy the user's block description (nblk, blkvar,
   blkptr) into MUMPS-sized integer arrays owned by mumps->id. nblk==0 clears any previously
   set block data. */
static PetscErrorCode MatMumpsSetBlk_MUMPS(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[])
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (nblk) {
    PetscAssertPointer(blkptr, 4);
    PetscCall(PetscMUMPSIntCast(nblk, &mumps->id.nblk));
    /* free-then-alloc: replaces any blkptr from a previous call */
    PetscCall(PetscFree(mumps->id.blkptr));
    PetscCall(PetscMalloc1(nblk + 1, &mumps->id.blkptr));
    for (PetscInt i = 0; i < nblk + 1; ++i) PetscCall(PetscMUMPSIntCast(blkptr[i], mumps->id.blkptr + i));
    // mumps->id.icntl[] might have not been allocated, which is done in MatSetFromOptions_MUMPS(). So we don't assign ICNTL(15).
    // We use id.nblk and id.blkptr to know what values to set to ICNTL(15) in MatSetFromOptions_MUMPS().
    // mumps->id.ICNTL(15) = 1;
    if (blkvar) { /* blkvar is optional; its length is the global number of rows */
      /* NOTE(review): when blkvar is NULL a blkvar array from a previous call is kept, not
         freed — confirm that retaining the old variable list is the intended semantics */
      PetscCall(PetscFree(mumps->id.blkvar));
      PetscCall(PetscMalloc1(F->rmap->N, &mumps->id.blkvar));
      for (PetscInt i = 0; i < F->rmap->N; ++i) PetscCall(PetscMUMPSIntCast(blkvar[i], mumps->id.blkvar + i));
    }
  } else {
    /* nblk == 0: discard any previously supplied block information */
    PetscCall(PetscFree(mumps->id.blkptr));
    PetscCall(PetscFree(mumps->id.blkvar));
    // mumps->id.ICNTL(15) = 0;
    mumps->id.nblk = 0;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3773 
3774 /*@
3775   MatMumpsSetBlk - Set user-specified variable block sizes to be used with `-mat_mumps_icntl_15 1`
3776 
3777   Not collective, only relevant on the first process of the MPI communicator
3778 
3779   Input Parameters:
3780 + F      - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3781 . nblk   - the number of blocks
3782 . blkvar - see MUMPS documentation, `blkvar(blkptr(iblk):blkptr(iblk+1)-1)`, (`iblk=1, nblk`) holds the variables associated to block `iblk`
3783 - blkptr - array starting at 1 and of size `nblk + 1` storing the prefix sum of all blocks
3784 
3785   Level: advanced
3786 
3787 .seealso: [](ch_matrices), `MATSOLVERMUMPS`, `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatSetVariableBlockSizes()`
3788 @*/
/* Public wrapper: validate F, then dispatch to MatMumpsSetBlk_MUMPS(). */
PetscErrorCode MatMumpsSetBlk(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[])
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  /* PetscUseMethod(): errors if F is not a MUMPS factor */
  PetscUseMethod(F, "MatMumpsSetBlk_C", (Mat, PetscInt, const PetscInt[], const PetscInt[]), (F, nblk, blkvar, blkptr));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3797 
3798 /*@
3799   MatMumpsGetInfo - Get MUMPS parameter INFO() <https://mumps-solver.org/index.php?page=doc>
3800 
3801   Logically Collective
3802 
3803   Input Parameters:
3804 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3805 - icntl - index of MUMPS parameter array INFO()
3806 
3807   Output Parameter:
3808 . ival - value of MUMPS INFO(icntl)
3809 
3810   Level: beginner
3811 
3812 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3813 @*/
/* Public wrapper: validate arguments, then read INFO(icntl) from the MUMPS factor. */
PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscAssertPointer(ival, 3);
  PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3823 
3824 /*@
3825   MatMumpsGetInfog - Get MUMPS parameter INFOG() <https://mumps-solver.org/index.php?page=doc>
3826 
3827   Logically Collective
3828 
3829   Input Parameters:
3830 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3831 - icntl - index of MUMPS parameter array INFOG()
3832 
3833   Output Parameter:
3834 . ival - value of MUMPS INFOG(icntl)
3835 
3836   Level: beginner
3837 
3838 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
3839 @*/
/* Public wrapper: validate arguments, then read INFOG(icntl) from the MUMPS factor. */
PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscAssertPointer(ival, 3);
  PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3849 
3850 /*@
3851   MatMumpsGetRinfo - Get MUMPS parameter RINFO() <https://mumps-solver.org/index.php?page=doc>
3852 
3853   Logically Collective
3854 
3855   Input Parameters:
3856 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3857 - icntl - index of MUMPS parameter array RINFO()
3858 
3859   Output Parameter:
3860 . val - value of MUMPS RINFO(icntl)
3861 
3862   Level: beginner
3863 
3864 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
3865 @*/
/* Public wrapper: validate arguments, then read RINFO(icntl) from the MUMPS factor. */
PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscAssertPointer(val, 3);
  PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3875 
3876 /*@
3877   MatMumpsGetRinfog - Get MUMPS parameter RINFOG() <https://mumps-solver.org/index.php?page=doc>
3878 
3879   Logically Collective
3880 
3881   Input Parameters:
3882 + F     - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3883 - icntl - index of MUMPS parameter array RINFOG()
3884 
3885   Output Parameter:
3886 . val - value of MUMPS RINFOG(icntl)
3887 
3888   Level: beginner
3889 
3890 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
3891 @*/
/* Public wrapper: validate arguments, then read RINFOG(icntl) from the MUMPS factor. */
PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscAssertPointer(val, 3);
  PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3901 
3902 /*@
3903   MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() <https://mumps-solver.org/index.php?page=doc>
3904 
3905   Logically Collective
3906 
3907   Input Parameter:
3908 . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
3909 
3910   Output Parameters:
3911 + size  - local size of the array. The size of the array is non-zero only on MPI rank 0
3912 - array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible
3913           for freeing this array.
3914 
3915   Level: beginner
3916 
3917 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
3918 @*/
/* Public wrapper: validate arguments, then dispatch to MatMumpsGetNullPivots_MUMPS().
   On return, rank 0 owns the allocated *array (0-based null-pivot rows) and must free it. */
PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscAssertPointer(size, 2);
  PetscAssertPointer(array, 3);
  PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3929 
3930 /*MC
3931   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
3932   MPI distributed and sequential matrices via the external package MUMPS <https://mumps-solver.org/index.php?page=doc>
3933 
3934   Works with `MATAIJ` and `MATSBAIJ` matrices
3935 
3936   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
3937 
3938   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode.
3939   See details below.
3940 
3941   Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver
3942 
3943   Options Database Keys:
3944 +  -mat_mumps_icntl_1  - ICNTL(1): output stream for error messages
3945 .  -mat_mumps_icntl_2  - ICNTL(2): output stream for diagnostic printing, statistics, and warning
3946 .  -mat_mumps_icntl_3  - ICNTL(3): output stream for global information, collected on the host
3947 .  -mat_mumps_icntl_4  - ICNTL(4): level of printing (0 to 4)
3948 .  -mat_mumps_icntl_6  - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)
3949 .  -mat_mumps_icntl_7  - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
3950                           Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
3951 .  -mat_mumps_icntl_8  - ICNTL(8): scaling strategy (-2 to 8 or 77)
3952 .  -mat_mumps_icntl_10 - ICNTL(10): max num of refinements
3953 .  -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view)
3954 .  -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
3955 .  -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
3956 .  -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space
3957 .  -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format
3958 .  -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement
3959 .  -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
3960 .  -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
3961 .  -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can allocate per processor
3962 .  -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
3963 .  -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis
3964 .  -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix
3965 .  -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
3966 .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
3967 .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
3968 .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
3969 .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
3970 .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
3971 .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
3972 .  -mat_mumps_icntl_37 - ICNTL(37): compression of the contribution blocks (CB)
3973 .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
3974 .  -mat_mumps_icntl_48 - ICNTL(48): multithreading with tree parallelism
3975 .  -mat_mumps_icntl_49 - ICNTL(49): compact workarray at the end of factorization phase
3976 .  -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization
3977 .  -mat_mumps_cntl_1   - CNTL(1): relative pivoting threshold
3978 .  -mat_mumps_cntl_2   - CNTL(2): stopping criterion of refinement
3979 .  -mat_mumps_cntl_3   - CNTL(3): absolute pivoting threshold
3980 .  -mat_mumps_cntl_4   - CNTL(4): value for static pivoting
3981 .  -mat_mumps_cntl_5   - CNTL(5): fixation for null pivots
3982 .  -mat_mumps_cntl_7   - CNTL(7): precision of the dropping parameter used during BLR factorization
3983 -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
3984                                     Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.
3985 
3986   Level: beginner
3987 
3988   Notes:
3989   MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at <https://mumps-solver.org/index.php?page=doc>) so using it will
3990   error if the matrix is Hermitian.
3991 
3992   When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
3993   `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.
3994 
3995   When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about
3996   the failure with
3997 .vb
3998           KSPGetPC(ksp,&pc);
3999           PCFactorGetMatrix(pc,&mat);
4000           MatMumpsGetInfo(mat,....);
4001           MatMumpsGetInfog(mat,....); etc.
4002 .ve
4003   Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message.
4004 
4005   MUMPS provides 64-bit integer support in two build modes:
4006   full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
4007   requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
4008 
4009   selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
4010   MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
4011   columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
4012   integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
4013 
4014   With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.
4015 
4016   Two modes to run MUMPS/PETSc with OpenMP
4017 .vb
4018    Set `OMP_NUM_THREADS` and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
4019    threads per rank, then you may use "export `OMP_NUM_THREADS` = 16 && mpiexec -n 4 ./test".
4020 .ve
4021 
4022 .vb
4023    `-mat_mumps_use_omp_threads` [m] and run your code with as many MPI ranks as the number of cores. For example,
4024    if a compute node has 32 cores and you run on two nodes, you may use "mpiexec -n 64 ./test -mat_mumps_use_omp_threads 16"
4025 .ve
4026 
4027    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
4028    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc`
4029    (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
4030    libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
4031    (PETSc will automatically try to utilize a threaded BLAS if `--with-openmp` is provided).
4032 
4033    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
4034    processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
4035    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
4036    are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set
4037    by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs.
4038    In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets,
4039    if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind
4040    MPI ranks to cores, then with `-mat_mumps_use_omp_threads` 16, a master rank (and threads it spawns) will use half cores in socket 0, and half
4041    cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the
4042    problem will not happen. Therefore, when you use `-mat_mumps_use_omp_threads`, you need to keep an eye on your MPI rank mapping and CPU binding.
4043    For example, with the Slurm job scheduler, one can use srun `--cpu-bind`=verbose -m block:block to map consecutive MPI ranks to sockets and
4044    examine the mapping result.
4045 
4046    PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts,
4047    for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc
4048    calls `omp_set_num_threads`(m) internally before calling MUMPS.
4049 
4050    See {cite}`heroux2011bi` and {cite}`gutierrez2017accommodating`
4051 
4052 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `MatMumpsSetBlk()`, `KSPGetPC()`, `PCFactorGetMatrix()`
4053 M*/
4054 
/* MatFactorGetSolverType() implementation for MUMPS factors: always reports MATSOLVERMUMPS. */
static PetscErrorCode MatFactorGetSolverType_mumps(PETSC_UNUSED Mat A, MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERMUMPS;
  PetscFunctionReturn(PETSC_SUCCESS);
}
4061 
4062 /* MatGetFactor for Seq and MPI AIJ matrices */
static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F)
{
  Mat         B;
  Mat_MUMPS  *mumps;
  PetscBool   isSeqAIJ, isDiag, isDense;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* A complex matrix known to be Hermitian but not symmetric cannot use MUMPS's symmetric
     (LDL^T) factorization; decline by returning *F = NULL so the caller can fall back to LU */
  if (PetscDefined(USE_COMPLEX) && ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
    PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
    *F = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  /* Create the factorization matrix */
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
  PetscCall(MatSetUp(B));

  PetscCall(PetscNew(&mumps));

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  /* Compose the MUMPS-specific query/control functions so MatMumpsXxx() APIs dispatch to this factor */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    /* Select the triples-conversion routine matching the input matrix type; LU feeds MUMPS the full pattern (aij target) */
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
    else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
    else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
    PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
    mumps->sym = 0; /* MUMPS SYM=0: unsymmetric */
  } else {
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
    B->factortype                  = MAT_FACTOR_CHOLESKY;
    /* Cholesky feeds MUMPS only one triangle (sbaij target) */
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
    else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij;
    else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
    PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
    /* MUMPS SYM: 1 = symmetric positive definite, 2 = general symmetric; complex always uses 2 */
    if (PetscDefined(USE_COMPLEX)) mumps->sym = 2;
    else if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
    else mumps->sym = 2;
  }

  /* set solvertype */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void *)mumps;

  *F               = B;
  mumps->id.job    = JOB_NULL;
  mumps->ICNTL_pre = NULL;
  mumps->CNTL_pre  = NULL;
  mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}
4146 
4147 /* MatGetFactor for Seq and MPI SBAIJ matrices */
MatGetFactor_sbaij_mumps(Mat A,PETSC_UNUSED MatFactorType ftype,Mat * F)4148 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, PETSC_UNUSED MatFactorType ftype, Mat *F)
4149 {
4150   Mat         B;
4151   Mat_MUMPS  *mumps;
4152   PetscBool   isSeqSBAIJ;
4153   PetscMPIInt size;
4154 
4155   PetscFunctionBegin;
4156   if (PetscDefined(USE_COMPLEX) && ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
4157     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
4158     *F = NULL;
4159     PetscFunctionReturn(PETSC_SUCCESS);
4160   }
4161   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4162   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4163   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4164   PetscCall(MatSetUp(B));
4165 
4166   PetscCall(PetscNew(&mumps));
4167   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ));
4168   if (isSeqSBAIJ) {
4169     mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
4170   } else {
4171     mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
4172   }
4173 
4174   B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
4175   B->ops->view                   = MatView_MUMPS;
4176   B->ops->getinfo                = MatGetInfo_MUMPS;
4177 
4178   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4179   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4180   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4181   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4182   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4183   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4184   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4185   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4186   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4187   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4188   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4189   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4190   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
4191   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
4192   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));
4193 
4194   B->factortype = MAT_FACTOR_CHOLESKY;
4195   if (PetscDefined(USE_COMPLEX)) mumps->sym = 2;
4196   else if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
4197   else mumps->sym = 2;
4198 
4199   /* set solvertype */
4200   PetscCall(PetscFree(B->solvertype));
4201   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4202   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4203   if (size == 1) {
4204     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4205     B->canuseordering = PETSC_TRUE;
4206   }
4207   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
4208   B->ops->destroy = MatDestroy_MUMPS;
4209   B->data         = (void *)mumps;
4210 
4211   *F               = B;
4212   mumps->id.job    = JOB_NULL;
4213   mumps->ICNTL_pre = NULL;
4214   mumps->CNTL_pre  = NULL;
4215   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
4216   PetscFunctionReturn(PETSC_SUCCESS);
4217 }
4218 
MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat * F)4219 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F)
4220 {
4221   Mat         B;
4222   Mat_MUMPS  *mumps;
4223   PetscBool   isSeqBAIJ;
4224   PetscMPIInt size;
4225 
4226   PetscFunctionBegin;
4227   /* Create the factorization matrix */
4228   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ));
4229   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4230   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4231   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4232   PetscCall(MatSetUp(B));
4233 
4234   PetscCall(PetscNew(&mumps));
4235   PetscCheck(ftype == MAT_FACTOR_LU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
4236   B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
4237   B->factortype            = MAT_FACTOR_LU;
4238   if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
4239   else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
4240   mumps->sym = 0;
4241   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
4242 
4243   B->ops->view    = MatView_MUMPS;
4244   B->ops->getinfo = MatGetInfo_MUMPS;
4245 
4246   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4247   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4248   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4249   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4250   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4251   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4252   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4253   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4254   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4255   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4256   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4257   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4258   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
4259   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
4260   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));
4261 
4262   /* set solvertype */
4263   PetscCall(PetscFree(B->solvertype));
4264   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4265   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4266   if (size == 1) {
4267     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
4268     B->canuseordering = PETSC_TRUE;
4269   }
4270   B->ops->destroy = MatDestroy_MUMPS;
4271   B->data         = (void *)mumps;
4272 
4273   *F               = B;
4274   mumps->id.job    = JOB_NULL;
4275   mumps->ICNTL_pre = NULL;
4276   mumps->CNTL_pre  = NULL;
4277   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
4278   PetscFunctionReturn(PETSC_SUCCESS);
4279 }
4280 
4281 /* MatGetFactor for Seq and MPI SELL matrices */
MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat * F)4282 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F)
4283 {
4284   Mat         B;
4285   Mat_MUMPS  *mumps;
4286   PetscBool   isSeqSELL;
4287   PetscMPIInt size;
4288 
4289   PetscFunctionBegin;
4290   /* Create the factorization matrix */
4291   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL));
4292   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
4293   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
4294   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
4295   PetscCall(MatSetUp(B));
4296 
4297   PetscCall(PetscNew(&mumps));
4298 
4299   B->ops->view    = MatView_MUMPS;
4300   B->ops->getinfo = MatGetInfo_MUMPS;
4301 
4302   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
4303   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
4304   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
4305   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
4306   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
4307   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
4308   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
4309   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
4310   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
4311   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
4312   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
4313   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
4314 
4315   PetscCheck(ftype == MAT_FACTOR_LU, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
4316   B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
4317   B->factortype            = MAT_FACTOR_LU;
4318   PetscCheck(isSeqSELL, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented");
4319   mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
4320   mumps->sym              = 0;
4321   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU]));
4322 
4323   /* set solvertype */
4324   PetscCall(PetscFree(B->solvertype));
4325   PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
4326   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
4327   if (size == 1) {
4328     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization  */
4329     B->canuseordering = PETSC_TRUE;
4330   }
4331   B->ops->destroy = MatDestroy_MUMPS;
4332   B->data         = (void *)mumps;
4333 
4334   *F               = B;
4335   mumps->id.job    = JOB_NULL;
4336   mumps->ICNTL_pre = NULL;
4337   mumps->CNTL_pre  = NULL;
4338   mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
4339   PetscFunctionReturn(PETSC_SUCCESS);
4340 }
4341 
4342 /* MatGetFactor for MATNEST matrices */
static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F)
{
  Mat         B, **mats;
  Mat_MUMPS  *mumps;
  PetscInt    nr, nc;
  PetscMPIInt size;
  PetscBool   flg = PETSC_TRUE;

  PetscFunctionBegin;
  /* A complex matrix known to be Hermitian but not symmetric cannot use MUMPS's symmetric
     (LDL^T) factorization; decline by returning *F = NULL so the caller can fall back to LU */
  if (PetscDefined(USE_COMPLEX) && ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
    PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n"));
    *F = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* Return if some condition is not satisfied */
  *F = NULL;
  PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
  if (ftype == MAT_FACTOR_CHOLESKY) {
    IS       *rows, *cols;
    PetscInt *m, *M;

    /* Cholesky requires a structurally symmetric nest: square block layout ... */
    PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc);
    PetscCall(PetscMalloc2(nr, &rows, nc, &cols));
    PetscCall(MatNestGetISs(A, rows, cols));
    /* ... with identical row and column index sets per block row ... */
    for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg));
    if (!flg) {
      PetscCall(PetscFree2(rows, cols));
      PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n"));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    /* ... and pairwise non-overlapping row index ranges: reject if any [m[k],M[k]] interval
       contains an endpoint of another block's range */
    PetscCall(PetscMalloc2(nr, &m, nr, &M));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r]));
    for (PetscInt r = 0; flg && r < nr; r++)
      for (PetscInt k = r + 1; flg && k < nr; k++)
        if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE;
    PetscCall(PetscFree2(m, M));
    PetscCall(PetscFree2(rows, cols));
    if (!flg) {
      PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n"));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  /* Screen every sub-block type; any unsupported block type declines the factorization (*F stays NULL) */
  for (PetscInt r = 0; r < nr; r++) {
    for (PetscInt c = 0; c < nc; c++) {
      Mat       sub = mats[r][c];
      PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense;

      /* for Cholesky only the upper block triangle is inspected; NULL blocks are fine */
      if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue;
      /* unwrap virtual transposes so the underlying storage type is tested */
      PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
      PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
      PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));
      if (ftype == MAT_FACTOR_CHOLESKY) {
        if (r == c) {
          /* diagonal blocks may additionally be SBAIJ */
          if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag && !isDense) {
            PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
            flg = PETSC_FALSE;
          }
        } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
          PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name));
          flg = PETSC_FALSE;
        }
      } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) {
        PetscCall(PetscInfo(sub, "MAT_FACTOR_LU not supported for block of type %s.\n", ((PetscObject)sub)->type_name));
        flg = PETSC_FALSE;
      }
    }
  }
  if (!flg) PetscFunctionReturn(PETSC_SUCCESS);

  /* Create the factorization matrix */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name));
  PetscCall(MatSetUp(B));

  PetscCall(PetscNew(&mumps))
;
  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  /* Compose the MUMPS-specific query/control functions so MatMumpsXxx() APIs dispatch to this factor */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS));

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    mumps->sym               = 0; /* MUMPS SYM=0: unsymmetric */
  } else {
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
    B->factortype                  = MAT_FACTOR_CHOLESKY;
    /* MUMPS SYM: 1 = symmetric positive definite, 2 = general symmetric; complex always uses 2 */
    if (PetscDefined(USE_COMPLEX)) mumps->sym = 2;
    else if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
    else mumps->sym = 2;
  }
  mumps->ConvertToTriples = MatConvertToTriples_nest_xaij;
  PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype]));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }

  /* set solvertype */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype));
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void *)mumps;

  *F               = B;
  mumps->id.job    = JOB_NULL;
  mumps->ICNTL_pre = NULL;
  mumps->CNTL_pre  = NULL;
  mumps->matstruc  = DIFFERENT_NONZERO_PATTERN;
  PetscFunctionReturn(PETSC_SUCCESS);
}
4480 
/* Register the MUMPS factorization back-end with PETSc's solver-type registry for every
   supported (matrix type, factor type) pair; called once at package initialization.
   AIJ also fronts for MATDIAGONAL and dense types; BAIJ is LU-only via its own getter. */
PETSC_INTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
{
  PetscFunctionBegin;
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps))
;
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps));
  PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4505